Spaces:
Sleeping
Sleeping
File size: 5,283 Bytes
d1ed69b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import os
import time
import pathlib
import threading
import shutil
import gradio as gr
import yaml
import io
from loguru import logger
from yourbench.pipeline import run_pipeline
# Directory that uploaded files are moved into; created (with parents) at
# import time if it does not exist yet.
UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
# File the YAML config is written to and that is handed to run_pipeline.
CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")
# In-memory text buffer that collects log messages for display in the UI.
yourbench_log_stream = io.StringIO()
def custom_log_handler(message):
    """Append one log message, followed by a newline, to the in-memory log buffer.

    Args:
        message: The log text to record; it must support ``+`` with ``str``.
    """
    line = message + "\n"
    yourbench_log_stream.write(line)
def get_log_content():
    """Return everything written to the in-memory log buffer so far.

    Returns:
        The full buffer contents as a single string (empty if nothing has
        been logged yet).
    """
    # getvalue() reads the whole buffer without touching the stream position.
    # The previous seek(0) + read() pair moved the shared position, so a log
    # write arriving between the two calls (the log handler may run on another
    # thread) would overwrite the beginning of the buffer instead of appending.
    return yourbench_log_stream.getvalue()
# Register custom_log_handler as a loguru sink so log messages end up in the
# in-memory buffer. The string filter is presumably meant to limit the sink to
# records coming from the "yourbench" package - confirm against loguru's docs.
logger.add(custom_log_handler, filter="yourbench")
def start_task():
    """Run the pipeline for CONFIG_PATH on a daemon thread and wait for it to end."""
    worker = threading.Thread(
        target=run_pipeline,
        args=(CONFIG_PATH,),
        daemon=True,
    )
    worker.start()
    # NOTE(review): joining immediately after start() blocks the caller until
    # the pipeline finishes, so the thread does not make this call
    # non-blocking - confirm whether the join is intended.
    worker.join()
def generate_config(
    hf_token,
    hf_org,
    model_name,
    provider,
    base_url,
    api_key,
    max_concurrent_requests,
    ingestion_source,
    ingestion_output,
    run_ingestion,
    summarization_source,
    summarization_output,
    run_summarization,
):
    """Build the pipeline configuration from the form values and render it as YAML.

    The result has three top-level sections: ``hf_configuration`` (token,
    organization, and ``private`` fixed to ``True``), ``model_list`` (a list
    holding a single model entry) and ``pipeline`` (the ``ingestion`` and
    ``summarization`` stages, each with its own ``run`` flag).

    Returns:
        The configuration serialized with ``yaml.dump`` in block style.
    """
    hf_section = {
        "token": hf_token,
        "private": True,
        "hf_organization": hf_org,
    }

    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": api_key,
        "max_concurrent_requests": max_concurrent_requests,
    }

    ingestion_stage = {
        "source_documents_dir": ingestion_source,
        "output_dir": ingestion_output,
        "run": run_ingestion,
    }

    summarization_stage = {
        "source_dataset_name": summarization_source,
        "output_dataset_name": summarization_output,
        "run": run_summarization,
    }

    settings = {
        "hf_configuration": hf_section,
        "model_list": [model_entry],
        "pipeline": {
            "ingestion": ingestion_stage,
            "summarization": summarization_stage,
        },
    }
    return yaml.dump(settings, default_flow_style=False)
def save_config(yaml_text):
    """Write the YAML config text to CONFIG_PATH and return a status message.

    Args:
        yaml_text: The YAML document to store, as a string. ``None`` or
            whitespace-only text is rejected rather than written.

    Returns:
        A human-readable status string: a warning when nothing was saved,
        otherwise a confirmation naming the file that was written.
    """
    # Refuse to replace the config with an empty file; a missing value would
    # otherwise either crash on write or silently produce an unusable config.
    if yaml_text is None or not yaml_text.strip():
        return "⚠️ Config is empty, nothing was saved. Generate a config first."

    # Explicit UTF-8 so the written bytes do not depend on the host locale.
    CONFIG_PATH.write_text(yaml_text, encoding="utf-8")

    # Report the real file name (the config is not stored as config.yaml).
    return f"✅ Config saved as {CONFIG_PATH.name}!"
def save_files(files: list[str]):
    """Move the given files into UPLOAD_DIRECTORY and report where they ended up.

    Each file keeps its base name; only its directory changes.

    Args:
        files: Paths of the files to move.

    Returns:
        A status message listing the destination paths, comma-separated.
    """
    destinations = []
    for uploaded in files:
        source = pathlib.Path(uploaded)
        target = UPLOAD_DIRECTORY / source.name
        shutil.move(str(source), str(target))
        destinations.append(str(target))

    listing = ", ".join(destinations)
    return f"Files have been successfully saved to: {listing}"
def start_youbench():
    """Run the pipeline for CONFIG_PATH in the calling thread with ``debug=False``."""
    run_pipeline(CONFIG_PATH, debug=False)
# Gradio UI: a set of tabs that collect settings, generate and save the YAML
# config, accept file uploads, and run the pipeline while showing its log.
app = gr.Blocks()
with app:
    gr.Markdown("## YourBench Configuration")

    with gr.Tab("HF Configuration"):
        # Secrets are entered through masked inputs so they are not displayed
        # in clear text while typing. NOTE: the generated config preview below
        # still contains them verbatim.
        hf_token = gr.Textbox(label="HF Token", type="password")
        hf_org = gr.Textbox(label="HF Organization")

    with gr.Tab("Model Settings"):
        model_name = gr.Textbox(label="Model Name")
        provider = gr.Dropdown(["openrouter", "openai", "huggingface"], value="huggingface", label="Provider")
        base_url = gr.Textbox(label="Base URL")
        api_key = gr.Textbox(label="API Key", type="password")
        max_concurrent_requests = gr.Dropdown([8, 16, 32], value=16, label="Max Concurrent Requests")

    with gr.Tab("Pipeline Stages"):
        ingestion_source = gr.Textbox(label="Ingestion Source Directory")
        ingestion_output = gr.Textbox(label="Ingestion Output Directory")
        run_ingestion = gr.Checkbox(label="Run Ingestion", value=False)
        summarization_source = gr.Textbox(label="Summarization Source Dataset")
        summarization_output = gr.Textbox(label="Summarization Output Dataset")
        run_summarization = gr.Checkbox(label="Run Summarization", value=False)

    with gr.Tab("Config"):
        config_output = gr.Code(label="Generated Config", language="yaml")
        preview_button = gr.Button("Generate Config")
        save_button = gr.Button("Save Config")

        # The input order must match generate_config's parameter order.
        preview_button.click(
            generate_config,
            inputs=[
                hf_token, hf_org, model_name, provider, base_url, api_key,
                max_concurrent_requests, ingestion_source, ingestion_output,
                run_ingestion, summarization_source, summarization_output, run_summarization,
            ],
            outputs=config_output,
        )
        # Saves whatever is currently in the config editor; the status text
        # box is created inline as the output component.
        save_button.click(save_config, inputs=[config_output], outputs=[gr.Textbox(label="Save Status")])

    with gr.Tab("Files"):
        file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
        file_explorer = gr.FileExplorer(root_dir=UPLOAD_DIRECTORY, interactive=False, label="Current Files")
        output = gr.Textbox(label="Log")
        # Uploaded files are moved into UPLOAD_DIRECTORY; the result message
        # is shown in the "Log" box.
        file_input.upload(save_files, file_input, output)

    with gr.Tab("Run Generation"):
        log_output = gr.Code(label="Log Output", language=None, lines=20, interactive=False)
        start_button = gr.Button("Start Long-Running Task")
        # Poll the in-memory log buffer on every timer tick (interval 0.5) and
        # show its full contents in the log view.
        timer = gr.Timer(0.5, active=True)
        timer.tick(get_log_content, outputs=log_output)
        start_button.click(start_task)

app.launch()
|