Spaces:
Sleeping
Sleeping
import os | |
import time | |
import pathlib | |
import threading | |
import shutil | |
import gradio as gr | |
import yaml | |
import io | |
from loguru import logger | |
from yourbench.pipeline import run_pipeline | |
# Fixed locations inside the container; both live under /app.
CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")
UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")

# Create the upload directory (and any missing parents) at import time so
# uploads have somewhere to land; an already-existing directory is fine.
os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)

# In-memory text buffer that collects log output for display in the UI.
yourbench_log_stream = io.StringIO()
def custom_log_handler(message):
    """Append one log message to the in-memory log buffer.

    Intended to be registered as a loguru sink. Loguru's default format
    already terminates each formatted message with a newline, so a newline
    is only added when the message does not end with one; this avoids an
    empty line between consecutive log entries.

    Args:
        message: The formatted log message (anything convertible to ``str``).
    """
    text = str(message)
    if not text.endswith("\n"):
        text += "\n"
    yourbench_log_stream.write(text)
def get_log_content():
    """Return all log text captured in the buffer so far.

    Uses ``getvalue()`` rather than ``seek(0)`` + ``read()`` so the buffer's
    write position is never moved. The buffer is written from the pipeline
    thread while this function is polled from the UI; rewinding the shared
    cursor here could make a concurrent write land at the start of the
    buffer and overwrite earlier log text.

    Returns:
        The complete buffer contents as a single string.
    """
    return yourbench_log_stream.getvalue()
# Route log records into the in-memory buffer via custom_log_handler.
# NOTE(review): the "yourbench" filter presumably limits capture to records
# emitted from the yourbench package - confirm against the loguru docs.
logger.add(
    sink=custom_log_handler,
    filter="yourbench",
)
def start_task():
    """Run the pipeline on a daemon worker thread and wait for it to end.

    The pipeline is started with ``CONFIG_PATH`` as its only argument. The
    call to ``join()`` means this function does not return until the worker
    has finished. An exception raised inside the worker is not re-raised
    here, because ``join()`` does not propagate thread exceptions.
    """
    pipeline_worker = threading.Thread(
        daemon=True,
        target=run_pipeline,
        args=(CONFIG_PATH,),
    )
    pipeline_worker.start()
    pipeline_worker.join()
def generate_config(
    hf_token,
    hf_org,
    model_name,
    provider,
    base_url,
    api_key,
    max_concurrent_requests,
    ingestion_source,
    ingestion_output,
    run_ingestion,
    summarization_source,
    summarization_output,
    run_summarization
):
    """Render the form values as a YAML configuration document.

    The result has three top-level sections: ``hf_configuration`` (token,
    organization, and ``private`` fixed to ``True``), ``model_list`` (a
    single model entry), and ``pipeline`` (the ``ingestion`` and
    ``summarization`` stages, each with its own ``run`` flag).

    Returns:
        The configuration serialized by ``yaml.dump`` in block style.
    """
    hf_section = {
        "token": hf_token,
        "private": True,
        "hf_organization": hf_org,
    }
    model_entry = {
        "model_name": model_name,
        "provider": provider,
        "base_url": base_url,
        "api_key": api_key,
        "max_concurrent_requests": max_concurrent_requests,
    }
    ingestion_stage = {
        "source_documents_dir": ingestion_source,
        "output_dir": ingestion_output,
        "run": run_ingestion,
    }
    summarization_stage = {
        "source_dataset_name": summarization_source,
        "output_dataset_name": summarization_output,
        "run": run_summarization,
    }
    document = {
        "hf_configuration": hf_section,
        "model_list": [model_entry],
        "pipeline": {
            "ingestion": ingestion_stage,
            "summarization": summarization_stage,
        },
    }
    return yaml.dump(document, default_flow_style=False)
def save_config(yaml_text):
    """Write the given YAML text to ``CONFIG_PATH`` and return a status line.

    The file is written as UTF-8 explicitly so the result does not depend
    on the container's locale. Empty or missing text is rejected instead of
    silently replacing an existing config with an empty file.

    Args:
        yaml_text: The configuration text to persist.

    Returns:
        A human-readable status message naming the file that was written,
        or a warning when there was nothing to save.
    """
    if not yaml_text:
        return "⚠️ Nothing to save: generate a config first."
    CONFIG_PATH.write_text(yaml_text, encoding="utf-8")
    # Report the real file name; the path is configured in CONFIG_PATH.
    return f"✅ Config saved as {CONFIG_PATH.name}!"
def save_files(files: list[str]):
    """Move uploaded files into ``UPLOAD_DIRECTORY`` and report the result.

    Only the final path component of each upload is used for the
    destination name, so files always land directly inside the upload
    directory.

    Args:
        files: Paths of the uploaded files; may be ``None`` or empty.

    Returns:
        A status message listing the destination paths, or a short notice
        when no files were provided.
    """
    if not files:
        return "No files were provided."

    # Re-create the directory if it disappeared since startup.
    UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)

    saved_paths = []
    for uploaded in files:
        source = pathlib.Path(uploaded)
        destination = UPLOAD_DIRECTORY / source.name
        shutil.move(str(source), str(destination))
        saved_paths.append(str(destination))
    return f"Files have been successfully saved to: {', '.join(saved_paths)}"
def start_youbench():
    """Run the pipeline in the calling thread using the saved config file.

    Invokes ``run_pipeline`` on ``CONFIG_PATH`` with ``debug=False``. This
    helper is not wired to any UI control in the code shown in this file.
    """
    config_file = CONFIG_PATH
    run_pipeline(config_file, debug=False)
# Gradio UI: one tab per configuration area, plus file upload and run/log tabs.
app = gr.Blocks()
with app:
    gr.Markdown("## YourBench Configuration")

    with gr.Tab("HF Configuration"):
        # Masked input so the token is not shown in clear text while typing.
        # The generated config preview still contains the value verbatim.
        hf_token = gr.Textbox(label="HF Token", type="password")
        hf_org = gr.Textbox(label="HF Organization")

    with gr.Tab("Model Settings"):
        model_name = gr.Textbox(label="Model Name")
        provider = gr.Dropdown(["openrouter", "openai", "huggingface"], value="huggingface", label="Provider")
        base_url = gr.Textbox(label="Base URL")
        # Masked for the same reason as the HF token.
        api_key = gr.Textbox(label="API Key", type="password")
        max_concurrent_requests = gr.Dropdown([8, 16, 32], value=16, label="Max Concurrent Requests")

    with gr.Tab("Pipeline Stages"):
        ingestion_source = gr.Textbox(label="Ingestion Source Directory")
        ingestion_output = gr.Textbox(label="Ingestion Output Directory")
        run_ingestion = gr.Checkbox(label="Run Ingestion", value=False)
        summarization_source = gr.Textbox(label="Summarization Source Dataset")
        summarization_output = gr.Textbox(label="Summarization Output Dataset")
        run_summarization = gr.Checkbox(label="Run Summarization", value=False)

    with gr.Tab("Config"):
        config_output = gr.Code(label="Generated Config", language="yaml")
        preview_button = gr.Button("Generate Config")
        save_button = gr.Button("Save Config")
        save_status = gr.Textbox(label="Save Status")

        # The input order must match generate_config's parameter order.
        preview_button.click(
            generate_config,
            inputs=[
                hf_token, hf_org, model_name, provider, base_url, api_key,
                max_concurrent_requests, ingestion_source, ingestion_output,
                run_ingestion, summarization_source, summarization_output,
                run_summarization,
            ],
            outputs=config_output,
        )
        save_button.click(save_config, inputs=[config_output], outputs=[save_status])

    with gr.Tab("Files"):
        file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
        file_explorer = gr.FileExplorer(root_dir=UPLOAD_DIRECTORY, interactive=False, label="Current Files")
        output = gr.Textbox(label="Log")
        file_input.upload(save_files, file_input, output)

    with gr.Tab("Run Generation"):
        log_output = gr.Code(label="Log Output", language=None, lines=20, interactive=False)
        start_button = gr.Button("Start Long-Running Task")
        # Poll the in-memory log buffer twice a second to refresh the view.
        timer = gr.Timer(0.5, active=True)
        timer.tick(get_log_content, outputs=log_output)
        start_button.click(start_task)

app.launch()