Spaces:
Sleeping
Sleeping
Alina Lozovskaya
committed on
Commit
·
570d85c
1
Parent(s):
25580aa
Add resulted datasets [wip]
Browse files- yourbench_space/app.py +21 -0
- yourbench_space/config.py +1 -0
- yourbench_space/utils.py +21 -1
yourbench_space/app.py
CHANGED
@@ -12,6 +12,7 @@ from yourbench_space.utils import (
|
|
12 |
UPLOAD_DIRECTORY,
|
13 |
SubprocessManager,
|
14 |
save_files,
|
|
|
15 |
STAGES,
|
16 |
)
|
17 |
|
@@ -60,6 +61,8 @@ def generate_and_return(hf_org, hf_prefix):
|
|
60 |
)
|
61 |
)
|
62 |
|
|
|
|
|
63 |
def update_process_status():
|
64 |
"""Update process status and include exit details if process has terminated"""
|
65 |
is_running = manager.is_running()
|
@@ -191,5 +194,23 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
191 |
|
192 |
kill_button = gr.Button("Kill Task")
|
193 |
kill_button.click(manager.kill_process)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
|
195 |
app.launch(allowed_paths=["/app"])
|
|
|
12 |
UPLOAD_DIRECTORY,
|
13 |
SubprocessManager,
|
14 |
save_files,
|
15 |
+
update_dataset,
|
16 |
STAGES,
|
17 |
)
|
18 |
|
|
|
61 |
)
|
62 |
)
|
63 |
|
64 |
+
final_dataset = None
|
65 |
+
|
66 |
def update_process_status():
|
67 |
"""Update process status and include exit details if process has terminated"""
|
68 |
is_running = manager.is_running()
|
|
|
194 |
|
195 |
kill_button = gr.Button("Kill Task")
|
196 |
kill_button.click(manager.kill_process)
|
197 |
+
|
198 |
+
with gr.Row():
|
199 |
+
with gr.Accordion("Ingestion"):
|
200 |
+
ingestion_df = gr.DataFrame()
|
201 |
+
|
202 |
+
with gr.Accordion("Summarization"):
|
203 |
+
summarization_df = gr.DataFrame()
|
204 |
+
|
205 |
+
with gr.Accordion("Single-Hop"):
|
206 |
+
single_hop = gr.DataFrame()
|
207 |
+
|
208 |
+
with gr.Accordion("Answer Generation"):
|
209 |
+
answers_df = gr.DataFrame()
|
210 |
+
|
211 |
+
stages_table.change(
|
212 |
+
update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_prefix], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
|
213 |
+
)
|
214 |
+
|
215 |
|
216 |
app.launch(allowed_paths=["/app"])
|
yourbench_space/config.py
CHANGED
@@ -98,3 +98,4 @@ def generate_and_save_config(hf_org, hf_prefix):
|
|
98 |
file_path = save_yaml_file(config)
|
99 |
logger.success(f"Config saved at: {file_path}")
|
100 |
return file_path
|
|
|
|
98 |
file_path = save_yaml_file(config)
|
99 |
logger.success(f"Config saved at: {file_path}")
|
100 |
return file_path
|
101 |
+
|
yourbench_space/utils.py
CHANGED
@@ -3,8 +3,10 @@ import os
|
|
3 |
import re
|
4 |
import pathlib
|
5 |
import shutil
|
6 |
-
from loguru import logger
|
7 |
import subprocess
|
|
|
|
|
|
|
8 |
from typing import List
|
9 |
|
10 |
UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
|
@@ -49,6 +51,24 @@ def save_files(files: List[pathlib.Path]) -> str:
|
|
49 |
else "No files were saved"
|
50 |
)
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
class SubprocessManager:
|
54 |
def __init__(self, command):
|
|
|
3 |
import re
|
4 |
import pathlib
|
5 |
import shutil
|
|
|
6 |
import subprocess
|
7 |
+
import pandas as pd
|
8 |
+
from datasets import load_dataset, get_dataset_config_names
|
9 |
+
from loguru import logger
|
10 |
from typing import List
|
11 |
|
12 |
UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
|
|
|
51 |
else "No files were saved"
|
52 |
)
|
53 |
|
54 |
+
def update_dataset(stages, hf_org, hf_prefix):
    """
    Load the pipeline's result subsets once the final stage has been reached.

    Args:
        stages: Sequence of stage names reported so far. Only the last entry
            is inspected; an empty or ``None`` value is treated as
            "not finished yet".
        hf_org: First component of the dataset repository id.
        hf_prefix: Second component of the dataset repository id.

    Returns:
        A 4-tuple of ``pandas.DataFrame`` objects in the order
        (ingested, summarization, single_shot_question_generation,
        answer_generation). All four are empty unless the last entry of
        ``stages`` equals ``STAGES[-1]``.
    """
    # Subset (config) names, in the order callers expect the frames back.
    config_names = (
        "ingested",
        "summarization",
        "single_shot_question_generation",
        "answer_generation",
    )

    # Guard against an empty/None stage list before indexing: the change
    # event can fire before any stage has been reported.
    if not stages or stages[-1] != STAGES[-1]:
        return tuple(pd.DataFrame() for _ in config_names)

    # Construct dataset name from config
    dataset_name = f"{hf_org}/{hf_prefix}"

    # Load each subset's "train" split in streaming mode and materialize it
    # into a DataFrame. Loading errors are deliberately left to propagate.
    return tuple(
        pd.DataFrame(
            load_dataset(dataset_name, name=config, split="train", streaming=True)
        )
        for config in config_names
    )
|
72 |
|
73 |
class SubprocessManager:
|
74 |
def __init__(self, command):
|