Alina Lozovskaya committed on
Commit 570d85c · 1 Parent(s): 25580aa

Add resulted datasets [wip]

yourbench_space/app.py CHANGED

@@ -12,6 +12,7 @@ from yourbench_space.utils import (
     UPLOAD_DIRECTORY,
     SubprocessManager,
     save_files,
+    update_dataset,
     STAGES,
 )
 
@@ -60,6 +61,8 @@ def generate_and_return(hf_org, hf_prefix):
         )
     )
 
+final_dataset = None
+
 def update_process_status():
     """Update process status and include exit details if process has terminated"""
     is_running = manager.is_running()
@@ -191,5 +194,23 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
 
     kill_button = gr.Button("Kill Task")
     kill_button.click(manager.kill_process)
+
+    with gr.Row():
+        with gr.Accordion("Ingestion"):
+            ingestion_df = gr.DataFrame()
+
+        with gr.Accordion("Summarization"):
+            summarization_df = gr.DataFrame()
+
+        with gr.Accordion("Single-Hop"):
+            single_hop = gr.DataFrame()
+
+        with gr.Accordion("Answer Generation"):
+            answers_df = gr.DataFrame()
+
+    stages_table.change(
+        update_dataset, inputs=[stages_table, hf_org_dropdown, hf_dataset_prefix], outputs=[ingestion_df, summarization_df, single_hop, answers_df]
+    )
+
 
 app.launch(allowed_paths=["/app"])
yourbench_space/config.py CHANGED

@@ -98,3 +98,4 @@ def generate_and_save_config(hf_org, hf_prefix):
     file_path = save_yaml_file(config)
     logger.success(f"Config saved at: {file_path}")
     return file_path
+
yourbench_space/utils.py CHANGED

@@ -3,8 +3,10 @@ import os
 import re
 import pathlib
 import shutil
-from loguru import logger
 import subprocess
+import pandas as pd
+from datasets import load_dataset, get_dataset_config_names
+from loguru import logger
 from typing import List
 
 UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
@@ -49,6 +51,24 @@ def save_files(files: List[pathlib.Path]) -> str:
         else "No files were saved"
     )
 
+def update_dataset(stages, hf_org, hf_prefix):
+    """
+    Updates the dataset based on the provided stages and dataset configuration.
+    """
+    # If not the final step, return empty dataframes
+    if stages[-1] != STAGES[-1]:
+        return (pd.DataFrame(), pd.DataFrame(), pd.DataFrame(), pd.DataFrame())
+
+    # Construct dataset name from config
+    dataset_name = f"{hf_org}/{hf_prefix}"
+
+    # Load datasets and convert to DataFrame
+    ingestion_df = pd.DataFrame(load_dataset(dataset_name, name="ingested", split="train", streaming=True))
+    summarization_df = pd.DataFrame(load_dataset(dataset_name, name="summarization", split="train", streaming=True))
+    single_hop = pd.DataFrame(load_dataset(dataset_name, name="single_shot_question_generation", split="train", streaming=True))
+    answers_df = pd.DataFrame(load_dataset(dataset_name, name="answer_generation", split="train", streaming=True))
+
+    return (ingestion_df, summarization_df, single_hop, answers_df)
 
 class SubprocessManager:
     def __init__(self, command):