Alina Lozovskaya committed on
Commit
089a447
·
1 Parent(s): b6ca389

Change UI and config

Browse files
yourbench_space/app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  import sys
3
  import time
4
  import gradio as gr
5
- import yaml
6
  from pathlib import Path
7
  from loguru import logger
8
  from huggingface_hub import whoami
@@ -13,9 +12,9 @@ from yourbench_space.utils import (
13
  UPLOAD_DIRECTORY,
14
  SubprocessManager,
15
  save_files,
 
16
  )
17
 
18
- # Short project description
19
  project_description = """
20
  # YourBench 🚀
21
  A Dynamic Benchmark Generation Framework
@@ -35,44 +34,43 @@ logger.add(sys.stderr, level="INFO")
35
  command = ["uv", "run", "yourbench", f"--config={CONFIG_PATH}"]
36
  manager = SubprocessManager(command)
37
 
38
- # Create a citation section
39
  docs_path = Path(__file__).parent / "docs.md"
40
-
41
- # Read the file safely
42
- if docs_path.exists():
43
- docs_content = docs_path.read_text()
44
- else:
45
- docs_content = "# Citation\n\nDocumentation file not found."
46
-
47
- citation_content = docs_content.split("# Citation")[-1].strip()
48
 
49
  def generate_and_return(hf_org, hf_prefix):
50
- """Handles config generation and validates file existence before enabling download"""
51
  generate_and_save_config(hf_org, hf_prefix)
52
-
53
- # Wait until the config file is actually created
54
  for _ in range(5):
55
  if CONFIG_PATH.exists():
56
  break
57
  time.sleep(0.5)
58
 
59
- if CONFIG_PATH.exists():
60
- return "✅ Config saved!", gr.update(value=str(CONFIG_PATH), visible=True, interactive=True)
61
- else:
62
- return "❌ Config generation failed.", gr.update(visible=False, interactive=False)
 
 
 
 
 
 
 
63
 
64
- def prepare_task(oauth_token: gr.OAuthToken | None, model_token: str):
65
- """Prepares and starts the subprocess with environment variables."""
66
  new_env = os.environ.copy()
67
  if oauth_token:
68
  new_env["HF_TOKEN"] = oauth_token.token
69
- new_env["MODEL_API_KEY"] = model_token
70
  manager.start_process(custom_env=new_env)
71
 
 
72
  def update_hf_org_dropdown(oauth_token: gr.OAuthToken | None):
73
- """Updates the dropdown with the user's Hugging Face organizations"""
74
  if oauth_token is None:
75
- print("Please deploy this on Spaces and log in to view the list of available organizations")
76
  return gr.Dropdown([], label="Organization")
77
 
78
  try:
@@ -83,38 +81,52 @@ def update_hf_org_dropdown(oauth_token: gr.OAuthToken | None):
83
  return gr.Dropdown(org_names, value=user_name, label="Organization")
84
 
85
  except Exception as e:
86
- print(f"Error retrieving user info: {e}")
87
  return gr.Dropdown([], label="Organization")
88
 
 
89
  def enable_button(files):
90
- """Enables the button if files are uploaded"""
91
  return gr.update(interactive=bool(files))
92
 
93
 
94
  with gr.Blocks() as app:
95
  gr.Markdown(project_description)
96
-
97
  gr.Markdown("## YourBench Setup")
98
-
99
  with gr.Row():
100
  login_btn = gr.LoginButton()
101
 
102
  with gr.Tab("Setup"):
103
  with gr.Row():
104
  with gr.Accordion("Hugging Face Settings"):
105
- hf_org_dropdown = gr.Dropdown(choices=[], label="Organization", allow_custom_value=True)
 
 
106
  app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
107
 
108
- hf_dataset_prefix = gr.Textbox(label="Dataset Prefix", value="yourbench", info="Prefix applied to all datasets")
 
 
 
 
109
 
110
  with gr.Accordion("Upload documents"):
111
- file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
 
 
 
 
112
  output = gr.Textbox(label="Log")
113
- file_input.upload(lambda files: save_files([file.name for file in files]), file_input, output)
 
 
 
 
114
 
115
  preview_button = gr.Button("Generate New Config", interactive=False)
116
  log_message = gr.Textbox(label="Log Message", visible=True)
117
- download_button = gr.File(label="Download Config", visible=False, interactive=False)
 
 
118
 
119
  file_input.change(enable_button, inputs=file_input, outputs=preview_button)
120
 
@@ -125,18 +137,29 @@ with gr.Blocks() as app:
125
  )
126
 
127
  with gr.Tab("Run Generation"):
128
- log_output = gr.Code(label="Log Output", language=None, lines=20, interactive=False)
129
- log_timer = gr.Timer(0.05, active=True)
130
- log_timer.tick(manager.read_and_get_output, outputs=log_output)
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  with gr.Row():
133
  process_status = gr.Checkbox(label="Process Status", interactive=False)
134
- status_timer = gr.Timer(0.05, active=True)
135
  status_timer.tick(manager.is_running, outputs=process_status)
136
 
137
  with gr.Row():
138
  start_button = gr.Button("Start Task")
139
- start_button.click(prepare_task, inputs=[hf_org_dropdown])
140
 
141
  stop_button = gr.Button("Stop Task")
142
  stop_button.click(manager.stop_process)
@@ -144,9 +167,7 @@ with gr.Blocks() as app:
144
  kill_button = gr.Button("Kill Task")
145
  kill_button.click(manager.kill_process)
146
 
147
- # Citation section at the end
148
  with gr.Accordion("📜 Citation", open=False):
149
  gr.Markdown(citation_content)
150
 
151
-
152
- app.launch(allowed_paths=["/app"])
 
2
  import sys
3
  import time
4
  import gradio as gr
 
5
  from pathlib import Path
6
  from loguru import logger
7
  from huggingface_hub import whoami
 
12
  UPLOAD_DIRECTORY,
13
  SubprocessManager,
14
  save_files,
15
+ STAGES,
16
  )
17
 
 
18
  project_description = """
19
  # YourBench 🚀
20
  A Dynamic Benchmark Generation Framework
 
34
  command = ["uv", "run", "yourbench", f"--config={CONFIG_PATH}"]
35
  manager = SubprocessManager(command)
36
 
 
37
  docs_path = Path(__file__).parent / "docs.md"
38
+ citation_content = (
39
+ docs_path.read_text().split("# Citation")[-1].strip()
40
+ if docs_path.exists()
41
+ else "# Citation\n\nDocumentation file not found."
42
+ )
 
 
 
43
 
44
  def generate_and_return(hf_org, hf_prefix):
 
45
  generate_and_save_config(hf_org, hf_prefix)
 
 
46
  for _ in range(5):
47
  if CONFIG_PATH.exists():
48
  break
49
  time.sleep(0.5)
50
 
51
+ return (
52
+ (
53
+ "✅ Config saved!",
54
+ gr.update(value=str(CONFIG_PATH), visible=True, interactive=True),
55
+ )
56
+ if CONFIG_PATH.exists()
57
+ else (
58
+ "❌ Config generation failed.",
59
+ gr.update(visible=False, interactive=False),
60
+ )
61
+ )
62
 
63
+
64
def prepare_task(oauth_token: gr.OAuthToken | None, hf_dataset_prefix: str, _=None):
    """Start the managed subprocess with an augmented copy of the environment.

    Args:
        oauth_token: OAuth token object, or ``None``. When truthy, its
            ``token`` attribute is exported as ``HF_TOKEN``.
        hf_dataset_prefix: Value exported as ``DATASET_PREFIX``.
        _: Extra optional argument that is accepted and ignored.
    """
    token_env = {"HF_TOKEN": oauth_token.token} if oauth_token else {}
    task_env = {**os.environ, **token_env, "DATASET_PREFIX": hf_dataset_prefix}
    manager.start_process(custom_env=task_env)
70
 
71
+
72
  def update_hf_org_dropdown(oauth_token: gr.OAuthToken | None):
 
73
  if oauth_token is None:
 
74
  return gr.Dropdown([], label="Organization")
75
 
76
  try:
 
81
  return gr.Dropdown(org_names, value=user_name, label="Organization")
82
 
83
  except Exception as e:
 
84
  return gr.Dropdown([], label="Organization")
85
 
86
+
87
  def enable_button(files):
 
88
  return gr.update(interactive=bool(files))
89
 
90
 
91
  with gr.Blocks() as app:
92
  gr.Markdown(project_description)
 
93
  gr.Markdown("## YourBench Setup")
94
+
95
  with gr.Row():
96
  login_btn = gr.LoginButton()
97
 
98
  with gr.Tab("Setup"):
99
  with gr.Row():
100
  with gr.Accordion("Hugging Face Settings"):
101
+ hf_org_dropdown = gr.Dropdown(
102
+ choices=[], label="Organization", allow_custom_value=True
103
+ )
104
  app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
105
 
106
+ hf_dataset_prefix = gr.Textbox(
107
+ label="Dataset Prefix",
108
+ value="yourbench",
109
+ info="Prefix applied to all datasets",
110
+ )
111
 
112
  with gr.Accordion("Upload documents"):
113
+ file_input = gr.File(
114
+ label="Upload text files",
115
+ file_count="multiple",
116
+ file_types=[".txt", ".md", ".html"],
117
+ )
118
  output = gr.Textbox(label="Log")
119
+ file_input.upload(
120
+ lambda files: save_files([file.name for file in files]),
121
+ file_input,
122
+ output,
123
+ )
124
 
125
  preview_button = gr.Button("Generate New Config", interactive=False)
126
  log_message = gr.Textbox(label="Log Message", visible=True)
127
+ download_button = gr.File(
128
+ label="Download Config", visible=False, interactive=False
129
+ )
130
 
131
  file_input.change(enable_button, inputs=file_input, outputs=preview_button)
132
 
 
137
  )
138
 
139
  with gr.Tab("Run Generation"):
140
+ with gr.Row():
141
+ with gr.Accordion("Log Output", open=True):
142
+ log_output = gr.Code(language=None, lines=20, interactive=False)
143
+
144
+ with gr.Accordion("Stages", open=True):
145
+ stages_table = gr.CheckboxGroup(
146
+ choices=STAGES,
147
+ value=[],
148
+ label="Pipeline Stages Completed",
149
+ interactive=False,
150
+ )
151
+
152
+ log_timer = gr.Timer(1.0, active=True)
153
+ log_timer.tick(manager.read_and_get_output, outputs=[log_output, stages_table])
154
 
155
  with gr.Row():
156
  process_status = gr.Checkbox(label="Process Status", interactive=False)
157
+ status_timer = gr.Timer(1.0, active=True)
158
  status_timer.tick(manager.is_running, outputs=process_status)
159
 
160
  with gr.Row():
161
  start_button = gr.Button("Start Task")
162
+ start_button.click(prepare_task, inputs=[login_btn, hf_dataset_prefix])
163
 
164
  stop_button = gr.Button("Stop Task")
165
  stop_button.click(manager.stop_process)
 
167
  kill_button = gr.Button("Kill Task")
168
  kill_button.click(manager.kill_process)
169
 
 
170
  with gr.Accordion("📜 Citation", open=False):
171
  gr.Markdown(citation_content)
172
 
173
+ app.launch(allowed_paths=["/app"])
 
yourbench_space/config.py CHANGED
@@ -2,42 +2,46 @@ import yaml
2
  from loguru import logger
3
  from yourbench_space.utils import CONFIG_PATH
4
 
 
5
  def generate_base_config(hf_org, hf_prefix):
6
  """Creates the base config dictionary"""
7
  return {
8
  "hf_configuration": {
9
  "token": "$HF_TOKEN",
10
  "private": True,
11
- "hf_organization": hf_org,
12
- "hf_dataset_name": hf_prefix
13
  },
14
  "local_dataset_dir": "results/",
15
  "model_list": [
16
  {
17
  "model_name": "meta-llama/Llama-3.3-70B-Instruct",
18
- "provider": "huggingface",
19
- "base_url": "https://jsq69lxgkhvpnliw.us-east-1.aws.endpoints.huggingface.cloud",
20
- "api_key": "$HF_TOKEN",
21
- "max_concurrent_requests": 16
 
 
 
22
  }
23
  ],
24
  "model_roles": {
25
  "ingestion": ["meta-llama/Llama-3.3-70B-Instruct"],
26
- "summarization": ["meta-llama/Llama-3.3-70B-Instruct"],
27
  "single_shot_question_generation": ["meta-llama/Llama-3.3-70B-Instruct"],
28
  "multi_hop_question_generation": ["meta-llama/Llama-3.3-70B-Instruct"],
29
- "answer_generation": ["meta-llama/Llama-3.3-70B-Instruct"],
30
- "judge_answers": ["meta-llama/Llama-3.3-70B-Instruct"]
31
  },
32
  "pipeline": {
33
  "ingestion": {
34
  "source_documents_dir": "/app/uploaded_files",
35
  "output_dir": "/app/ingested",
36
- "run": True
37
  },
38
  "upload_ingest_to_hub": {
39
  "source_documents_dir": "/app/ingested",
40
- "run": True
41
  },
42
  "summarization": {"run": True},
43
  "chunking": {
@@ -46,42 +50,52 @@ def generate_base_config(hf_org, hf_prefix):
46
  "l_max_tokens": 128,
47
  "tau_threshold": 0.3,
48
  "h_min": 2,
49
- "h_max": 4
50
  },
51
- "run": True
52
  },
53
  "single_shot_question_generation": {
54
  "diversification_seed": "24 year old adult",
55
- "run": True
56
  },
57
  "multi_hop_question_generation": {"run": True},
58
  "answer_generation": {
59
  "question_type": "single_shot",
60
  "run": True,
61
  "strategies": [
62
- {"name": "zeroshot", "prompt": "ZEROSHOT_QA_USER_PROMPT", "model_name": "meta-llama/Llama-3.3-70B-Instruct"},
63
- {"name": "gold", "prompt": "GOLD_QA_USER_PROMPT", "model_name": "meta-llama/Llama-3.3-70B-Instruct"}
64
- ]
 
 
 
 
 
 
 
 
65
  },
66
  "judge_answers": {
67
  "run": True,
68
  "comparing_strategies": [["zeroshot", "gold"]],
69
  "chunk_column_index": 0,
70
- "random_seed": 42
71
- }
72
- }
73
  }
74
 
 
75
  def save_yaml_file(config):
76
  """Saves the given config dictionary to a YAML file"""
77
  with open(CONFIG_PATH, "w") as file:
78
  yaml.dump(config, file, default_flow_style=False, sort_keys=False)
79
  return CONFIG_PATH
80
 
 
81
  def generate_and_save_config(hf_org, hf_prefix):
82
  """Generates and saves the YAML configuration file"""
83
  logger.debug(f"Generating config with org: {hf_org}, prefix: {hf_prefix}")
84
  config = generate_base_config(hf_org, hf_prefix)
85
  file_path = save_yaml_file(config)
86
  logger.success(f"Config saved at: {file_path}")
87
- return file_path
 
2
  from loguru import logger
3
  from yourbench_space.utils import CONFIG_PATH
4
 
5
+
6
  def generate_base_config(hf_org, hf_prefix):
7
  """Creates the base config dictionary"""
8
  return {
9
  "hf_configuration": {
10
  "token": "$HF_TOKEN",
11
  "private": True,
12
+ "hf_organization": hf_org,
13
+ "hf_dataset_name": hf_prefix,
14
  },
15
  "local_dataset_dir": "results/",
16
  "model_list": [
17
  {
18
  "model_name": "meta-llama/Llama-3.3-70B-Instruct",
19
+ "provider": "sambanova",
20
+ "max_concurrent_requests": 32,
21
+ },
22
+ {
23
+ "model_name": "Qwen/Qwen2.5-72B-Instruct",
24
+ "provider": "nebius",
25
+ "max_concurrent_requests": 32,
26
  }
27
  ],
28
  "model_roles": {
29
  "ingestion": ["meta-llama/Llama-3.3-70B-Instruct"],
30
+ "summarization": ["Qwen/Qwen2.5-72B-Instruct"],
31
  "single_shot_question_generation": ["meta-llama/Llama-3.3-70B-Instruct"],
32
  "multi_hop_question_generation": ["meta-llama/Llama-3.3-70B-Instruct"],
33
+ "answer_generation": ["Qwen/Qwen2.5-72B-Instruct"],
34
+ "judge_answers": ["meta-llama/Llama-3.3-70B-Instruct"],
35
  },
36
  "pipeline": {
37
  "ingestion": {
38
  "source_documents_dir": "/app/uploaded_files",
39
  "output_dir": "/app/ingested",
40
+ "run": True,
41
  },
42
  "upload_ingest_to_hub": {
43
  "source_documents_dir": "/app/ingested",
44
+ "run": True,
45
  },
46
  "summarization": {"run": True},
47
  "chunking": {
 
50
  "l_max_tokens": 128,
51
  "tau_threshold": 0.3,
52
  "h_min": 2,
53
+ "h_max": 4,
54
  },
55
+ "run": True,
56
  },
57
  "single_shot_question_generation": {
58
  "diversification_seed": "24 year old adult",
59
+ "run": True,
60
  },
61
  "multi_hop_question_generation": {"run": True},
62
  "answer_generation": {
63
  "question_type": "single_shot",
64
  "run": True,
65
  "strategies": [
66
+ {
67
+ "name": "zeroshot",
68
+ "prompt": "ZEROSHOT_QA_USER_PROMPT",
69
+ "model_name": "meta-llama/Llama-3.3-70B-Instruct",
70
+ },
71
+ {
72
+ "name": "gold",
73
+ "prompt": "GOLD_QA_USER_PROMPT",
74
+ "model_name": "meta-llama/Llama-3.3-70B-Instruct",
75
+ },
76
+ ],
77
  },
78
  "judge_answers": {
79
  "run": True,
80
  "comparing_strategies": [["zeroshot", "gold"]],
81
  "chunk_column_index": 0,
82
+ "random_seed": 42,
83
+ },
84
+ },
85
  }
86
 
87
+
88
  def save_yaml_file(config):
89
  """Saves the given config dictionary to a YAML file"""
90
  with open(CONFIG_PATH, "w") as file:
91
  yaml.dump(config, file, default_flow_style=False, sort_keys=False)
92
  return CONFIG_PATH
93
 
94
+
95
  def generate_and_save_config(hf_org, hf_prefix):
96
  """Generates and saves the YAML configuration file"""
97
  logger.debug(f"Generating config with org: {hf_org}, prefix: {hf_prefix}")
98
  config = generate_base_config(hf_org, hf_prefix)
99
  file_path = save_yaml_file(config)
100
  logger.success(f"Config saved at: {file_path}")
101
+ return file_path
yourbench_space/utils.py CHANGED
@@ -1,5 +1,6 @@
1
  import io
2
  import os
 
3
  import pathlib
4
  import shutil
5
  from loguru import logger
@@ -12,6 +13,18 @@ CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")
12
  # Ensure the upload directory exists
13
  UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
14
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def save_files(files: List[pathlib.Path]) -> str:
16
  """Save uploaded files to the UPLOAD_DIRECTORY safely"""
17
  saved_paths = []
@@ -31,7 +44,12 @@ def save_files(files: List[pathlib.Path]) -> str:
31
  except Exception as e:
32
  print(f"Error moving file {file}: {e}")
33
 
34
- return f"Files saved to: {', '.join(saved_paths)}" if saved_paths else "No files were saved"
 
 
 
 
 
35
 
36
  class SubprocessManager:
37
  def __init__(self, command):
@@ -53,24 +71,29 @@ class SubprocessManager:
53
  text=True,
54
  bufsize=1,
55
  start_new_session=True,
56
- env=custom_env
57
  )
58
  os.set_blocking(self.process.stdout.fileno(), False)
59
  logger.info("Started the process")
60
 
61
  def read_and_get_output(self):
62
- """Read available subprocess output and return the captured output"""
63
  if self.process and self.process.stdout:
64
  try:
65
  while True:
66
  line = self.process.stdout.readline()
67
  if line:
68
- self.output_stream.write(line) # Capture in StringIO
69
  else:
70
  break
71
  except BlockingIOError:
72
  pass
73
- return self.output_stream.getvalue()
 
 
 
 
 
74
 
75
  def stop_process(self):
76
  """Terminate the subprocess."""
@@ -79,9 +102,9 @@ class SubprocessManager:
79
  return
80
  logger.info("Sending SIGTERM to the Process")
81
  self.process.terminate()
82
- exit_code = self.process.wait() # Wait for process to terminate
83
  logger.info(f"Process stopped exit code {exit_code}")
84
- #return exit_code
85
 
86
  def kill_process(self):
87
  """Forcefully kill the subprocess"""
@@ -90,9 +113,9 @@ class SubprocessManager:
90
  return
91
  logger.info("Sending SIGKILL to the Process")
92
  self.process.kill()
93
- exit_code = self.process.wait() # Wait for process to be killed
94
  logger.info(f"Process killed exit code {exit_code}")
95
- #return exit_code
96
 
97
  def is_running(self):
98
  """Check if the subprocess is still running"""
 
1
  import io
2
  import os
3
+ import re
4
  import pathlib
5
  import shutil
6
  from loguru import logger
 
13
  # Ensure the upload directory exists
14
  UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
15
 
16
# Names of the pipeline stages; the UI offers these as the checkbox choices
# used to display which stages have completed.
STAGES = [
    "ingestion",
    "upload_ingest_to_hub",
    "summarization",
    "chunking",
    "single_shot_question_generation",
    "multi_hop_question_generation",
    "answer_generation",
    "judge_answers",
]
26
+
27
+
28
  def save_files(files: List[pathlib.Path]) -> str:
29
  """Save uploaded files to the UPLOAD_DIRECTORY safely"""
30
  saved_paths = []
 
44
  except Exception as e:
45
  print(f"Error moving file {file}: {e}")
46
 
47
+ return (
48
+ f"Files saved to: {', '.join(saved_paths)}"
49
+ if saved_paths
50
+ else "No files were saved"
51
+ )
52
+
53
 
54
  class SubprocessManager:
55
  def __init__(self, command):
 
71
  text=True,
72
  bufsize=1,
73
  start_new_session=True,
74
+ env=custom_env,
75
  )
76
  os.set_blocking(self.process.stdout.fileno(), False)
77
  logger.info("Started the process")
78
 
79
  def read_and_get_output(self):
80
+ """Read subprocess output, capture it, and return log and completed stages."""
81
  if self.process and self.process.stdout:
82
  try:
83
  while True:
84
  line = self.process.stdout.readline()
85
  if line:
86
+ self.output_stream.write(line)
87
  else:
88
  break
89
  except BlockingIOError:
90
  pass
91
+
92
+ current_output = self.output_stream.getvalue()
93
+ completed_stages = list(
94
+ set(re.findall(r"Successfully completed stage: (\w+)", current_output))
95
+ )
96
+ return current_output, completed_stages
97
 
98
  def stop_process(self):
99
  """Terminate the subprocess."""
 
102
  return
103
  logger.info("Sending SIGTERM to the Process")
104
  self.process.terminate()
105
+ exit_code = self.process.wait() # Wait for process to terminate
106
  logger.info(f"Process stopped exit code {exit_code}")
107
+ # return exit_code
108
 
109
  def kill_process(self):
110
  """Forcefully kill the subprocess"""
 
113
  return
114
  logger.info("Sending SIGKILL to the Process")
115
  self.process.kill()
116
+ exit_code = self.process.wait() # Wait for process to be killed
117
  logger.info(f"Process killed exit code {exit_code}")
118
+ # return exit_code
119
 
120
  def is_running(self):
121
  """Check if the subprocess is still running"""