Commit ea047ad · Alina Lozovskaya committed · Parent: 2617bee

Update Setup and Run Generatation tabs

Browse files:
- yourbench_space/app.py (+125 -77)
- yourbench_space/config.py (+33 -37)
- yourbench_space/evaluation.py (+34 -20)
- yourbench_space/utils.py (+94 -43)
yourbench_space/app.py

```diff
@@ -1,32 +1,33 @@
-import asyncio
 import os
 import sys
 import time
-import gradio as gr
 import uuid
+import asyncio
+from pathlib import Path

-from datasets import load_dataset
-from huggingface_hub import whoami
 from loguru import logger
-from pathlib import Path

+import gradio as gr
+from datasets import load_dataset
+from huggingface_hub import whoami
 from yourbench_space.utils import (
+    STAGES,
     SubprocessManagerGroup,
     save_files,
     update_dataset,
-    is_running_locally
+    map_stage_names,
+    is_running_locally,
 )
-from yourbench_space.
-from yourbench_space.
+from yourbench_space.config import generate_and_save_config
+from yourbench_space.evaluation import run_evaluations, create_eval_file

 project_description = """
 # YourBench 🚀
 **Dynamic Benchmark Generation for Language Models**

 Quickly create zero-shot benchmarks from your documents – keeping models accurate and adaptable
 - 📖 [FAQ](#)
 - 💻 [GitHub](https://github.com/huggingface/yourbench/tree/v0.2-alpha-space)
 """

@@ -35,7 +36,7 @@ logger.add(sys.stderr, level="INFO")

 # Global to store all managers per session
 MANAGERS = SubprocessManagerGroup()
-USER_ID_SESSION_MAP: dict[str, str] =
+USER_ID_SESSION_MAP: dict[str, str] = {}


 docs_path = Path(__file__).parent / "docs.md"
@@ -45,30 +46,36 @@ citation_content = (
     else "# Citation\n\nDocumentation file not found."
 )

+
 def generate_and_return(hf_org, hf_dataset_name, session_state: gr.State):
     manager = MANAGERS.get(session_state)
-    if manager is None:
+    if manager is None:  # should not be possible
         return (
+            "❌ Config generation failed.",
+            gr.update(visible=False, interactive=False),
+        )
+
     session_uid = session_state.value
     config_path = generate_and_save_config(hf_org, hf_dataset_name, session_uid, manager.config_path)
     for _ in range(5):
         time.sleep(0.5)
         if config_path.exists():
+            gr.Success("Config generated")
             return (
                 "✅ Config saved!",
                 gr.update(value=str(config_path), visible=True, interactive=True),
             )
+
+    gr.Error("Failed to generate config")
     return (
         "❌ Config generation failed.",
         gr.update(visible=False, interactive=False),
     )

+
 final_dataset = None

+
 def update_process_status(session_state: gr.State):
     """Update process status and include exit details if process has terminated"""
     if session_state is None:
@@ -79,17 +86,22 @@ def update_process_status(session_state: gr.State):
         return gr.update(value=False, label="Not running")

     is_running = manager.is_running()
+
     if not is_running:
         exit_code, exit_reason = manager.get_exit_details()
-        status_text =
+        status_text = (
+            f"Process Status: Stopped - {exit_reason}, exit code - {exit_code}"
+            if exit_reason
+            else "Process Status: Stopped"
+        )
         return gr.update(value=False, label=status_text)
+
     return gr.update(value=True, label="Process Status: Running")

+
 def prepare_task(session_uid: str, oauth_token: gr.OAuthToken | None, hf_dataset_name: str, _=None):
     if oauth_token is None and not is_running_locally():
-        gr.Warning(
+        gr.Warning("You need to log in to use this Space")
         return
     new_env = os.environ.copy()

@@ -122,6 +134,7 @@ def switch_to_run_generation_tab():
 def enable_button(files):
     return gr.update(interactive=bool(files))

+
 def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_name):
     # Test dataset existence
     eval_ds_name = f"{org_name}/{eval_name}"
@@ -136,13 +149,29 @@ def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_na
     status = asyncio.run(run_evaluations(eval_ds_name=eval_ds_name, org=org_name))
     # Create space
     from huggingface_hub import HfApi
+
     repo_id = f"{org_name}/leaderboard_yourbench_{eval_ds_name.replace('/', '_')}"
     api = HfApi()

     try:
-        api.create_repo(
+        api.create_repo(
+            repo_id=repo_id,
+            repo_type="space",
+            space_sdk="gradio",
+            token=oauth_token.token,
+        )
+        api.upload_folder(
+            repo_id=repo_id,
+            repo_type="space",
+            folder_path="src/",
+            token=oauth_token.token,
+        )
+        api.add_space_secret(
+            repo_id=repo_id,
+            key="HF_TOKEN",
+            value=oauth_token.token,
+            token=oauth_token.token,
+        )
         api.add_space_variable(repo_id=repo_id, key="TASK", value=eval_ds_name, token=oauth_token.token)
         api.add_space_variable(repo_id=repo_id, key="ORG_NAME", value=org_name, token=oauth_token.token)
     except Exception as e:
@@ -179,8 +208,6 @@ def init_session(profile: gr.OAuthProfile | None):


 with gr.Blocks(theme=gr.themes.Default()) as app:
-    # We initialize the session state with the user randomly generated uuid
-    # Using uuid4 makes collision cases extremely unlikely even for concurrent users
     session_state = gr.State()

     gr.Markdown(project_description)
@@ -190,12 +217,8 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
         with gr.Row():
             with gr.Accordion("Hugging Face Settings"):
                 login_btn = gr.LoginButton()
-                hf_org_dropdown = gr.Dropdown(
-                )
-                app.load(
-                    update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown
-                )
+                hf_org_dropdown = gr.Dropdown(choices=[], label="Organization", allow_custom_value=True)
+                app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)

                 hf_dataset_name = gr.Textbox(
                     label="Dataset name",
@@ -213,17 +236,36 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
                 file_input.upload(
                     save_files,
                     inputs=[session_state, file_input],
-                    outputs
+                    outputs=output,
                 )
+                delete_button = gr.Button("Delete Uploaded Files", visible=False)

                 preview_button = gr.Button("Generate New Config", interactive=False)
                 log_message = gr.Textbox(label="Log Message", visible=True)
-                download_button = gr.File(
-                )
+                download_button = gr.File(label="Download Config", visible=False, interactive=False)
+
+                file_input.change(
+                    lambda files: gr.update(visible=bool(files)),
+                    inputs=file_input,
+                    outputs=delete_button,
+                )

                 file_input.change(enable_button, inputs=file_input, outputs=preview_button)

+                def clean_and_confirm(uid):
+                    MANAGERS.clean_workdir(uid)
+                    return (
+                        "Deleted all uploaded files.",
+                        gr.update(value=None),
+                        gr.update(interactive=False),
+                    )
+
+                delete_button.click(
+                    clean_and_confirm,
+                    inputs=session_state,
+                    outputs=[output, file_input, preview_button],
+                )
+
                 preview_button.click(
                     generate_and_return,
                     inputs=[hf_org_dropdown, hf_dataset_name, session_state],
@@ -234,66 +276,72 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
             inputs=None,
             outputs=tabs,
         )
+
     with gr.Tab("Run Generation", id=1):
-        with gr.
+        with gr.Column():
+            with gr.Row():
+                start_button = gr.Button("Start Task")
+                stop_button = gr.Button("Stop Task")
+                kill_button = gr.Button("Kill Task")

+            start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
             stop_button.click(MANAGERS.stop_process, inputs=session_state)
-            kill_button = gr.Button("Kill Task")
             kill_button.click(MANAGERS.kill_process, inputs=session_state)

-        with gr.Column():
-            with gr.Accordion("Log Output", open=True):
-                log_output = gr.Code(language=None, lines=20, interactive=False)
-
-            process_status = gr.Checkbox(label="Process Status", interactive=False)
-            status_timer = gr.Timer(2.0, active=True)
-            status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
-
-        with gr.Column():
-            with gr.Accordion("Stages", open=True):
-                stages_table = gr.CheckboxGroup(
-                    choices=STAGES,
-                    value=[],
-                    label="Pipeline Stages Completed",
-                    interactive=False,
-                )
-
-        # TODO: this timer should only be active when the second tab is passed to active for the first time
-        log_timer = gr.Timer(1.0, active=True)
-        log_timer.tick(
-            MANAGERS.read_and_get_output, inputs=session_state, outputs=[log_output, stages_table]
-        )
+            process_status = gr.Checkbox(label="Process Status", interactive=False)
+            status_timer = gr.Timer(2.0, active=True)
+            status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
+
+            with gr.Row():
+                with gr.Accordion("Stages", open=True):
+                    stages_table = gr.CheckboxGroup(
+                        choices=map_stage_names(STAGES),
+                        value=[],
+                        label="Pipeline Stages Completed",
+                        container=False,
+                        interactive=False,
+                    )
+
+            with gr.Row():
+                with gr.Column(scale=2):
+                    with gr.Accordion("Ingestion Preview"):
+                        ingestion_df = gr.DataFrame()
+
+                    with gr.Accordion("Summarization Preview"):
+                        summarization_df = gr.DataFrame()
+
+                    with gr.Accordion("Single Shot Preview"):
+                        single_shot_df = gr.DataFrame()
+
+                    with gr.Accordion("Multi Hop Preview"):
+                        multi_hop_df = gr.DataFrame()
+
+                    with gr.Accordion("Lighteval Preview"):
+                        lighteval_df = gr.DataFrame()
+
+            stages_table.change(
+                update_dataset,
+                inputs=[stages_table, hf_org_dropdown, hf_dataset_name],
+                outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
+            )
+
+            with gr.Accordion("Log Output", open=False):
+                log_output = gr.Code(language=None, lines=20, interactive=False)
+
+            # TODO: this timer should only be active when the second tab is passed to active for the first time
+            log_timer = gr.Timer(1.0, active=True)
+            log_timer.tick(
+                MANAGERS.read_and_get_output,
+                inputs=session_state,
+                outputs=[log_output, stages_table],
+            )

     with gr.Tab("Evaluate", id=2, visible=False):
         with gr.Row():
             btn_launch_evals = gr.Button("Launch evaluations")
             status = gr.Textbox(label="Status")
             btn_launch_evals.click(run_evaluation_pipeline, [hf_org_dropdown, hf_dataset_name], status)

     app.load(init_session, outputs=session_state)

-app.launch(allowed_paths=["/app"])
+app.launch(allowed_paths=["/home/user/app"])
```
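Reviewer note: the reworked "Run Generation" tab drives its status checkbox from a `gr.Timer` that fires every 2 seconds (plus a second 1-second timer for logs). Below is a minimal, self-contained sketch of that polling pattern, assuming a Gradio release that ships `gr.Timer`; `poll_status` is a hypothetical stand-in for the Space's `SubprocessManager.is_running()` check:

```python
import gradio as gr

def poll_status():
    # Stand-in predicate; the Space checks its per-session SubprocessManager instead.
    running = False
    label = "Process Status: Running" if running else "Process Status: Stopped"
    return gr.update(value=running, label=label)

with gr.Blocks() as demo:
    status = gr.Checkbox(label="Process Status", interactive=False)
    timer = gr.Timer(2.0, active=True)  # fires every 2 seconds while the page is open
    timer.tick(poll_status, outputs=status)

demo.launch()
```

Polling keeps the UI responsive without background threads; deactivating the timer while the tab is hidden (the TODO left in the diff) would avoid needless ticks.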
yourbench_space/config.py

```diff
@@ -7,13 +7,14 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
     return {
         "hf_configuration": {
             "token": "$HF_TOKEN",
-            "private": True,
             "hf_organization": hf_org,
+            "private": True,
             "hf_dataset_name": hf_dataset_name,
+            "concat_if_exist": False,
         },
         "model_list": [
             {
-                "model_name": "
+                "model_name": "Qwen/Qwen2.5-VL-72B-Instruct",
                 "provider": "novita",
                 "max_concurrent_requests": 32,
             },
@@ -21,63 +22,59 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
                 "model_name": "Qwen/Qwen2.5-72B-Instruct",
                 "provider": "novita",
                 "max_concurrent_requests": 32,
-            }
+            },
         ],
         "model_roles": {
-            "ingestion": ["
+            "ingestion": ["Qwen/Qwen2.5-VL-72B-Instruct"],
             "summarization": ["Qwen/Qwen2.5-72B-Instruct"],
-            "judge_answers": ["meta-llama/Llama-3.3-70B-Instruct"],
+            "chunking": ["intfloat/multilingual-e5-large-instruct"],
+            "single_shot_question_generation": ["Qwen/Qwen2.5-72B-Instruct"],
+            "multi_hop_question_generation": ["Qwen/Qwen2.5-72B-Instruct"],
         },
         "pipeline": {
             "ingestion": {
-                "source_documents_dir": f"/app/{session_uid}/uploaded_files/",
-                "output_dir": f"/app/{session_uid}/ingested",
+                "source_documents_dir": f"/home/user/app/{session_uid}/uploaded_files/",
+                "output_dir": f"/home/user/app/{session_uid}/ingested",
                 "run": True,
             },
             "upload_ingest_to_hub": {
-                "source_documents_dir": f"/app/{session_uid}/ingested",
+                "source_documents_dir": f"/home/user/app/{session_uid}/ingested",
                 "run": True,
             },
-            "summarization": {"run": True},
+            "summarization": {
+                "run": True,
+            },
             "chunking": {
+                "run": True,
                 "chunking_configuration": {
                     "l_min_tokens": 64,
                     "l_max_tokens": 128,
-                    "tau_threshold": 0.
+                    "tau_threshold": 0.8,
                     "h_min": 2,
-                    "h_max":
+                    "h_max": 5,
+                    "num_multihops_factor": 2,
                 },
-                "run": True,
             },
             "single_shot_question_generation": {
-                "diversification_seed": "24 year old adult",
                 "run": True,
+                "additional_instructions": "Generate questions to test a curious adult",
+                "chunk_sampling": {
+                    "mode": "count",
+                    "value": 5,
+                    "random_seed": 123,
+                },
             },
             "multi_hop_question_generation": {
-                "answer_generation": {
-                    "question_type": "single_shot",
                 "run": True,
-                    {
-                        "name": "gold",
-                        "prompt": "GOLD_QA_USER_PROMPT",
-                        "model_name": "meta-llama/Llama-3.3-70B-Instruct",
-                    },
-                ],
-                "comparing_strategies": [["zeroshot", "gold"]],
-                "chunk_column_index": 0,
-                "random_seed": 42,
+                "additional_instructions": "Generate questions to test a curious adult",
+                "chunk_sampling": {
+                    "mode": "percentage",
+                    "value": 0.3,
+                    "random_seed": 42,
+                },
             },
+            "lighteval": {
+                "run": True,
+            },
         },
     }
@@ -97,4 +94,3 @@ def generate_and_save_config(hf_org: str, hf_name: str, session_uid: str, config
     file_path = save_yaml_file(config, config_path)
     logger.success(f"Config saved at: {file_path}")
     return file_path
-
```
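Reviewer note: `generate_base_config` returns a plain dict that `generate_and_save_config` serializes to YAML, so the generated config can be previewed offline. A sketch, assuming PyYAML is installed and the `yourbench_space` package is importable; the org, dataset name, and session uid below are placeholders:

```python
import yaml  # PyYAML, assumed available

from yourbench_space.config import generate_base_config

# Placeholder arguments for illustration only
config = generate_base_config("my-org", "my-bench", "0000-fake-uuid")
print(yaml.safe_dump(config, sort_keys=False))  # roughly the YAML written to config.yml
```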
yourbench_space/evaluation.py

```diff
@@ -1,12 +1,17 @@
-import
+import os
+import asyncio
+
 from yourbench_space.leaderboard_space.env import INIT_MODELS

+
+ON_SPACES = os.environ.get("system") == "spaces"
 OUTPUT_DIR = "/data" if ON_SPACES else "."

+
 def create_eval_file(eval_ds_name):
     # TODO: replace by Nathan's call
-    content =
+    content = (
+        """
 from aenum import extend_enum

 from lighteval.metrics.metrics import Metrics
@@ -31,10 +36,11 @@ def prompt_function(line, task_name: str = None):
         gold_index=0,
         specific={"question": line["question"]},
     )
 """
+        + f"""

 hle = LightevalTaskConfig(
-    name="{eval_ds_name.replace(
+    name="{eval_ds_name.replace("/", "_")}",
     suite=["custom"],
     prompt_function=prompt_function,
     hf_repo="{eval_ds_name}",
@@ -52,38 +58,46 @@ hle = LightevalTaskConfig(


 TASKS_TABLE = [hle]
 """
+    )
+
 with open(f"{OUTPUT_DIR}/custom_task.py", "w") as f:
     f.write(content)

+
 async def run_process(args: list) -> dict:
     process = await asyncio.create_subprocess_exec(
-        *args,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE
+        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
     )
     await asyncio.wait_for(process.wait(), timeout=180)
     stdout = await process.stdout.read()
     stderr = await process.stderr.read()
-    return {
-        'stdout': stdout.decode(),
-        'stderr': stderr.decode()
-    }
+    return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}
+

 async def run_evaluations(eval_ds_name: str, org: str) -> list:
     tasks = []
     for model_name, provider in INIT_MODELS:
         args = [
             "lighteval",
             "endpoint",
+            "inference-providers",
+            f"model={model_name},provider={provider}",
+            f"custom|{eval_ds_name.replace('/', '_')}|0|0",
+            "--custom-tasks",
+            f"{OUTPUT_DIR}/custom_task.py",
+            "--max-samples",
+            "10",
+            "--output-dir",
+            f"{OUTPUT_DIR}",
+            "--save-details",
+            "--results-org",
+            org,
+            "--push-to-hub",
         ]
         tasks.append(run_process(args))
     # Will capture the task if failed
     processes = await asyncio.gather(*tasks, return_exceptions=True)
     if all(not isinstance(result, Exception) for result in processes):
         return "✅"
     return "At least one model failed"
```
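Reviewer note: `run_evaluations` now launches one `lighteval endpoint inference-providers` subprocess per entry in `INIT_MODELS`. Echoing the argument list from this diff for a single hypothetical model and provider makes the resulting command line visible:

```python
OUTPUT_DIR = "."
eval_ds_name = "my-org/my-bench"  # hypothetical dataset
model_name, provider = "Qwen/Qwen2.5-72B-Instruct", "novita"  # one INIT_MODELS entry

# Argument list exactly as assembled in the diff above
args = [
    "lighteval",
    "endpoint",
    "inference-providers",
    f"model={model_name},provider={provider}",
    f"custom|{eval_ds_name.replace('/', '_')}|0|0",
    "--custom-tasks", f"{OUTPUT_DIR}/custom_task.py",
    "--max-samples", "10",
    "--output-dir", OUTPUT_DIR,
    "--save-details",
    "--results-org", "my-org",
    "--push-to-hub",
]
print(" ".join(args))
```

Passing the list to `asyncio.create_subprocess_exec` (rather than a shell string) sidesteps quoting issues with the `custom|...|0|0` task spec.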
yourbench_space/utils.py

```diff
@@ -1,15 +1,17 @@
 import io
 import os
 import re
-import pathlib
 import shutil
+import pathlib
 import subprocess
-import
+from typing import List, Union, Optional
+
 import pandas as pd
-from collections import defaultdict
-from datasets import load_dataset
 from loguru import logger
+
+import gradio as gr
+from datasets import load_dataset
+

 STAGES = [
     "ingestion",
@@ -17,12 +19,25 @@ STAGES = [
     "summarization",
     "chunking",
     "single_shot_question_generation",
-    "
-    #"create_leaderboard"
-    # "judge_answers", # to uncomment when fixed
+    "multi_hop_question_generation",
+    "lighteval",
 ]

+STAGE_DISPLAY_MAP = {
+    "ingestion": "Process Input Docs",
+    "upload_ingest_to_hub": "Upload Dataset to Hub",
+    "summarization": "Summarize Documents",
+    "chunking": "Chunk Documents",
+    "single_shot_question_generation": "Generate Single Shot Questions",
+    "multi_hop_question_generation": "Generate Multi Hop Questions",
+    "lighteval": "Generate Lighteval Subset",
+}
+
+
+def map_stage_names(stages: list[str]) -> list[str]:
+    return [STAGE_DISPLAY_MAP.get(stage, stage) for stage in stages]
+
+
 def is_running_locally() -> bool:
     """
     Returns True if Gradio is running locally, False if it's running in a Hugging Face Space.
@@ -33,7 +48,7 @@ def is_running_locally() -> bool:
 def save_files(oauth_token: gr.OAuthToken | None, session_state: gr.State, files: List[pathlib.Path]) -> str:
     """Save uploaded files to the UPLOAD_DIRECTORY/uuid safely"""
     if oauth_token is None and not is_running_locally():
-        gr.Warning(
+        gr.Warning("You need to log in to use this Space")
         return

     saved_paths = []
@@ -41,7 +56,7 @@ def save_files(oauth_token: gr.OAuthToken | None, session_state: gr.State, files
     for file in [file.name for file in files]:
         try:
             source_path = pathlib.Path(file)
-            upload_directory_uuid = pathlib.Path(f"/app/{session_state.value}/uploaded_files")
+            upload_directory_uuid = pathlib.Path(f"/home/user/app/{session_state.value}/uploaded_files")
             # Ensure the upload directory exists
             upload_directory_uuid.mkdir(parents=True, exist_ok=True)
             destination_path = upload_directory_uuid / source_path.name
@@ -56,11 +71,8 @@ def save_files(oauth_token: gr.OAuthToken | None, session_state: gr.State, files
         except Exception as e:
             print(f"Error moving file {file}: {e}")

-    return (
-        if saved_paths
-        else "No files were saved"
-    )
+    return f"Files saved to: {', '.join(saved_paths)}" if saved_paths else "No files were saved"
+

 def update_dataset(stages: list, hf_org: str, hf_prefix: str, oauth_token: gr.OAuthToken):
     """
@@ -68,31 +80,57 @@ def update_dataset(stages: list, hf_org: str, hf_prefix: str, oauth_token: gr.OA
     """
     ingestion_df = pd.DataFrame()
     summarization_df = pd.DataFrame()
+    single_shot_df = pd.DataFrame()
+    multi_hop_df = pd.DataFrame()
+    lighteval_df = pd.DataFrame()

     # Construct dataset name from config
     dataset_name = f"{hf_org}/{hf_prefix}"

-    if "
+    if STAGE_DISPLAY_MAP["upload_ingest_to_hub"] in stages:
+        ingestion_ds = load_dataset(
+            dataset_name, name="ingested", split="train", streaming=True, token=oauth_token.token
+        ).select_columns("document_text")
+        ingestion_df = pd.DataFrame(ingestion_ds.take(1))
+
+    if STAGE_DISPLAY_MAP["summarization"] in stages:
+        summarization_ds = load_dataset(
+            dataset_name, name="summarized", split="train", streaming=True, token=oauth_token.token
+        ).select_columns(["raw_document_summary", "document_summary", "summarization_model"])
+        summarization_df = pd.DataFrame(summarization_ds.take(5))
+
+    if STAGE_DISPLAY_MAP["single_shot_question_generation"] in stages:
+        single_shot_ds = load_dataset(
+            dataset_name,
+            name="single_shot_questions",
+            split="train",
+            streaming=True,
+            token=oauth_token.token,
+        ).select_columns(["question", "self_answer", "estimated_difficulty"])
+        single_shot_df = pd.DataFrame(single_shot_ds.take(5))
+
+    if STAGE_DISPLAY_MAP["multi_hop_question_generation"] in stages:
+        multi_hop_ds = load_dataset(
+            dataset_name,
+            name="multi_hop_questions",
+            split="train",
+            streaming=True,
+            token=oauth_token.token,
+        ).select_columns(["question", "self_answer", "estimated_difficulty"])
+        multi_hop_df = pd.DataFrame(multi_hop_ds.take(5))
+
+    if STAGE_DISPLAY_MAP["lighteval"] in stages:
+        lighteval_ds = load_dataset(
+            dataset_name, name="lighteval", split="train", streaming=True, token=oauth_token.token
+        ).select_columns(["question", "ground_truth_answer", "question_category", "kind"])
+        lighteval_df = pd.DataFrame(lighteval_ds.take(5))
+
+    return (ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df)


 class SubprocessManagerGroup:
     """Instanciates one manager per user (should be used as a singleton class)"""
+
     def __init__(self):
         self.managers: dict[str, SubprocessManager] = {}

@@ -115,8 +153,15 @@ class SubprocessManagerGroup:
         uid = SubprocessManagerGroup.grab_uuid(uid)
         if manager := self.managers.get(uid):
             manager.stop_process()
+            manager.clean_workdir()
+
             del self.managers[uid]

+    def clean_workdir(self, uid: Union[str, gr.State]):
+        uid = SubprocessManagerGroup.grab_uuid(uid)
+        if manager := self.managers.get(uid):
+            manager.clean_workdir()
+
     def start_process(self, uid: Union[str, gr.State], custom_env: dict | None):
         uid = SubprocessManagerGroup.grab_uuid(uid)
         self.managers[uid].start_process(custom_env=custom_env)
@@ -141,13 +186,14 @@ class SubprocessManagerGroup:
         return manager.is_running()
     return False

+
 class SubprocessManager:
     def __init__(self, session_uid: str):
         self.session_uid = session_uid
-        self.path = pathlib.Path(f"/app/{session_uid}")
+        self.path = pathlib.Path(f"/home/user/app/{session_uid}")
         self.path.mkdir(parents=True, exist_ok=True)
         self.config_path = pathlib.Path(f"{self.path}/config.yml")
-        self.command = ["uv", "run", "yourbench",
+        self.command = ["uv", "run", "yourbench", "run", "--config", str(self.config_path)]
         self.process = None
         self.output_stream = io.StringIO()
         self.exit_code = None
@@ -160,7 +206,7 @@ class SubprocessManager:

         self.output_stream = io.StringIO()
         self.exit_code = None
-
+
         try:
             logger.info(f"Starting process with command: {' '.join(self.command)}")
             self.process = subprocess.Popen(
@@ -195,9 +241,12 @@ class SubprocessManager:
             pass

         current_output = self.output_stream.getvalue()
-        completed_stages = list(set(re.findall(r"
+        completed_stages = list(set(re.findall(r"Completed stage: '([^']*)'", current_output)))
+
+        return current_output, map_stage_names(completed_stages)

+    def clean_workdir(self):
+        shutil.rmtree(self.path, ignore_errors=True)

     def stop_process(self):
         """Terminate the subprocess."""
@@ -207,7 +256,7 @@ class SubprocessManager:
         logger.info("Sending SIGTERM to the Process")
         try:
             self.process.terminate()
-            self.exit_code =
+            self.exit_code = self.process.wait(timeout=5)  # Wait up to 5 seconds for process to terminate
             logger.info(f"Process terminated by user with exit code {self.exit_code}")
         except subprocess.TimeoutExpired:
             logger.warning("Process did not terminate within timeout, sending SIGKILL")
@@ -221,7 +270,7 @@ class SubprocessManager:
         logger.info("Sending SIGKILL to the Process")
         try:
             self.process.kill()
-            self.exit_code = self.process.wait(timeout=5)
+            self.exit_code = self.process.wait(timeout=5)  # Wait up to 5 seconds for process to be killed
             logger.info(f"Process killed by user with exit code {self.exit_code}")
         except subprocess.TimeoutExpired:
             logger.error("Process could not be killed within timeout")
@@ -237,11 +286,11 @@ class SubprocessManager:
         """Return exit code and reason if process has terminated"""
         if self.process is None:
             return None, "Process was never started"
+
         if self.is_running():
             return None, "Process is still running"

-        if
+        if self.exit_code is not None and self.exit_code != 0:
             return self.exit_code, "Process exited abnormaly"

         return self.exit_code, "Process exited normaly"
@@ -250,3 +299,5 @@ class SubprocessManager:
         """Stop the process when object is deleted"""
         if self.process:
             self.process.kill()
+
+        self.clean_workdir()
```
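Reviewer note: the new `STAGE_DISPLAY_MAP` and `map_stage_names` tie the log scraping in `read_and_get_output` to the checkbox labels in the UI: raw stage names found in the subprocess log are mapped to display names before being handed to the `CheckboxGroup`. A self-contained sketch of that round trip, with a made-up log string:

```python
import re

# Excerpt of the mapping added in this commit
STAGE_DISPLAY_MAP = {
    "ingestion": "Process Input Docs",
    "chunking": "Chunk Documents",
}

def map_stage_names(stages: list[str]) -> list[str]:
    # Unknown stage names pass through unchanged
    return [STAGE_DISPLAY_MAP.get(stage, stage) for stage in stages]

log = "...\nCompleted stage: 'ingestion'\nCompleted stage: 'chunking'\n"
completed = list(set(re.findall(r"Completed stage: '([^']*)'", log)))
print(map_stage_names(completed))  # ['Process Input Docs', 'Chunk Documents'] (order may vary)
```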