Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Clémentine
committed on
Commit
·
3adea5e
1
Parent(s):
1d9fcdf
tmp
Browse files
- pyproject.toml +1 -0
- yourbench_space/__init__.py +2 -0
- yourbench_space/app.py +46 -44
- yourbench_space/config.py +5 -3
- yourbench_space/evaluation.py +8 -59
- yourbench_space/utils.py +4 -2
pyproject.toml
CHANGED
@@ -18,6 +18,7 @@ dependencies = [
|
|
18 |
"python-dotenv>=1.0.1",
|
19 |
"tqdm>=4.67.1",
|
20 |
"ruff>=0.11.2",
|
|
|
21 |
]
|
22 |
|
23 |
[build-system]
|
|
|
18 |
"python-dotenv>=1.0.1",
|
19 |
"tqdm>=4.67.1",
|
20 |
"ruff>=0.11.2",
|
21 |
+
"lighteval @ git+https://github.com/huggingface/[email protected]",
|
22 |
]
|
23 |
|
24 |
[build-system]
|
yourbench_space/__init__.py
CHANGED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
PATH = "/home/user/app" if os.environ.get("system") == "spaces" else "app"
|
yourbench_space/app.py
CHANGED
@@ -10,6 +10,7 @@ from loguru import logger
|
|
10 |
import gradio as gr
|
11 |
from datasets import load_dataset
|
12 |
from huggingface_hub import whoami
|
|
|
13 |
from yourbench_space.utils import (
|
14 |
STAGES,
|
15 |
SubprocessManagerGroup,
|
@@ -278,46 +279,49 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
278 |
)
|
279 |
|
280 |
with gr.Tab("Run Generation", id=1):
|
281 |
-
with gr.
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
with gr.
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
with gr.
|
306 |
-
with gr.
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
|
|
|
|
|
|
321 |
|
322 |
stages_table.change(
|
323 |
update_dataset,
|
@@ -325,8 +329,6 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
325 |
outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
|
326 |
)
|
327 |
|
328 |
-
with gr.Accordion("Log Output", open=False):
|
329 |
-
log_output = gr.Code(language=None, lines=20, interactive=False)
|
330 |
|
331 |
# TODO: this timer should only be active when the second tab is passed to active for the first time
|
332 |
log_timer = gr.Timer(1.0, active=True)
|
@@ -336,7 +338,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
336 |
outputs=[log_output, stages_table],
|
337 |
)
|
338 |
|
339 |
-
with gr.Tab("Evaluate", id=2
|
340 |
with gr.Row():
|
341 |
btn_launch_evals = gr.Button("Launch evaluations")
|
342 |
status = gr.Textbox(label="Status")
|
@@ -344,4 +346,4 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
|
|
344 |
|
345 |
app.load(init_session, outputs=session_state)
|
346 |
|
347 |
-
app.launch(allowed_paths=[
|
|
|
10 |
import gradio as gr
|
11 |
from datasets import load_dataset
|
12 |
from huggingface_hub import whoami
|
13 |
+
from yourbench_space import PATH
|
14 |
from yourbench_space.utils import (
|
15 |
STAGES,
|
16 |
SubprocessManagerGroup,
|
|
|
279 |
)
|
280 |
|
281 |
with gr.Tab("Run Generation", id=1):
|
282 |
+
with gr.Row():
|
283 |
+
start_button = gr.Button("Start Task")
|
284 |
+
stop_button = gr.Button("Stop Task")
|
285 |
+
kill_button = gr.Button("Kill Task")
|
286 |
+
|
287 |
+
start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
|
288 |
+
stop_button.click(MANAGERS.stop_process, inputs=session_state)
|
289 |
+
kill_button.click(MANAGERS.kill_process, inputs=session_state)
|
290 |
+
|
291 |
+
process_status = gr.Checkbox(label="Process Status", interactive=False)
|
292 |
+
status_timer = gr.Timer(2.0, active=True)
|
293 |
+
status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
|
294 |
+
|
295 |
+
with gr.Row():
|
296 |
+
with gr.Accordion("Stages", open=True):
|
297 |
+
stages_table = gr.CheckboxGroup(
|
298 |
+
choices=map_stage_names(STAGES),
|
299 |
+
value=[],
|
300 |
+
label="Pipeline Stages Completed",
|
301 |
+
container=False,
|
302 |
+
interactive=False,
|
303 |
+
)
|
304 |
+
|
305 |
+
with gr.Row():
|
306 |
+
with gr.Column():
|
307 |
+
with gr.Accordion("Log Output", open=True):
|
308 |
+
log_output = gr.Code(language=None, lines=20, interactive=False)
|
309 |
+
|
310 |
+
with gr.Column():
|
311 |
+
with gr.Accordion("Ingestion Preview"):
|
312 |
+
ingestion_df = gr.DataFrame()
|
313 |
+
|
314 |
+
with gr.Accordion("Summarization Preview"):
|
315 |
+
summarization_df = gr.DataFrame()
|
316 |
+
|
317 |
+
with gr.Accordion("Single Shot Preview"):
|
318 |
+
single_shot_df = gr.DataFrame()
|
319 |
+
|
320 |
+
with gr.Accordion("Multi Hop Preview"):
|
321 |
+
multi_hop_df = gr.DataFrame()
|
322 |
+
|
323 |
+
with gr.Accordion("Lighteval Preview"):
|
324 |
+
lighteval_df = gr.DataFrame()
|
325 |
|
326 |
stages_table.change(
|
327 |
update_dataset,
|
|
|
329 |
outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
|
330 |
)
|
331 |
|
|
|
|
|
332 |
|
333 |
# TODO: this timer should only be active when the second tab is passed to active for the first time
|
334 |
log_timer = gr.Timer(1.0, active=True)
|
|
|
338 |
outputs=[log_output, stages_table],
|
339 |
)
|
340 |
|
341 |
+
with gr.Tab("Evaluate", id=2):
|
342 |
with gr.Row():
|
343 |
btn_launch_evals = gr.Button("Launch evaluations")
|
344 |
status = gr.Textbox(label="Status")
|
|
|
346 |
|
347 |
app.load(init_session, outputs=session_state)
|
348 |
|
349 |
+
app.launch(allowed_paths=[PATH])
|
yourbench_space/config.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import yaml
|
2 |
from loguru import logger
|
3 |
|
|
|
|
|
4 |
|
5 |
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
6 |
"""Creates the base config dictionary"""
|
@@ -33,12 +35,12 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
|
33 |
},
|
34 |
"pipeline": {
|
35 |
"ingestion": {
|
36 |
-
"source_documents_dir": f"/
|
37 |
-
"output_dir": f"/
|
38 |
"run": True,
|
39 |
},
|
40 |
"upload_ingest_to_hub": {
|
41 |
-
"source_documents_dir": f"/
|
42 |
"run": True,
|
43 |
},
|
44 |
"summarization": {
|
|
|
1 |
import yaml
|
2 |
from loguru import logger
|
3 |
|
4 |
+
from yourbench_space import PATH
|
5 |
+
|
6 |
|
7 |
def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
|
8 |
"""Creates the base config dictionary"""
|
|
|
35 |
},
|
36 |
"pipeline": {
|
37 |
"ingestion": {
|
38 |
+
"source_documents_dir": f"{PATH}/{session_uid}/uploaded_files/",
|
39 |
+
"output_dir": f"{PATH}/{session_uid}/ingested",
|
40 |
"run": True,
|
41 |
},
|
42 |
"upload_ingest_to_hub": {
|
43 |
+
"source_documents_dir": f"{PATH}/{session_uid}/ingested",
|
44 |
"run": True,
|
45 |
},
|
46 |
"summarization": {
|
yourbench_space/evaluation.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
import asyncio
|
3 |
|
4 |
from yourbench_space.leaderboard_space.env import INIT_MODELS
|
@@ -8,62 +9,9 @@ ON_SPACES = os.environ.get("system") == "spaces"
|
|
8 |
OUTPUT_DIR = "/data" if ON_SPACES else "."
|
9 |
|
10 |
|
11 |
-
def create_eval_file(eval_ds_name):
|
12 |
-
|
13 |
-
|
14 |
-
"""
|
15 |
-
from aenum import extend_enum
|
16 |
-
|
17 |
-
from lighteval.metrics.metrics import Metrics
|
18 |
-
from lighteval.metrics.utils.metric_utils import (
|
19 |
-
CorpusLevelMetricGrouping,
|
20 |
-
MetricCategory,
|
21 |
-
MetricUseCase,
|
22 |
-
)
|
23 |
-
from lighteval.tasks.lighteval_task import LightevalTaskConfig
|
24 |
-
from lighteval.tasks.extended.hle.main import JudgeLLMHLE
|
25 |
-
from lighteval.tasks.requests import Doc
|
26 |
-
|
27 |
-
|
28 |
-
def prompt_function(line, task_name: str = None):
|
29 |
-
if line["image"] not in [None, ""]:
|
30 |
-
return
|
31 |
-
|
32 |
-
return Doc(
|
33 |
-
task_name=task_name,
|
34 |
-
query="Question: " + line["question"] + "\\nAnswer:",
|
35 |
-
choices=[line["answer"]],
|
36 |
-
gold_index=0,
|
37 |
-
specific={"question": line["question"]},
|
38 |
-
)
|
39 |
-
"""
|
40 |
-
+ f"""
|
41 |
-
|
42 |
-
hle = LightevalTaskConfig(
|
43 |
-
name="{eval_ds_name.replace("/", "_")}",
|
44 |
-
suite=["custom"],
|
45 |
-
prompt_function=prompt_function,
|
46 |
-
hf_repo="{eval_ds_name}",
|
47 |
-
hf_subset="default",
|
48 |
-
hf_avail_splits=["test"],
|
49 |
-
evaluation_splits=["test"],
|
50 |
-
few_shots_split=None,
|
51 |
-
few_shots_select=None,
|
52 |
-
generation_size=8192,
|
53 |
-
metric=[Metrics.exact_match],
|
54 |
-
stop_sequence=[],
|
55 |
-
trust_dataset=True,
|
56 |
-
version=0,
|
57 |
-
)
|
58 |
-
|
59 |
-
|
60 |
-
TASKS_TABLE = [hle]
|
61 |
-
"""
|
62 |
-
)
|
63 |
-
|
64 |
-
with open(f"{OUTPUT_DIR}/custom_task.py", "w") as f:
|
65 |
-
f.write(content)
|
66 |
-
|
67 |
|
68 |
async def run_process(args: list) -> dict:
|
69 |
process = await asyncio.create_subprocess_exec(
|
@@ -76,6 +24,7 @@ async def run_process(args: list) -> dict:
|
|
76 |
|
77 |
|
78 |
async def run_evaluations(eval_ds_name: str, org: str) -> list:
|
|
|
79 |
tasks = []
|
80 |
for model_name, provider in INIT_MODELS:
|
81 |
args = [
|
@@ -83,11 +32,11 @@ async def run_evaluations(eval_ds_name: str, org: str) -> list:
|
|
83 |
"endpoint",
|
84 |
"inference-providers",
|
85 |
f"model={model_name},provider={provider}",
|
86 |
-
f"custom|{
|
87 |
"--custom-tasks",
|
88 |
-
f"{
|
89 |
"--max-samples",
|
90 |
-
"
|
91 |
"--output-dir",
|
92 |
f"{OUTPUT_DIR}",
|
93 |
"--save-details",
|
|
|
1 |
import os
|
2 |
+
import subprocess
|
3 |
import asyncio
|
4 |
|
5 |
from yourbench_space.leaderboard_space.env import INIT_MODELS
|
|
|
9 |
OUTPUT_DIR = "/data" if ON_SPACES else "."
|
10 |
|
11 |
|
12 |
+
def create_eval_file(eval_ds_name: str):
|
13 |
+
task_name = eval_ds_name.replace("/", "_")
|
14 |
+
subprocess.run(["lighteval", "tasks", "create", "examples/custom_tasks_templates/custom_yourbench_task.py", task_name, eval_ds_name])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
async def run_process(args: list) -> dict:
|
17 |
process = await asyncio.create_subprocess_exec(
|
|
|
24 |
|
25 |
|
26 |
async def run_evaluations(eval_ds_name: str, org: str) -> list:
|
27 |
+
task_name = eval_ds_name.replace("/", "_")
|
28 |
tasks = []
|
29 |
for model_name, provider in INIT_MODELS:
|
30 |
args = [
|
|
|
32 |
"endpoint",
|
33 |
"inference-providers",
|
34 |
f"model={model_name},provider={provider}",
|
35 |
+
f"custom|{task_name}|0|0",
|
36 |
"--custom-tasks",
|
37 |
+
f"custom_{task_name}_task.py",
|
38 |
"--max-samples",
|
39 |
+
"30",
|
40 |
"--output-dir",
|
41 |
f"{OUTPUT_DIR}",
|
42 |
"--save-details",
|
yourbench_space/utils.py
CHANGED
@@ -12,6 +12,8 @@ from loguru import logger
|
|
12 |
import gradio as gr
|
13 |
from datasets import load_dataset
|
14 |
|
|
|
|
|
15 |
|
16 |
STAGES = [
|
17 |
"ingestion",
|
@@ -56,7 +58,7 @@ def save_files(oauth_token: gr.OAuthToken | None, session_state: gr.State, files
|
|
56 |
for file in [file.name for file in files]:
|
57 |
try:
|
58 |
source_path = pathlib.Path(file)
|
59 |
-
upload_directory_uuid = pathlib.Path(f"/
|
60 |
# Ensure the upload directory exists
|
61 |
upload_directory_uuid.mkdir(parents=True, exist_ok=True)
|
62 |
destination_path = upload_directory_uuid / source_path.name
|
@@ -190,7 +192,7 @@ class SubprocessManagerGroup:
|
|
190 |
class SubprocessManager:
|
191 |
def __init__(self, session_uid: str):
|
192 |
self.session_uid = session_uid
|
193 |
-
self.path = pathlib.Path(f"/
|
194 |
self.path.mkdir(parents=True, exist_ok=True)
|
195 |
self.config_path = pathlib.Path(f"{self.path}/config.yml")
|
196 |
self.command = ["uv", "run", "yourbench", "run", "--config", str(self.config_path)]
|
|
|
12 |
import gradio as gr
|
13 |
from datasets import load_dataset
|
14 |
|
15 |
+
from yourbench_space import PATH
|
16 |
+
|
17 |
|
18 |
STAGES = [
|
19 |
"ingestion",
|
|
|
58 |
for file in [file.name for file in files]:
|
59 |
try:
|
60 |
source_path = pathlib.Path(file)
|
61 |
+
upload_directory_uuid = pathlib.Path(f"{PATH}/{session_state.value}/uploaded_files")
|
62 |
# Ensure the upload directory exists
|
63 |
upload_directory_uuid.mkdir(parents=True, exist_ok=True)
|
64 |
destination_path = upload_directory_uuid / source_path.name
|
|
|
192 |
class SubprocessManager:
|
193 |
def __init__(self, session_uid: str):
|
194 |
self.session_uid = session_uid
|
195 |
+
self.path = pathlib.Path(f"{PATH}/{session_uid}")
|
196 |
self.path.mkdir(parents=True, exist_ok=True)
|
197 |
self.config_path = pathlib.Path(f"{self.path}/config.yml")
|
198 |
self.command = ["uv", "run", "yourbench", "run", "--config", str(self.config_path)]
|