Clémentine committed on
Commit 3adea5e · 1 Parent(s): 1d9fcdf
pyproject.toml CHANGED
@@ -18,6 +18,7 @@ dependencies = [
     "python-dotenv>=1.0.1",
     "tqdm>=4.67.1",
     "ruff>=0.11.2",
+    "lighteval @ git+https://github.com/huggingface/[email protected]",
 ]
 
 [build-system]
yourbench_space/__init__.py CHANGED
@@ -0,0 +1,2 @@
+import os
+PATH = "/home/user/app" if os.environ.get("system") == "spaces" else "app"
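
For reference, a minimal sketch of how this new PATH constant is meant to be consumed by the rest of the Space; the session id below is a placeholder, not part of the commit.

from yourbench_space import PATH

# On a Space the runtime exports system=spaces, so PATH resolves to the absolute
# /home/user/app directory; locally it falls back to the relative "app" directory.
session_uid = "1234-abcd"  # placeholder session id
upload_dir = f"{PATH}/{session_uid}/uploaded_files"
print(upload_dir)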
yourbench_space/app.py CHANGED
@@ -10,6 +10,7 @@ from loguru import logger
 import gradio as gr
 from datasets import load_dataset
 from huggingface_hub import whoami
+from yourbench_space import PATH
 from yourbench_space.utils import (
     STAGES,
     SubprocessManagerGroup,
@@ -278,46 +279,49 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
         )
 
     with gr.Tab("Run Generation", id=1):
-        with gr.Column():
-            with gr.Row():
-                start_button = gr.Button("Start Task")
-                stop_button = gr.Button("Stop Task")
-                kill_button = gr.Button("Kill Task")
-
-            start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
-            stop_button.click(MANAGERS.stop_process, inputs=session_state)
-            kill_button.click(MANAGERS.kill_process, inputs=session_state)
-
-            process_status = gr.Checkbox(label="Process Status", interactive=False)
-            status_timer = gr.Timer(2.0, active=True)
-            status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
-
-            with gr.Row():
-                with gr.Accordion("Stages", open=True):
-                    stages_table = gr.CheckboxGroup(
-                        choices=map_stage_names(STAGES),
-                        value=[],
-                        label="Pipeline Stages Completed",
-                        container=False,
-                        interactive=False,
-                    )
-
-            with gr.Row():
-                with gr.Column(scale=2):
-                    with gr.Accordion("Ingestion Preview"):
-                        ingestion_df = gr.DataFrame()
-
-                    with gr.Accordion("Summarization Preview"):
-                        summarization_df = gr.DataFrame()
-
-                    with gr.Accordion("Single Shot Preview"):
-                        single_shot_df = gr.DataFrame()
-
-                    with gr.Accordion("Multi Hop Preview"):
-                        multi_hop_df = gr.DataFrame()
-
-                    with gr.Accordion("Lighteval Preview"):
-                        lighteval_df = gr.DataFrame()
+        with gr.Row():
+            start_button = gr.Button("Start Task")
+            stop_button = gr.Button("Stop Task")
+            kill_button = gr.Button("Kill Task")
+
+        start_button.click(prepare_task, inputs=[session_state, login_btn, hf_dataset_name])
+        stop_button.click(MANAGERS.stop_process, inputs=session_state)
+        kill_button.click(MANAGERS.kill_process, inputs=session_state)
+
+        process_status = gr.Checkbox(label="Process Status", interactive=False)
+        status_timer = gr.Timer(2.0, active=True)
+        status_timer.tick(update_process_status, inputs=session_state, outputs=process_status)
+
+        with gr.Row():
+            with gr.Accordion("Stages", open=True):
+                stages_table = gr.CheckboxGroup(
+                    choices=map_stage_names(STAGES),
+                    value=[],
+                    label="Pipeline Stages Completed",
+                    container=False,
+                    interactive=False,
+                )
+
+        with gr.Row():
+            with gr.Column():
+                with gr.Accordion("Log Output", open=True):
+                    log_output = gr.Code(language=None, lines=20, interactive=False)
+
+            with gr.Column():
+                with gr.Accordion("Ingestion Preview"):
+                    ingestion_df = gr.DataFrame()
+
+                with gr.Accordion("Summarization Preview"):
+                    summarization_df = gr.DataFrame()
+
+                with gr.Accordion("Single Shot Preview"):
+                    single_shot_df = gr.DataFrame()
+
+                with gr.Accordion("Multi Hop Preview"):
+                    multi_hop_df = gr.DataFrame()
+
+                with gr.Accordion("Lighteval Preview"):
+                    lighteval_df = gr.DataFrame()
 
         stages_table.change(
             update_dataset,
@@ -325,8 +329,6 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
             outputs=[ingestion_df, summarization_df, single_shot_df, multi_hop_df, lighteval_df],
         )
 
-        with gr.Accordion("Log Output", open=False):
-            log_output = gr.Code(language=None, lines=20, interactive=False)
 
         # TODO: this timer should only be active when the second tab is passed to active for the first time
         log_timer = gr.Timer(1.0, active=True)
@@ -336,7 +338,7 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
             outputs=[log_output, stages_table],
         )
 
-    with gr.Tab("Evaluate", id=2, visible=False):
+    with gr.Tab("Evaluate", id=2):
         with gr.Row():
             btn_launch_evals = gr.Button("Launch evaluations")
             status = gr.Textbox(label="Status")
@@ -344,4 +346,4 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
 
     app.load(init_session, outputs=session_state)
 
-app.launch(allowed_paths=["/home/user/app"])
+app.launch(allowed_paths=[PATH])
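
A small self-contained sketch of the Timer polling pattern this tab relies on; the callback here is a stand-in for update_process_status, and the layout is reduced to the bare minimum.

import gradio as gr

def poll_status() -> bool:
    # Stand-in for update_process_status(session_state): report whether the task is still running.
    return True

with gr.Blocks() as demo:
    process_status = gr.Checkbox(label="Process Status", interactive=False)
    status_timer = gr.Timer(2.0, active=True)  # fires every 2 seconds
    status_timer.tick(poll_status, outputs=process_status)

demo.launch()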
yourbench_space/config.py CHANGED
@@ -1,6 +1,8 @@
 import yaml
 from loguru import logger
 
+from yourbench_space import PATH
+
 
 def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
     """Creates the base config dictionary"""
@@ -33,12 +35,12 @@ def generate_base_config(hf_org: str, hf_dataset_name: str, session_uid: str):
         },
         "pipeline": {
             "ingestion": {
-                "source_documents_dir": f"/home/user/app/{session_uid}/uploaded_files/",
-                "output_dir": f"/home/user/app/{session_uid}/ingested",
+                "source_documents_dir": f"{PATH}/{session_uid}/uploaded_files/",
+                "output_dir": f"{PATH}/{session_uid}/ingested",
                 "run": True,
             },
             "upload_ingest_to_hub": {
-                "source_documents_dir": f"/home/user/app/{session_uid}/ingested",
+                "source_documents_dir": f"{PATH}/{session_uid}/ingested",
                 "run": True,
             },
             "summarization": {
yourbench_space/evaluation.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import subprocess
 import asyncio
 
 from yourbench_space.leaderboard_space.env import INIT_MODELS
@@ -8,62 +9,9 @@ ON_SPACES = os.environ.get("system") == "spaces"
 OUTPUT_DIR = "/data" if ON_SPACES else "."
 
 
-def create_eval_file(eval_ds_name):
-    # TODO: replace by Nathan's call
-    content = (
-        """
-from aenum import extend_enum
-
-from lighteval.metrics.metrics import Metrics
-from lighteval.metrics.utils.metric_utils import (
-    CorpusLevelMetricGrouping,
-    MetricCategory,
-    MetricUseCase,
-)
-from lighteval.tasks.lighteval_task import LightevalTaskConfig
-from lighteval.tasks.extended.hle.main import JudgeLLMHLE
-from lighteval.tasks.requests import Doc
-
-
-def prompt_function(line, task_name: str = None):
-    if line["image"] not in [None, ""]:
-        return
-
-    return Doc(
-        task_name=task_name,
-        query="Question: " + line["question"] + "\\nAnswer:",
-        choices=[line["answer"]],
-        gold_index=0,
-        specific={"question": line["question"]},
-    )
-"""
-        + f"""
-
-hle = LightevalTaskConfig(
-    name="{eval_ds_name.replace("/", "_")}",
-    suite=["custom"],
-    prompt_function=prompt_function,
-    hf_repo="{eval_ds_name}",
-    hf_subset="default",
-    hf_avail_splits=["test"],
-    evaluation_splits=["test"],
-    few_shots_split=None,
-    few_shots_select=None,
-    generation_size=8192,
-    metric=[Metrics.exact_match],
-    stop_sequence=[],
-    trust_dataset=True,
-    version=0,
-)
-
-
-TASKS_TABLE = [hle]
-"""
-    )
-
-    with open(f"{OUTPUT_DIR}/custom_task.py", "w") as f:
-        f.write(content)
-
+def create_eval_file(eval_ds_name: str):
+    task_name = eval_ds_name.replace("/", "_")
+    subprocess.run(["lighteval", "tasks", "create", "examples/custom_tasks_templates/custom_yourbench_task.py", task_name, eval_ds_name])
 
 async def run_process(args: list) -> dict:
     process = await asyncio.create_subprocess_exec(
@@ -76,6 +24,7 @@ async def run_process(args: list) -> dict:
 
 
 async def run_evaluations(eval_ds_name: str, org: str) -> list:
+    task_name = eval_ds_name.replace("/", "_")
     tasks = []
     for model_name, provider in INIT_MODELS:
         args = [
@@ -83,11 +32,11 @@ async def run_evaluations(eval_ds_name: str, org: str) -> list:
             "endpoint",
             "inference-providers",
             f"model={model_name},provider={provider}",
-            f"custom|{eval_ds_name.replace('/', '_')}|0|0",
+            f"custom|{task_name}|0|0",
             "--custom-tasks",
-            f"{OUTPUT_DIR}/custom_task.py",
+            f"custom_{task_name}_task.py",
            "--max-samples",
-            "10",
+            "30",
             "--output-dir",
             f"{OUTPUT_DIR}",
             "--save-details",
yourbench_space/utils.py CHANGED
@@ -12,6 +12,8 @@ from loguru import logger
 import gradio as gr
 from datasets import load_dataset
 
+from yourbench_space import PATH
+
 
 STAGES = [
     "ingestion",
@@ -56,7 +58,7 @@ def save_files(oauth_token: gr.OAuthToken | None, session_state: gr.State, files
     for file in [file.name for file in files]:
         try:
             source_path = pathlib.Path(file)
-            upload_directory_uuid = pathlib.Path(f"/home/user/app/{session_state.value}/uploaded_files")
+            upload_directory_uuid = pathlib.Path(f"{PATH}/{session_state.value}/uploaded_files")
             # Ensure the upload directory exists
             upload_directory_uuid.mkdir(parents=True, exist_ok=True)
             destination_path = upload_directory_uuid / source_path.name
@@ -190,7 +192,7 @@ class SubprocessManagerGroup:
 class SubprocessManager:
     def __init__(self, session_uid: str):
         self.session_uid = session_uid
-        self.path = pathlib.Path(f"/home/user/app/{session_uid}")
+        self.path = pathlib.Path(f"{PATH}/{session_uid}")
         self.path.mkdir(parents=True, exist_ok=True)
         self.config_path = pathlib.Path(f"{self.path}/config.yml")
         self.command = ["uv", "run", "yourbench", "run", "--config", str(self.config_path)]
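
A minimal sketch of the per-session layout these paths produce when running locally (PATH == "app"); the session id is a placeholder.

import pathlib

PATH = "app"               # local value of yourbench_space.PATH
session_uid = "1234-abcd"  # placeholder session id

# Tree rooted at PATH/<session_uid>, as built by save_files and SubprocessManager:
#   app/1234-abcd/config.yml        <- generated pipeline config
#   app/1234-abcd/uploaded_files/   <- user uploads copied here
#   app/1234-abcd/ingested/         <- ingestion output
config_path = pathlib.Path(PATH) / session_uid / "config.yml"

# The generation run itself is launched as:
command = ["uv", "run", "yourbench", "run", "--config", str(config_path)]
print(command)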