Fix token for space generation
- yourbench_space/app.py +15 -6
- yourbench_space/evaluation.py +4 -4
yourbench_space/app.py
CHANGED
@@ -25,11 +25,16 @@ from yourbench_space.evaluation import run_evaluations, create_eval_file
 
 
 project_description = """
-# YourBench
-
+# 🚀 YourBench
+### Dynamic Benchmark Generation from Your Documents
 
-
-
+- Create zero-shot benchmarks from your documents – no manual labeling
+- Evaluate top open models and publish a leaderboard in one click
+- Run locally or explore the [source on GitHub](https://github.com/huggingface/yourbench)
+
+⚠️ **Important:** This app uses your Hugging Face token for inference and uploads – you are responsible for any usage costs
+
+Built with 🤗 by the [Hugging Face OpenEvals team](https://huggingface.co/OpenEvals)
 """
 
 logger.remove()
@@ -147,9 +152,13 @@ def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_name
         logger.error(f"Failed to load dataset '{eval_ds_name}': {e}")
         return "❌ Failed: Dataset loading error"
 
+    new_env = os.environ.copy()
+    if oauth_token:
+        new_env["HF_TOKEN"] = oauth_token.token
+
     try:
         create_eval_file(eval_ds_name)
-        status = asyncio.run(run_evaluations(
+        status = asyncio.run(run_evaluations(oauth_token = new_env, org=org_name, eval_ds_name=eval_ds_name,))
     except Exception as e:
         logger.error(f"Evaluation error: {e}")
         return f"❌ Failed: Evaluation error\n{e}"
@@ -238,7 +247,7 @@ def init_session(profile: gr.OAuthProfile | None):
         btn_launch_evals = gr.Button(
             "🚀 Launch Evaluation",
             visible=True,
-            interactive=
+            interactive=False, # Start non-interactive
             variant="primary",
         )
 
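The core of the app.py change: instead of relying on whatever token happens to sit in the Space's environment, the pipeline copies `os.environ`, injects the signed-in user's token as `HF_TOKEN`, and hands that copy to the evaluation run. A minimal sketch of the pattern, where `FakeOAuthToken` and `build_eval_env` are hypothetical names used only for illustration (the real app receives a `gr.OAuthToken` from Gradio's login flow):

```python
import os


class FakeOAuthToken:
    """Hypothetical stand-in for gr.OAuthToken; only the .token attribute is used here."""
    token = "hf_dummy_value"  # placeholder, not a real token


def build_eval_env(oauth_token) -> dict:
    """Copy the current environment and add HF_TOKEN only when a token is available."""
    new_env = os.environ.copy()  # copy first, so the parent process env is never mutated
    if oauth_token:
        new_env["HF_TOKEN"] = oauth_token.token
    return new_env


env = build_eval_env(FakeOAuthToken())
print("HF_TOKEN" in env)  # True: the token travels only inside this dict
```

Copying the environment, rather than setting `os.environ["HF_TOKEN"]` globally, keeps the user's token scoped to the child processes that receive the dict.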
yourbench_space/evaluation.py
CHANGED
@@ -18,9 +18,9 @@ def create_eval_file(eval_ds_name: str):
     subprocess.run(["lighteval", "tasks", "create", str(template_path), task_name, eval_ds_name])
 
 
-async def run_process(args: list) -> dict:
+async def run_process(args: list, custom_env = None) -> dict:
     process = await asyncio.create_subprocess_exec(
-        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=custom_env
     )
     await asyncio.wait_for(process.wait(), timeout=180)
     stdout = await process.stdout.read()
@@ -28,7 +28,7 @@ async def run_process(args: list) -> dict:
     return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}
 
 
-async def run_evaluations(eval_ds_name: str, org: str) -> list:
+async def run_evaluations(eval_ds_name: str, org: str, custom_env = None) -> list:
     task_name = eval_ds_name.replace("/", "_")
     tasks = []
     for model_name, provider in INIT_MODELS:
@@ -49,7 +49,7 @@ async def run_evaluations(eval_ds_name: str, org: str) -> list:
             org,
             "--push-to-hub",
         ]
-        tasks.append(run_process(args))
+        tasks.append(run_process(args, custom_env))
     # Will capture the task if failed
     processes = await asyncio.gather(*tasks, return_exceptions=True)
     for process in processes:
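In evaluation.py the same dict is threaded through `run_evaluations` into every `lighteval` child process via the `env=` argument of `asyncio.create_subprocess_exec`. A self-contained sketch of that mechanism, assuming a throwaway child command and a dummy token value rather than the real lighteval invocation:

```python
import asyncio
import os
import sys


async def run_process(args: list, custom_env=None) -> dict:
    # env=None lets the child inherit the parent environment; a dict replaces it entirely.
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=custom_env
    )
    await asyncio.wait_for(process.wait(), timeout=180)
    stdout = await process.stdout.read()
    stderr = await process.stderr.read()
    return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}


async def main():
    custom_env = os.environ.copy()
    custom_env["HF_TOKEN"] = "hf_dummy_value"  # placeholder, not a real token
    # The child prints the variable it received, showing the env dict was applied.
    result = await run_process(
        [sys.executable, "-c", "import os; print(os.environ.get('HF_TOKEN'))"],
        custom_env,
    )
    print(result["stdout"].strip())  # -> hf_dummy_value


asyncio.run(main())
```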