
Commit 2d54755 · 1 parent: fdfafe5 · committed by alozowski (HF Staff)

Fix token for space generation
yourbench_space/app.py CHANGED
@@ -25,11 +25,16 @@ from yourbench_space.evaluation import run_evaluations, create_eval_file
 
 
 project_description = """
-# YourBench 🚀
-**Dynamic Benchmark Generation for Language Models**
+# 🚀 YourBench
+### Dynamic Benchmark Generation from Your Documents
 
-Quickly create zero-shot benchmarks from your documents – keeping models accurate and adaptable
-- 💻 [Yourbench GitHub](https://github.com/huggingface/yourbench)
+- Create zero-shot benchmarks from your documents — no manual labeling
+- Evaluate top open models and publish a leaderboard in one click
+- Run locally or explore the [source on GitHub](https://github.com/huggingface/yourbench)
+
+⚠️ **Important:** This app uses your Hugging Face token for inference and uploads — you are responsible for any usage costs
+
+Built with 🤗 by the [Hugging Face OpenEvals team](https://huggingface.co/OpenEvals)
 """
 
 logger.remove()
@@ -147,9 +152,13 @@ def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_na
         logger.error(f"Failed to load dataset '{eval_ds_name}': {e}")
         return "❌ Failed: Dataset loading error"
 
+    new_env = os.environ.copy()
+    if oauth_token:
+        new_env["HF_TOKEN"] = oauth_token.token
+
     try:
         create_eval_file(eval_ds_name)
-        status = asyncio.run(run_evaluations(eval_ds_name=eval_ds_name, org=org_name))
+        status = asyncio.run(run_evaluations(custom_env=new_env, org=org_name, eval_ds_name=eval_ds_name))
     except Exception as e:
         logger.error(f"Evaluation error: {e}")
         return f"❌ Failed: Evaluation error\n{e}"
@@ -238,7 +247,7 @@ def init_session(profile: gr.OAuthProfile | None):
 btn_launch_evals = gr.Button(
     "🚀 Launch Evaluation",
     visible=True,
-    interactive=True,  # Start non-interactive
+    interactive=False,  # Start non-interactive
     variant="primary",
 )
 
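The app.py side of the fix is the standard pattern for handing a per-user credential to child processes: copy the parent environment, inject the OAuth token under HF_TOKEN, and pass the copy along instead of mutating the shared os.environ. A minimal sketch of that pattern, assuming the token string comes from gr.OAuthToken.token as in the Space (the build_env helper name is illustrative, not part of the codebase):

import os
from typing import Optional


def build_env(token: Optional[str]) -> dict:
    """Illustrative helper: copy the parent environment and expose the user's token as HF_TOKEN."""
    env = os.environ.copy()
    if token:
        env["HF_TOKEN"] = token  # only the copy is mutated; os.environ stays untouched
    return env


if __name__ == "__main__":
    # In the Space this would be build_env(oauth_token.token)
    child_env = build_env("hf_example_token")
    print("HF_TOKEN" in child_env)  # True

Copying rather than exporting the token globally keeps one Gradio session's credentials out of state shared with concurrent requests.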
yourbench_space/evaluation.py CHANGED
@@ -18,9 +18,9 @@ def create_eval_file(eval_ds_name: str):
     subprocess.run(["lighteval", "tasks", "create", str(template_path), task_name, eval_ds_name])
 
 
-async def run_process(args: list) -> dict:
+async def run_process(args: list, custom_env=None) -> dict:
     process = await asyncio.create_subprocess_exec(
-        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=custom_env
     )
     await asyncio.wait_for(process.wait(), timeout=180)
     stdout = await process.stdout.read()
@@ -28,7 +28,7 @@ async def run_process(args: list) -> dict:
     return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}
 
 
-async def run_evaluations(eval_ds_name: str, org: str) -> list:
+async def run_evaluations(eval_ds_name: str, org: str, custom_env=None) -> list:
     task_name = eval_ds_name.replace("/", "_")
     tasks = []
     for model_name, provider in INIT_MODELS:
@@ -49,7 +49,7 @@ async def run_evaluations(eval_ds_name: str, org: str) -> list:
             org,
             "--push-to-hub",
         ]
-        tasks.append(run_process(args))
+        tasks.append(run_process(args, custom_env))
     # Will capture the task if failed
     processes = await asyncio.gather(*tasks, return_exceptions=True)
     for process in processes:
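
On the evaluation.py side, the token reaches the lighteval runs through the env argument of asyncio.create_subprocess_exec: the dictionary passed there becomes the child's entire environment (None means inherit the parent's), which is why app.py starts from os.environ.copy(). Below is a simplified, self-contained sketch of the same mechanism, with a trivial Python child standing in for the real lighteval command; it is not the exact Space code (the Space waits on the process with a 180 s timeout and reads the pipes separately).

import asyncio
import os
import sys


async def run_process(args: list, custom_env=None) -> dict:
    """Spawn a subprocess with an optional custom environment and capture its output."""
    process = await asyncio.create_subprocess_exec(
        *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        env=custom_env,  # None -> the child inherits the parent environment
    )
    # Simplified: communicate() collects stdout/stderr and waits for exit in one step
    stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=30)
    return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}


async def main():
    env = os.environ.copy()
    env["HF_TOKEN"] = "hf_example_token"
    # The child sees HF_TOKEN only because it is present in the env it was started with
    result = await run_process(
        [sys.executable, "-c", "import os; print(os.environ.get('HF_TOKEN'))"], env
    )
    print(result["stdout"].strip())  # hf_example_token


if __name__ == "__main__":
    asyncio.run(main())

Passing the environment per call, rather than setting HF_TOKEN process-wide, is what lets evaluations launched by different users run concurrently with their own tokens.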