Fix token for space generation
- yourbench_space/app.py +15 -6
- yourbench_space/evaluation.py +4 -4
yourbench_space/app.py
CHANGED
@@ -25,11 +25,16 @@ from yourbench_space.evaluation import run_evaluations, create_eval_file
 
 
 project_description = """
-# YourBench
-
+# 🚀 YourBench
+### Dynamic Benchmark Generation from Your Documents
 
-
-
+- Create zero-shot benchmarks from your documents – no manual labeling
+- Evaluate top open models and publish a leaderboard in one click
+- Run locally or explore the [source on GitHub](https://github.com/huggingface/yourbench)
+
+⚠️ **Important:** This app uses your Hugging Face token for inference and uploads – you are responsible for any usage costs
+
+Built with 🤗 by the [Hugging Face OpenEvals team](https://huggingface.co/OpenEvals)
 """
 
 logger.remove()
@@ -147,9 +152,13 @@ def run_evaluation_pipeline(oauth_token: gr.OAuthToken | None, org_name, eval_name
         logger.error(f"Failed to load dataset '{eval_ds_name}': {e}")
         return "❌ Failed: Dataset loading error"
 
+    new_env = os.environ.copy()
+    if oauth_token:
+        new_env["HF_TOKEN"] = oauth_token.token
+
     try:
         create_eval_file(eval_ds_name)
-        status = asyncio.run(run_evaluations(
+        status = asyncio.run(run_evaluations(oauth_token = new_env, org=org_name, eval_ds_name=eval_ds_name,))
     except Exception as e:
         logger.error(f"Evaluation error: {e}")
         return f"❌ Failed: Evaluation error\n{e}"
@@ -238,7 +247,7 @@ def init_session(profile: gr.OAuthProfile | None):
         btn_launch_evals = gr.Button(
             "🚀 Launch Evaluation",
             visible=True,
-            interactive=
+            interactive=False, # Start non-interactive
             variant="primary",
         )
 
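The core of the app.py change: instead of relying on whatever token happens to sit in the Space's environment, the pipeline copies `os.environ`, injects the signed-in user's token as `HF_TOKEN`, and hands that copy to the evaluation run. A minimal sketch of the pattern, where `FakeOAuthToken` and `build_eval_env` are hypothetical names used only for illustration (the real app receives a `gr.OAuthToken` from Gradio's login flow):

```python
import os


class FakeOAuthToken:
    """Hypothetical stand-in for gr.OAuthToken; only the .token attribute is used here."""
    token = "hf_dummy_value"  # placeholder, not a real token


def build_eval_env(oauth_token) -> dict:
    """Copy the current environment and add HF_TOKEN only when a token is available."""
    new_env = os.environ.copy()  # copy first, so the parent process env is never mutated
    if oauth_token:
        new_env["HF_TOKEN"] = oauth_token.token
    return new_env


env = build_eval_env(FakeOAuthToken())
print("HF_TOKEN" in env)  # True: the token travels only inside this dict
```

Copying the environment, rather than setting `os.environ["HF_TOKEN"]` globally, keeps the user's token scoped to the child processes that receive the dict.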
yourbench_space/evaluation.py
CHANGED
@@ -18,9 +18,9 @@ def create_eval_file(eval_ds_name: str):
     subprocess.run(["lighteval", "tasks", "create", str(template_path), task_name, eval_ds_name])
 
 
-async def run_process(args: list) -> dict:
+async def run_process(args: list, custom_env = None) -> dict:
     process = await asyncio.create_subprocess_exec(
-        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=custom_env
     )
     await asyncio.wait_for(process.wait(), timeout=180)
     stdout = await process.stdout.read()
@@ -28,7 +28,7 @@ async def run_process(args: list) -> dict:
     return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}
 
 
-async def run_evaluations(eval_ds_name: str, org: str) -> list:
+async def run_evaluations(eval_ds_name: str, org: str, custom_env = None) -> list:
     task_name = eval_ds_name.replace("/", "_")
     tasks = []
     for model_name, provider in INIT_MODELS:
@@ -49,7 +49,7 @@ async def run_evaluations(eval_ds_name: str, org: str) -> list:
             org,
             "--push-to-hub",
         ]
-        tasks.append(run_process(args))
+        tasks.append(run_process(args, custom_env))
     # Will capture the task if failed
     processes = await asyncio.gather(*tasks, return_exceptions=True)
     for process in processes:
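In evaluation.py the same dict is threaded through `run_evaluations` into every `lighteval` child process via the `env=` argument of `asyncio.create_subprocess_exec`. A self-contained sketch of that mechanism, assuming a throwaway child command and a dummy token value rather than the real lighteval invocation:

```python
import asyncio
import os
import sys


async def run_process(args: list, custom_env=None) -> dict:
    # env=None lets the child inherit the parent environment; a dict replaces it entirely.
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=custom_env
    )
    await asyncio.wait_for(process.wait(), timeout=180)
    stdout = await process.stdout.read()
    stderr = await process.stderr.read()
    return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}


async def main():
    custom_env = os.environ.copy()
    custom_env["HF_TOKEN"] = "hf_dummy_value"  # placeholder, not a real token
    # The child prints the variable it received, showing the env dict was applied.
    result = await run_process(
        [sys.executable, "-c", "import os; print(os.environ.get('HF_TOKEN'))"],
        custom_env,
    )
    print(result["stdout"].strip())  # -> hf_dummy_value


asyncio.run(main())
```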