# Spaces:
# Restarting
# Restarting
# app.py | |
import os | |
import time | |
import requests | |
import pandas as pd | |
import gradio as gr | |
# --- Config from Env --- | |
# All settings are read once at import time; when a variable is unset the
# literal default next to it is used instead.
API_URL = os.environ.get("API_URL", "https://agents-course-unit4-scoring.hf.space")
# NOTE(review): verify that this default repo id actually exists on the Hub.
MODEL_ID = os.environ.get("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
# No default here: stays None when HUGGINGFACEHUB_API_TOKEN is not set.
HF_TOKEN_ENV = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# Markdown banner rendered at the top of the UI.
WELCOME = (
    "\n"
    "## GAIA Benchmark Runner 🎉\n"
    "Build your agent, score **≥30%** to earn your Certificate,\n"
    "and see where you land on the Student Leaderboard!\n"
)
# --- Simple HF-Inference Agent --- | |
class GAIAAgent:
    """Minimal agent that answers a prompt through the HF Inference API.

    Each call to :meth:`answer` sends one POST request to
    ``https://api-inference.huggingface.co/models/<model_id>`` using a bearer
    token for authorization.
    """

    def __init__(self, model_id: str, token: str):
        """Store the model id and build the authorization header.

        Args:
            model_id: Repo id appended to the inference endpoint URL.
            token: Access token sent as ``Authorization: Bearer <token>``.
        """
        self.model_id = model_id
        self.headers = {"Authorization": f"Bearer {token}"}

    def answer(self, prompt: str) -> str:
        """Send *prompt* to the model and return the generated text.

        Args:
            prompt: Text passed as the ``inputs`` field of the request.

        Returns:
            The stripped ``generated_text`` of the first result, without a
            leading echo of the prompt; for any other response shape, the
            ``str()`` of the decoded JSON payload.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.RequestException: On connection problems or timeout.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
            },
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        return self._extract_text(resp.json(), prompt)

    @staticmethod
    def _extract_text(data, prompt: str = "") -> str:
        """Pull the generated text out of a decoded API response.

        Only a non-empty list whose first element is a dict holding a string
        under ``"generated_text"`` is treated as a generation result. Anything
        else (error dicts, empty lists, lists of other types) is returned as
        ``str(data)`` instead of raising.
        """
        if isinstance(data, list) and data:
            first = data[0]
            # Explicit type checks: `in` on a str is a substring test and on
            # an int raises TypeError, so membership alone is not enough.
            if isinstance(first, dict):
                text = first.get("generated_text")
                if isinstance(text, str):
                    # Text-generation endpoints may return prompt + completion
                    # in one string; drop the echoed prompt so only the
                    # model's own output is used as the answer.
                    if prompt and text.startswith(prompt):
                        text = text[len(prompt):]
                    return text.strip()
        return str(data)
# --- Gradio callback --- | |
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every GAIA question with the agent and submit the results.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` pair: Markdown status text and a DataFrame with
        one row per answered question (columns ``Task ID``, ``Question``,
        ``Answer``). Network and decoding failures are reported through the
        status text instead of being raised; when the submission step fails
        the table still contains the answers that were computed.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()

    username = profile.username
    # The environment token wins over anything found on the profile.
    # NOTE(review): confirm that the profile object really exposes
    # `access_token`; if it does not, getattr() silently yields None here.
    hf_token = HF_TOKEN_ENV or getattr(profile, "access_token", None)
    if not hf_token:
        return (
            "❌ No Hugging Face token found.\n"
            "Set HUGGINGFACEHUB_API_TOKEN in Secrets or log in via the button.",
            pd.DataFrame()
        )

    # 1) Fetch GAIA questions
    try:
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        return f"❌ Could not fetch questions: {exc}", pd.DataFrame()
    if not isinstance(questions, list) or not questions:
        return "❌ No questions found. Check your API_URL.", pd.DataFrame()

    # 2) Init agent
    agent = GAIAAgent(MODEL_ID, hf_token)

    # 3) Answer each
    results = []
    payload = []
    for item in questions:
        if not isinstance(item, dict):
            continue
        tid = item.get("task_id")
        qtxt = item.get("question", "")
        # An entry without an id cannot be scored and one without text cannot
        # be answered, so neither is sent to the model or submitted.
        if tid is None or not qtxt:
            continue
        try:
            ans = agent.answer(qtxt)
        except Exception as e:
            # Per-question boundary: one failing call must not abort the run,
            # so the error text is recorded as that question's answer.
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
        time.sleep(0.5)

    results_df = pd.DataFrame(results)
    if not payload:
        return "❌ No valid questions to answer. Check your API_URL.", results_df

    # 4) Submit (no agent_code)
    # NOTE(review): confirm the scoring endpoint accepts a submission without
    # an `agent_code` field; a rejection is surfaced by the handler below.
    submission = {
        "username": username,
        "answers": payload
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except requests.HTTPError as exc:
        # Include the start of the response body: it normally carries the
        # server's explanation of why the submission was rejected.
        detail = exc.response.text[:500] if exc.response is not None else ""
        return f"❌ Submission failed: {exc}\n\n{detail}".rstrip(), results_df
    except (requests.RequestException, ValueError) as exc:
        return f"❌ Submission failed: {exc}", results_df
    if not isinstance(data, dict):
        return f"❌ Unexpected response from scoring API: {data}", results_df

    # 5) Build status text
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )
    return status, results_df
# --- Gradio UI --- | |
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)
    # No `inputs` on purpose: the callback's only parameter is annotated with
    # gr.OAuthProfile, which Gradio fills in from the type hint. Listing the
    # login button as an input would pass the callback an extra positional
    # argument it does not accept.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, table_df],
    )

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()