Spaces:
Restarting
Restarting
# app.py | |
import os | |
import time | |
import traceback | |
import requests | |
import pandas as pd | |
import gradio as gr | |
# ─── Configuration ──────────────────────────────────────────────────────────
# Endpoints and identifiers; each one can be overridden through an
# environment variable of the same name, otherwise the default below applies.
API_URL = os.environ.get("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.environ.get("SPACE_ID", "framsouza/Final_Assignment_Template")
MODEL_ID = os.environ.get("MODEL_ID", "meta-llama/Llama-2-7b-instruct")

# The access token has no default: it must come from the environment.
HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# Fail at import time when either required setting is missing or empty, so the
# problem is reported before any request is attempted.
if not (HF_TOKEN and SPACE_ID):
    raise RuntimeError(
        "β Please set both SPACE_ID and HUGGINGFACEHUB_API_TOKEN in your Space Secrets."
    )

# Authorization header shared by the authenticated requests in this module.
HEADERS = {"Authorization": "Bearer " + HF_TOKEN}

# Markdown shown at the top of the Gradio page.
WELCOME = (
    "\n"
    "## GAIA Benchmark Runner π\n"
    "Build your agent, score **β₯30%** to earn your Certificate,\n"
    "and see where you land on the Student Leaderboard!\n"
)
# ─── Simple HF-Inference Agent ──────────────────────────────────────────────
class GAIAAgent:
    """Minimal question-answering agent backed by the HF Inference API.

    Each call to :meth:`answer` posts one prompt to the hosted model named by
    ``model_id`` and returns the text extracted from the JSON reply.
    """

    def __init__(self, model_id: str):
        """Remember the model to query and the shared authorization headers."""
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")
        self.model_id = model_id
        self.headers = HEADERS

    @staticmethod
    def _extract_text(data: object, prompt: str = "") -> str:
        """Pull the generated text out of a decoded Inference API reply.

        Args:
            data: The decoded JSON body of the response.
            prompt: The prompt that was sent. When the generated text starts
                with it, that echoed prefix is removed.

        Returns:
            The stripped ``generated_text`` of the first list item when the
            reply has that shape; otherwise ``str(data)`` so that error
            payloads remain visible to the caller.
        """
        if isinstance(data, list) and data:
            first = data[0]
            # Only a dict can carry the field. A bare ``in`` test on a string
            # item would be a substring check and the subscript would raise.
            if isinstance(first, dict):
                text = first.get("generated_text")
                if isinstance(text, str):
                    # Text-generation backends may return prompt + completion;
                    # keep only the completion so the answer is not prefixed
                    # with the question.
                    if prompt and text.startswith(prompt):
                        text = text[len(prompt):]
                    return text.strip()
        return str(data)

    def answer(self, prompt: str) -> str:
        """Send ``prompt`` to the model and return its answer.

        Args:
            prompt: The question text to submit as the model input.

        Returns:
            The generated text, or the stringified response body when the
            reply does not have the expected shape.

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
            },
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        return self._extract_text(resp.json(), prompt)
# ─── Gradio callback ────────────────────────────────────────────────────────
def _extract_username(info) -> str:
    """Return the account name from a whoami-v2 payload, or "" when absent."""
    if not isinstance(info, dict):
        return ""
    # whoami-v2 reports the account under the top-level "name" key; the nested
    # "user" -> "username" lookup is kept as a fallback for older payloads.
    name = info.get("name")
    if not name:
        legacy = info.get("user")
        if isinstance(legacy, dict):
            name = legacy.get("username")
    return name if isinstance(name, str) else ""


def _format_status(data) -> str:
    """Render the scoring server's reply as the Markdown status message."""
    return (
        f"β **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )


def run_and_submit_all():
    """Answer every benchmark question and submit the answers for scoring.

    Steps: resolve the username behind the configured token, download the
    question list from ``API_URL``, ask ``GAIAAgent`` for an answer to each
    question, then post all answers to ``API_URL``/submit.

    Returns:
        A ``(status, table)`` pair: a Markdown status string and a DataFrame
        with the columns "Task ID", "Question" and "Answer". On failure the
        status describes the error and the table holds whatever answers were
        produced before the failure (empty if none).
    """
    # Declared outside the try so the except branch can still show the
    # answers that were already computed.
    results = []
    try:
        # 1) Fetch username via WhoAmI
        who = requests.get(
            "https://huggingface.co/api/whoami-v2", headers=HEADERS, timeout=10
        )
        who.raise_for_status()
        username = _extract_username(who.json())
        if not username:
            return "β Could not fetch your HF username. Check your token.", pd.DataFrame()

        # 2) Fetch GAIA questions
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not questions:
            return "β No questions returned; check your API_URL.", pd.DataFrame()

        # 3) Initialize and run agent
        agent = GAIAAgent(MODEL_ID)
        payload = []
        for task in questions:
            tid = task["task_id"]
            q = task.get("question", "")
            try:
                ans = agent.answer(q)
            except Exception as e:
                # One failing question must not abort the run; the error text
                # is recorded (and submitted) as that task's answer.
                ans = f"ERROR: {e}"
            results.append({"Task ID": tid, "Question": q, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            # Brief pause between consecutive model requests.
            time.sleep(0.5)

        # 4) Submit answers (including agent_code)
        submission = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
            "answers": payload,
        }
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()

        # 5) Build and return status + results table
        return _format_status(s_resp.json()), pd.DataFrame(results)
    except Exception as e:
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return (
            f"β Unexpected error:\n{e}\n\nSee logs for details.",
            pd.DataFrame(results),
        )
# ─── Gradio UI ──────────────────────────────────────────────────────────────
# Single-page layout: intro text, one trigger button, then the two outputs
# that the callback fills in.
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    run_btn = gr.Button("βΆοΈ Run GAIA Benchmark")

    # Markdown area for the status message returned by the callback.
    status = gr.Markdown()
    # Column headers match the keys of the result rows built by the callback.
    table_df = gr.Dataframe(
        wrap=True,
        headers=["Task ID", "Question", "Answer"],
    )

    # The callback takes no inputs and returns (status, table) in this order.
    run_btn.click(run_and_submit_all, inputs=[], outputs=[status, table_df])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()