# app.py
import os
import time
import traceback

import requests
import pandas as pd
import gradio as gr

# ─── Configuration ──────────────────────────────────────────────────────────
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID", "framsouza/Final_Assignment_Template")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

if not HF_TOKEN or not SPACE_ID:
    raise RuntimeError(
        "❌ Please set both SPACE_ID and HUGGINGFACEHUB_API_TOKEN in your Space Secrets."
    )

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""


# ─── Simple HF-Inference Agent ──────────────────────────────────────────────
class GAIAAgent:
    def __init__(self, model_id: str):
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")
        self.model_id = model_id
        self.headers = HEADERS

    def answer(self, prompt: str) -> str:
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
            },
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list) and data and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        return str(data)


# ─── Gradio callback ─────────────────────────────────────────────────────────
def run_and_submit_all():
    try:
        # 1) Fetch username via WhoAmI
        who = requests.get("https://huggingface.co./api/whoami-v2", headers=HEADERS, timeout=10)
        who.raise_for_status()
        username = who.json().get("user", {}).get("username")
        if not username:
            return "❌ Could not fetch your HF username. Check your token.", pd.DataFrame()

        # 2) Fetch GAIA questions
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not questions:
            return "❌ No questions returned; check your API_URL.", pd.DataFrame()

        # 3) Initialize and run agent
        agent = GAIAAgent(MODEL_ID)
        results = []
        payload = []
        for task in questions:
            tid = task["task_id"]
            q = task.get("question", "")
            try:
                ans = agent.answer(q)
            except Exception as e:
                ans = f"ERROR: {e}"
            results.append({"Task ID": tid, "Question": q, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            time.sleep(0.5)

        # 4) Submit answers (including agent_code)
        submission = {
            "username": username,
            "agent_code": f"https://huggingface.co./spaces/{SPACE_ID}/tree/main",
            "answers": payload,
        }
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()

        # 5) Build and return status + results table
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return f"❌ Unexpected error:\n{e}\n\nSee logs for details.", pd.DataFrame()


# ─── Gradio UI ───────────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    run_btn.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status, table_df],
    )

if __name__ == "__main__":
    demo.launch()