File size: 4,417 Bytes
5f30ad7
950d883
10e9b7d
950d883
eccf8e4
3c4371f
950d883
10e9b7d
950d883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f30ad7
 
 
950d883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f30ad7
950d883
5f30ad7
950d883
5f30ad7
950d883
 
 
 
 
 
 
 
 
 
 
 
5f30ad7
 
 
950d883
 
e80aab9
950d883
 
 
 
 
 
 
 
 
 
 
e80aab9
5f30ad7
950d883
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# app.py

import os
import time
import requests
import pandas as pd
import gradio as gr

from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    PythonInterpreterTool,
    InferenceClientModel
)

# --- Configuration ---
# Base URL of the GAIA scoring service (overridable for local testing).
API_URL  = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID")                     # e.g. "your-username/your-space"
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")     # Hugging Face token
# No need for HF_USERNAME—Gradio OAuthProfile provides it

# Fail fast at import time if a required secret is missing
# (os.getenv returns None, which is falsy).
if not all([SPACE_ID, HF_TOKEN]):
    raise RuntimeError(
        "Please set the following environment variables in your Space settings:\n"
        "  • SPACE_ID\n"
        "  • HUGGINGFACEHUB_API_TOKEN"
    )

# Markdown banner rendered at the top of the Gradio UI.
WELCOME_TEXT = """
## Welcome to the GAIA Benchmark Runner 🎉

This challenge is your final hands-on project:
- Build an agent and evaluate it on a subset of the GAIA benchmark.
- You need **≥30%** to earn your Certificate of Completion. 🏅
- Submit your score and see how you stack up on the Student Leaderboard!
"""

# --- Agent Definition ---
class GAIAAgent:
    def __init__(self, model_id="meta-llama/Llama-3-70B-Instruct"):
        # Initialize HF Inference client
        self.model = InferenceClientModel(
            model_id=model_id,
            token=HF_TOKEN,
            provider="hf-inference",
            timeout=120,
            temperature=0.2
        )
        # Attach search + code execution tools
        tools = [
            DuckDuckGoSearchTool(),
            PythonInterpreterTool()
        ]
        self.agent = CodeAgent(
            tools=tools,
            model=self.model,
            executor_type="local"
        )

    def answer(self, question: str, task_file: str = None) -> str:
        prompt = question
        if task_file:
            try:
                with open(task_file, "r") as f:
                    content = f.read()
                prompt += f"\n\nAttached file:\n```\n{content}\n```"
            except:
                pass
        return self.agent.run(prompt)

# --- Runner & Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch GAIA questions, run the agent on each, and submit all answers.

    Args:
        profile: OAuth profile injected by Gradio; None when not logged in.

    Returns:
        A ``(status_markdown, results_dataframe)`` pair matching the two
        Gradio output components.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()

    username = profile.username

    # 1) Fetch GAIA questions. Report network/HTTP failures in the UI
    #    instead of letting them escape the Gradio callback unhandled.
    try:
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
    except requests.RequestException as e:
        return f"❌ Error fetching questions: {e}", pd.DataFrame()
    if not questions:
        return "❌ No questions returned; check your API_URL.", pd.DataFrame()

    # 2) Initialize your agent
    agent = GAIAAgent()

    # 3) Run agent on each question
    results, payload = [], []
    for item in questions:
        task_id   = item.get("task_id")
        question  = item.get("question", "")
        file_path = item.get("task_file_path")  # optional

        try:
            answer = agent.answer(question, file_path)
        except Exception as e:
            # Record the failure but keep going — one bad task should
            # not abort the whole benchmark run.
            answer = f"ERROR: {e}"

        results.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": answer
        })
        payload.append({
            "task_id": task_id,
            "submitted_answer": answer
        })

        time.sleep(0.5)  # throttle requests

    # 4) Submit all answers. On failure, still surface the per-task
    #    results table so the work isn't lost.
    submission = {
        "username":   username,
        "agent_code": f"https://huggingface.co./spaces/{SPACE_ID}/tree/main",
        "answers":    payload
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except requests.RequestException as e:
        return f"❌ Error submitting answers: {e}", pd.DataFrame(results)

    # 5) Build status message
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )

    return status, pd.DataFrame(results)


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME_TEXT)
    # LoginButton enables HF OAuth; Gradio then injects a gr.OAuthProfile
    # into any event handler whose signature annotates one.
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run Benchmark & Submit")
    status_out = gr.Markdown()
    table_out  = gr.Dataframe(headers=["Task ID","Question","Answer"], wrap=True)

    # NOTE: do not list the LoginButton in `inputs` — that would pass the
    # button's value (its label string) as `profile`. Gradio auto-fills the
    # `profile: gr.OAuthProfile | None` parameter of run_and_submit_all.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, table_out]
    )

if __name__ == "__main__":
    demo.launch()