File size: 4,417 Bytes
5f30ad7
950d883
10e9b7d
950d883
eccf8e4
3c4371f
950d883
10e9b7d
950d883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f30ad7
 
 
950d883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f30ad7
950d883
5f30ad7
950d883
5f30ad7
950d883
 
 
 
 
 
 
 
 
 
 
 
5f30ad7
 
 
950d883
 
e80aab9
950d883
 
 
 
 
 
 
 
 
 
 
e80aab9
5f30ad7
950d883
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# app.py

import os
import time
import requests
import pandas as pd
import gradio as gr

from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    PythonInterpreterTool,
    InferenceClientModel
)

# --- Configuration ---
# Base URL of the GAIA scoring service (overridable for local testing).
API_URL  = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID")                     # e.g. "your-username/your-space"
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")     # Hugging Face token
# No need for HF_USERNAME—Gradio OAuthProfile provides it

# Fail fast at import time if a required secret is missing
# (os.getenv returns None, which is falsy).
if not all([SPACE_ID, HF_TOKEN]):
    raise RuntimeError(
        "Please set the following environment variables in your Space settings:\n"
        "  • SPACE_ID\n"
        "  • HUGGINGFACEHUB_API_TOKEN"
    )

# Markdown banner rendered at the top of the Gradio UI.
WELCOME_TEXT = """
## Welcome to the GAIA Benchmark Runner 🎉

This challenge is your final hands-on project:
- Build an agent and evaluate it on a subset of the GAIA benchmark.
- You need **≥30%** to earn your Certificate of Completion. 🏅
- Submit your score and see how you stack up on the Student Leaderboard!
"""

# --- Agent Definition ---
class GAIAAgent:
    def __init__(self, model_id="meta-llama/Llama-3-70B-Instruct"):
        # Initialize HF Inference client
        self.model = InferenceClientModel(
            model_id=model_id,
            token=HF_TOKEN,
            provider="hf-inference",
            timeout=120,
            temperature=0.2
        )
        # Attach search + code execution tools
        tools = [
            DuckDuckGoSearchTool(),
            PythonInterpreterTool()
        ]
        self.agent = CodeAgent(
            tools=tools,
            model=self.model,
            executor_type="local"
        )

    def answer(self, question: str, task_file: str = None) -> str:
        prompt = question
        if task_file:
            try:
                with open(task_file, "r") as f:
                    content = f.read()
                prompt += f"\n\nAttached file:\n```\n{content}\n```"
            except:
                pass
        return self.agent.run(prompt)

# --- Runner & Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch GAIA questions, run the agent on each, and submit all answers.

    Args:
        profile: OAuth profile injected by Gradio; None when not logged in.

    Returns:
        A ``(status_markdown, results_dataframe)`` pair matching the two
        Gradio output components.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()

    username = profile.username

    # 1) Fetch GAIA questions. Report network/HTTP failures in the UI
    #    instead of letting them escape the Gradio callback unhandled.
    try:
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
    except requests.RequestException as e:
        return f"❌ Error fetching questions: {e}", pd.DataFrame()
    if not questions:
        return "❌ No questions returned; check your API_URL.", pd.DataFrame()

    # 2) Initialize your agent
    agent = GAIAAgent()

    # 3) Run agent on each question
    results, payload = [], []
    for item in questions:
        task_id   = item.get("task_id")
        question  = item.get("question", "")
        file_path = item.get("task_file_path")  # optional

        try:
            answer = agent.answer(question, file_path)
        except Exception as e:
            # Record the failure but keep going — one bad task should
            # not abort the whole benchmark run.
            answer = f"ERROR: {e}"

        results.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": answer
        })
        payload.append({
            "task_id": task_id,
            "submitted_answer": answer
        })

        time.sleep(0.5)  # throttle requests

    # 4) Submit all answers. On failure, still surface the per-task
    #    results table so the work isn't lost.
    submission = {
        "username":   username,
        "agent_code": f"https://huggingface.co./spaces/{SPACE_ID}/tree/main",
        "answers":    payload
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except requests.RequestException as e:
        return f"❌ Error submitting answers: {e}", pd.DataFrame(results)

    # 5) Build status message
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )

    return status, pd.DataFrame(results)


# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME_TEXT)
    # LoginButton enables HF OAuth; Gradio then injects a gr.OAuthProfile
    # into any event handler whose signature annotates one.
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run Benchmark & Submit")
    status_out = gr.Markdown()
    table_out  = gr.Dataframe(headers=["Task ID","Question","Answer"], wrap=True)

    # NOTE: do not list the LoginButton in `inputs` — that would pass the
    # button's value (its label string) as `profile`. Gradio auto-fills the
    # `profile: gr.OAuthProfile | None` parameter of run_and_submit_all.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, table_out]
    )

if __name__ == "__main__":
    demo.launch()