File size: 4,804 Bytes
e5f33ab
 
10e9b7d
950d883
4063e48
eccf8e4
3c4371f
950d883
10e9b7d
e89ec77
e5f33ab
 
 
950d883
e89ec77
 
950d883
e5f33ab
e89ec77
950d883
 
 
e89ec77
0bb868f
e89ec77
 
 
 
0bb868f
e89ec77
 
4063e48
e89ec77
 
 
 
 
0bb868f
e89ec77
 
 
 
950d883
4063e48
0bb868f
4063e48
0bb868f
4063e48
0bb868f
4063e48
0bb868f
4063e48
0bb868f
4063e48
0bb868f
4063e48
 
 
 
 
 
 
0bb868f
4063e48
 
 
0bb868f
4063e48
 
 
 
 
 
 
 
 
 
0bb868f
4063e48
 
 
 
 
 
0bb868f
4063e48
 
 
 
 
0bb868f
4063e48
 
 
 
0bb868f
4063e48
 
 
 
 
 
 
 
e5f33ab
4063e48
5f30ad7
4063e48
 
0bb868f
4063e48
5f30ad7
e80aab9
e89ec77
e5f33ab
 
 
 
950d883
 
 
 
e5f33ab
950d883
e80aab9
5f30ad7
950d883
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# app.py

import os
import time
import traceback
import requests
import pandas as pd
import gradio as gr

# --- Config from Env ---
# Scoring API base URL (defaults to the course's public scoring Space).
API_URL      = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
# Hosted model used for inference via the HF Inference API.
MODEL_ID     = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
# Optional token from Space secrets; if unset, the OAuth profile token is tried.
HF_TOKEN_ENV = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Markdown shown at the top of the Gradio UI.
WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn your Certificate,  
and see where you land on the Student Leaderboard!
"""

class GAIAAgent:
    """Minimal agent that answers prompts via the HF Inference API.

    Each call to :meth:`answer` POSTs the prompt to the hosted model
    ``model_id`` and returns the generated text.
    """

    # Generation defaults sent with every request.
    _GEN_PARAMS = {"max_new_tokens": 512, "temperature": 0.2}

    def __init__(self, model_id: str, token: str):
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")  # debug
        self.model_id = model_id
        # Bearer token for the HF Inference API.
        self.headers = {"Authorization": f"Bearer {token}"}

    def answer(self, prompt: str) -> str:
        """Return the model's generated text for *prompt*.

        Raises:
            requests.HTTPError: on a non-2xx response from the API.
        """
        print(f"[DEBUG] Sending prompt of length {len(prompt)} to HF Inference")  # debug
        payload = {
            "inputs": prompt,
            "parameters": dict(self._GEN_PARAMS),
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        print(f"[DEBUG] Got response from model: {data!r}")  # debug
        # Typical success shape: [{"generated_text": "..."}].
        # Guard the element type: the original `"generated_text" in data[0]`
        # did a substring test (then raised TypeError) when data[0] was a str.
        if isinstance(data, list) and data and isinstance(data[0], dict):
            text = data[0].get("generated_text")
            if isinstance(text, str):
                return text.strip()
        # Surface API error payloads (e.g. {"error": "Model ... loading"}).
        if isinstance(data, dict) and "error" in data:
            return f"ERROR: {data['error']}"
        return str(data)

def _resolve_token(profile) -> str | None:
    """Return the HF token: env var first, then the OAuth profile (if present)."""
    return HF_TOKEN_ENV or getattr(profile, "access_token", None)


def _fetch_questions() -> list:
    """Download the GAIA question list from the scoring API (may be empty)."""
    print(f"[DEBUG] Fetching questions from {API_URL}/questions")  # debug
    q_resp = requests.get(f"{API_URL}/questions", timeout=15)
    q_resp.raise_for_status()
    return q_resp.json() or []


def _answer_questions(agent, questions):
    """Run *agent* over *questions*; return (display_rows, submission_payload)."""
    results = []
    payload = []
    for item in questions:
        print(f"[DEBUG] Processing task_id={item.get('task_id')}")  # debug
        tid  = item.get("task_id")
        qtxt = item.get("question", "")
        try:
            ans = agent.answer(qtxt)
        except Exception as e:
            # One failing question must not abort the whole run.
            ans = f"ERROR: {e}"
            print(f"[DEBUG] Error answering: {e}")  # debug
        results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
        time.sleep(0.5)  # gentle pacing between model calls
    return results, payload


def _submit_answers(username: str, payload: list) -> dict:
    """POST all answers to the scoring API and return its JSON response."""
    print(f"[DEBUG] Submitting payload with {len(payload)} answers")  # debug
    submission = {"username": username, "answers": payload}
    s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
    s_resp.raise_for_status()
    return s_resp.json()


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch GAIA questions, answer them with the agent, submit, and report.

    Args:
        profile: OAuth profile injected by Gradio, or None when logged out.

    Returns:
        (status_markdown, results_dataframe) for the Gradio UI. Never raises:
        every failure path is converted into a status string.
    """
    try:
        print("[DEBUG] run_and_submit_all called")  # debug
        if profile is None:
            print("[DEBUG] No profile provided")  # debug
            return ("⚠️ Please log in with your Hugging Face account.", pd.DataFrame())
        print(f"[DEBUG] Logged in as: {profile.username}")  # debug
        username = profile.username

        hf_token = _resolve_token(profile)
        print(f"[DEBUG] Using HF token from {'env' if HF_TOKEN_ENV else 'profile'}")  # debug
        if not hf_token:
            print("[DEBUG] No HF token found")  # debug
            return (
                "❌ No Hugging Face token found.\n"
                "Set HUGGINGFACEHUB_API_TOKEN in Secrets or log in via the button.",
                pd.DataFrame()
            )

        # 1) Fetch GAIA questions
        questions = _fetch_questions()
        print(f"[DEBUG] Received {len(questions)} questions")  # debug
        if not questions:
            return ("❌ No questions found. Check your API_URL.", pd.DataFrame())

        # 2) Init agent
        agent = GAIAAgent(MODEL_ID, hf_token)

        # 3) Answer each question
        results, payload = _answer_questions(agent, questions)

        # 4) Submit all answers in one request
        data = _submit_answers(username, payload)
        print(f"[DEBUG] Submission response: {data!r}")  # debug

        # 5) Build status text
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:
        # Top-level boundary: surface anything unexpected in the UI status.
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return (f"❌ Unexpected error:\n{e}", pd.DataFrame())

# --- Gradio UI wiring ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    login    = gr.LoginButton()  # HF OAuth login button
    run_btn  = gr.Button("▶️ Run GAIA Benchmark")
    status   = gr.Markdown()  # submission status / error text
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # NOTE(review): Gradio normally auto-injects a gr.OAuthProfile parameter
    # from the session when the fn is annotated with it; passing the
    # LoginButton via `inputs` may not deliver the profile object —
    # confirm against the Gradio OAuth docs / course template.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[login],
        outputs=[status, table_df]
    )

if __name__ == "__main__":
    demo.launch()