# app.py
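"""Gradio Space entry point for the GAIA benchmark runner.

Fetches the GAIA questions from the course scoring API, answers each one with a
minimal Hugging Face Inference API agent, and submits the results under the
logged-in user's account.
"""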

import os
import time
import traceback
import requests
import pandas as pd
import gradio as gr
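# Third-party packages used: requests, pandas, gradio
# (typically pinned in the Space's requirements.txt).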

# ─── Configuration ──────────────────────────────────────────────────────────
API_URL  = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID", "framsouza/Final_Assignment_Template")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-chat-hf")  # chat-tuned Llama 2 checkpoint
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

if not HF_TOKEN or not SPACE_ID:
    raise RuntimeError(
        "❌ Please set both SPACE_ID and HUGGINGFACEHUB_API_TOKEN in your Space Secrets."
    )
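
# For local testing, export the same values the Space reads from its Secrets
# (placeholder values shown):
#   export SPACE_ID=<your-username>/<your-space>
#   export HUGGINGFACEHUB_API_TOKEN=hf_xxxxxxxx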

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

WELCOME = """
## GAIA Benchmark Runner 🎉

Build your agent, score **≥30%** to earn your Certificate,  
and see where you land on the Student Leaderboard!
"""

# ─── Simple HF-Inference Agent ─────────────────────────────────────────────
class GAIAAgent:
    def __init__(self, model_id: str):
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")
        self.model_id = model_id
        self.headers = HEADERS

    def answer(self, prompt: str) -> str:
        # Call the serverless Inference API for a single text-generation request.
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2,
                # Return only the completion rather than echoing the prompt back.
                "return_full_text": False
            }
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        # Successful responses look like [{"generated_text": "..."}].
        if isinstance(data, list) and data and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        return str(data)
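
# Illustrative usage (not executed by this app; assumes HUGGINGFACEHUB_API_TOKEN
# grants access to the configured model):
#   agent = GAIAAgent(MODEL_ID)
#   print(agent.answer("What is the capital of France?"))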

# ─── Gradio callback ────────────────────────────────────────────────────────
def run_and_submit_all():
    try:
        # 1) Fetch username via WhoAmI
        who = requests.get("https://huggingface.co./api/whoami-v2", headers=HEADERS, timeout=10)
        who.raise_for_status()
        who_data = who.json()
        # whoami-v2 reports the username under the top-level "name" key.
        username = who_data.get("name") or who_data.get("user", {}).get("username")
        if not username:
            return "❌ Could not fetch your HF username. Check your token.", pd.DataFrame()

        # 2) Fetch GAIA questions
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not questions:
            return "❌ No questions returned; check your API_URL.", pd.DataFrame()

        # 3) Initialize and run agent
        agent = GAIAAgent(MODEL_ID)
        results = []
        payload = []
        for task in questions:
            tid = task["task_id"]
            q = task.get("question", "")
            try:
                ans = agent.answer(q)
            except Exception as e:
                ans = f"ERROR: {e}"
            results.append({"Task ID": tid, "Question": q, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            time.sleep(0.5)  # brief pause between questions to avoid hammering the Inference API

        # 4) Submit answers (including agent_code)
        submission = {
            "username":   username,
            "agent_code": f"https://huggingface.co./spaces/{SPACE_ID}/tree/main",
            "answers":    payload
        }
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()

        # 5) Build and return status + results table
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return f"❌ Unexpected error:\n{e}\n\nSee logs for details.", pd.DataFrame()

# ─── Gradio UI ──────────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    run_btn  = gr.Button("▶️ Run GAIA Benchmark")
    status   = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    run_btn.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status, table_df]
    )

if __name__ == "__main__":
    demo.launch()
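    # demo.launch() serves the UI on http://127.0.0.1:7860 by default; on
    # Hugging Face Spaces the platform exposes it automatically.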