Spaces:
Restarting
Restarting
File size: 4,804 Bytes
e5f33ab 10e9b7d 950d883 4063e48 eccf8e4 3c4371f 950d883 10e9b7d e89ec77 e5f33ab 950d883 e89ec77 950d883 e5f33ab e89ec77 950d883 e89ec77 0bb868f e89ec77 0bb868f e89ec77 4063e48 e89ec77 0bb868f e89ec77 950d883 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 0bb868f 4063e48 e5f33ab 4063e48 5f30ad7 4063e48 0bb868f 4063e48 5f30ad7 e80aab9 e89ec77 e5f33ab 950d883 e5f33ab 950d883 e80aab9 5f30ad7 950d883 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# app.py
import os
import time
import traceback
import requests
import pandas as pd
import gradio as gr
# --- Config from Env ---
# Base URL of the GAIA scoring service; override API_URL to point at a local/staging server.
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
# Model queried through the HF Inference API to answer each benchmark question.
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
# Optional token from the environment (e.g. Space secrets); when unset, the app
# falls back to a token taken from the logged-in OAuth profile at runtime.
HF_TOKEN_ENV = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Markdown banner rendered at the top of the Gradio UI.
WELCOME = """
## GAIA Benchmark Runner 🎉
Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""
class GAIAAgent:
    """Minimal agent that answers prompts via the Hugging Face Inference API."""

    def __init__(self, model_id: str, token: str):
        """Store the model id and build the auth header for Inference API calls.

        Args:
            model_id: HF Hub model repo id (e.g. "meta-llama/Llama-2-7b-instruct").
            token: HF access token used as a Bearer credential.
        """
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")  # debug
        self.model_id = model_id
        self.headers = {"Authorization": f"Bearer {token}"}

    def answer(self, prompt: str, max_new_tokens: int = 512,
               temperature: float = 0.2) -> str:
        """Send *prompt* to the model and return the generated text.

        Args:
            prompt: The question/prompt to send.
            max_new_tokens: Generation cap (default 512, as before).
            temperature: Sampling temperature (default 0.2, as before).

        Returns:
            The stripped generated text, or ``str(data)`` for unrecognized
            response shapes.

        Raises:
            requests.HTTPError: On non-2xx responses (via raise_for_status).
        """
        print(f"[DEBUG] Sending prompt of length {len(prompt)} to HF Inference")  # debug
        payload = {
            "inputs": prompt,
            "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        print(f"[DEBUG] Got response from model: {data!r}")  # debug
        # Text-generation usually returns [{"generated_text": ...}]; guard the
        # element type before indexing, and also accept a bare dict shape.
        if isinstance(data, list) and data and isinstance(data[0], dict) \
                and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        if isinstance(data, dict) and "generated_text" in data:
            return data["generated_text"].strip()
        # Fallback: surface the raw payload so failures are debuggable.
        return str(data)
def _fetch_questions() -> list:
    """Fetch the GAIA question list from the scoring service; raises on HTTP error."""
    print(f"[DEBUG] Fetching questions from {API_URL}/questions")  # debug
    q_resp = requests.get(f"{API_URL}/questions", timeout=15)
    q_resp.raise_for_status()
    return q_resp.json() or []


def _answer_questions(agent, questions):
    """Run *agent* over every question, converting per-question failures to
    "ERROR: ..." answers instead of aborting the whole run.

    Returns:
        (results, payload): display rows for the UI table, and the
        ``{"task_id", "submitted_answer"}`` dicts for submission.
    """
    results = []
    payload = []
    for item in questions:
        print(f"[DEBUG] Processing task_id={item.get('task_id')}")  # debug
        tid = item.get("task_id")
        qtxt = item.get("question", "")
        try:
            ans = agent.answer(qtxt)
        except Exception as e:
            # Best-effort: record the error as the answer and keep going.
            ans = f"ERROR: {e}"
            print(f"[DEBUG] Error answering: {e}")  # debug
        results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
        time.sleep(0.5)  # pace requests to the inference endpoint
    return results, payload


def _submit_answers(username, payload) -> dict:
    """POST the collected answers to the scoring service and return its JSON."""
    print(f"[DEBUG] Submitting payload with {len(payload)} answers")  # debug
    submission = {"username": username, "answers": payload}
    s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
    s_resp.raise_for_status()
    return s_resp.json()


def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch GAIA questions, answer them with the agent, submit, and report.

    Args:
        profile: The Gradio OAuth profile of the logged-in user, or None.

    Returns:
        (status_markdown, results_dataframe) for the UI outputs. Never raises:
        all failures are folded into the status string (top-level UI boundary).
    """
    try:
        print("[DEBUG] run_and_submit_all called")  # debug
        if profile is None:
            print("[DEBUG] No profile provided")  # debug
            return ("⚠️ Please log in with your Hugging Face account.", pd.DataFrame())
        print(f"[DEBUG] Logged in as: {profile.username}")  # debug
        username = profile.username
        # Prefer the env token; fall back to a token on the OAuth profile.
        # NOTE(review): assumes the profile object may expose `access_token` —
        # confirm against the Gradio OAuth integration in use.
        hf_token = HF_TOKEN_ENV or getattr(profile, "access_token", None)
        print(f"[DEBUG] Using HF token from {'env' if HF_TOKEN_ENV else 'profile'}")  # debug
        if not hf_token:
            print("[DEBUG] No HF token found")  # debug
            return (
                "❌ No Hugging Face token found.\n"
                "Set HUGGINGFACEHUB_API_TOKEN in Secrets or log in via the button.",
                pd.DataFrame()
            )
        # 1) Fetch GAIA questions
        questions = _fetch_questions()
        print(f"[DEBUG] Received {len(questions)} questions")  # debug
        if not questions:
            return ("❌ No questions found. Check your API_URL.", pd.DataFrame())
        # 2) Init agent
        agent = GAIAAgent(MODEL_ID, hf_token)
        # 3) Answer each
        results, payload = _answer_questions(agent, questions)
        # 4) Submit
        data = _submit_answers(username, payload)
        print(f"[DEBUG] Submission response: {data!r}")  # debug
        # 5) Build status text
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)
    except Exception as e:
        # Top-level boundary: log the traceback and surface the error in the UI.
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return (f"❌ Unexpected error:\n{e}", pd.DataFrame())
# --- UI wiring ---
# `demo` must stay at module level under this name: HF Spaces launches it.
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    # OAuth login; passing the button as an input makes Gradio supply the
    # current gr.OAuthProfile (or None) to the handler.
    login_button = gr.LoginButton()
    run_button = gr.Button("▶️ Run GAIA Benchmark")
    # Outputs: a markdown status line plus the per-question answers table.
    status_md = gr.Markdown()
    results_table = gr.Dataframe(
        headers=["Task ID", "Question", "Answer"],
        wrap=True,
    )
    run_button.click(
        fn=run_and_submit_all,
        inputs=[login_button],
        outputs=[status_md, results_table],
    )


if __name__ == "__main__":
    demo.launch()
|