# Hugging Face Spaces page header captured with this file (Space status: Restarting); not part of app.py.
# app.py | |
import os | |
import time | |
import traceback | |
import requests | |
import pandas as pd | |
import gradio as gr | |
# --- Config from Env ---
# All values are read once, at import time, from environment variables.

# Base URL of the scoring service. The trailing slash is stripped so that
# f"{API_URL}/questions" never produces a double slash when the variable is
# set as "https://host/".
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space").rstrip("/")

# Hub repo id of the model queried through the hosted Inference API.
# NOTE(review): confirm this default repo id exists on the Hub; override it
# with the MODEL_ID variable if it does not.
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")

# Optional API token supplied via the environment; None when the variable is
# unset.
HF_TOKEN_ENV = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Markdown shown at the top of the UI (symbols restored from mis-decoded
# UTF-8).
WELCOME = """
## GAIA Benchmark Runner 🚀
Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""
class GAIAAgent:
    """Minimal agent that forwards each prompt to the hosted HF Inference API.

    Instance attributes (set in ``__init__``):
        model_id: repo id used to build the inference endpoint URL.
        headers: HTTP headers carrying the bearer token.
    """

    def __init__(self, model_id: str, token: str):
        """Store the model id and build the Authorization header from *token*."""
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")  # debug
        self.model_id = model_id
        self.headers = {"Authorization": f"Bearer {token}"}

    @staticmethod
    def _extract_text(data, prompt: str = "") -> str:
        """Pull the generated text out of a decoded inference response.

        Args:
            data: The JSON-decoded response body.
            prompt: The prompt that was sent. When the generated text starts
                with it (text-generation endpoints echo the prompt by
                default), that prefix is removed so only the completion is
                returned.

        Returns:
            The stripped completion when *data* is a non-empty list whose
            first element is a dict with a string ``generated_text``;
            otherwise ``str(data)`` so the caller still sees what came back.
        """
        if isinstance(data, list) and data:
            first = data[0]
            # Require a dict: on a plain string, `in` would be a substring
            # test and the subsequent key lookup would raise TypeError.
            if isinstance(first, dict):
                text = first.get("generated_text")
                if isinstance(text, str):
                    if prompt and text.startswith(prompt):
                        text = text[len(prompt):]
                    return text.strip()
        return str(data)

    def answer(self, prompt: str) -> str:
        """Send *prompt* to the model and return its answer as text.

        Raises:
            requests.HTTPError: when the endpoint answers with an error status
                (via ``raise_for_status``).
            requests.RequestException: on connection problems or timeout.
        """
        print(f"[DEBUG] Sending prompt of length {len(prompt)} to HF Inference")  # debug
        payload = {
            "inputs": prompt,
            "parameters": {"max_new_tokens": 512, "temperature": 0.2},
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        print(f"[DEBUG] Got response from model: {data!r}")  # debug
        return self._extract_text(data, prompt)
def run_and_submit_all(
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None = None,
):
    """Fetch the questions, answer each with the model, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or None when nobody is
            logged in.
        oauth_token: OAuth token of the logged-in user, if available. Used
            only when HUGGINGFACEHUB_API_TOKEN is not set. Optional, so
            existing single-argument callers keep working.

    Returns:
        A ``(status_markdown, results_dataframe)`` tuple. Every failure path
        returns an explanatory status and an empty DataFrame; no exception
        escapes, because this function is the UI event-handler boundary.
    """
    try:
        print("[DEBUG] run_and_submit_all called")  # debug
        if profile is None:
            print("[DEBUG] No profile provided")  # debug
            return ("⚠️ Please log in with your Hugging Face account.", pd.DataFrame())

        username = profile.username
        print(f"[DEBUG] Logged in as: {username}")  # debug

        # Token resolution order: environment secret, then the OAuth token,
        # then the legacy `access_token` attribute lookup on the profile.
        if HF_TOKEN_ENV:
            hf_token, token_source = HF_TOKEN_ENV, "env"
        elif oauth_token is not None and getattr(oauth_token, "token", None):
            hf_token, token_source = oauth_token.token, "oauth"
        else:
            hf_token, token_source = getattr(profile, "access_token", None), "profile"

        if not hf_token:
            print("[DEBUG] No HF token found")  # debug
            return (
                "❌ No Hugging Face token found.\n"
                "Set HUGGINGFACEHUB_API_TOKEN in Secrets or log in via the button.",
                pd.DataFrame(),
            )
        print(f"[DEBUG] Using HF token from {token_source}")  # debug

        # 1) Fetch GAIA questions
        print(f"[DEBUG] Fetching questions from {API_URL}/questions")  # debug
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not isinstance(questions, list):
            # Anything but a list (e.g. an error object) cannot be iterated
            # as question items; report it instead of failing on `.get`.
            print(f"[DEBUG] Unexpected questions payload: {questions!r}")  # debug
            return ("❌ Unexpected response from the questions endpoint.", pd.DataFrame())
        print(f"[DEBUG] Received {len(questions)} questions")  # debug
        if not questions:
            return ("❌ No questions found. Check your API_URL.", pd.DataFrame())

        # 2) Init agent
        agent = GAIAAgent(MODEL_ID, hf_token)

        # 3) Answer each
        results = []
        payload = []
        for item in questions:
            tid = item.get("task_id") if isinstance(item, dict) else None
            if not tid:
                # An answer without a task id cannot be matched to a
                # question, so it is not worth a model call.
                print(f"[DEBUG] Skipping malformed item: {item!r}")  # debug
                continue
            print(f"[DEBUG] Processing task_id={tid}")  # debug
            qtxt = item.get("question") or ""
            try:
                ans = agent.answer(qtxt)
            except Exception as e:
                # Best effort: one failing question must not abort the run.
                ans = f"ERROR: {e}"
                print(f"[DEBUG] Error answering: {e}")  # debug
            results.append({"Task ID": tid, "Question": qtxt, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            time.sleep(0.5)  # small pause between inference calls

        if not payload:
            return ("❌ No valid questions to answer.", pd.DataFrame())

        # 4) Submit
        print(f"[DEBUG] Submitting payload with {len(payload)} answers")  # debug
        submission = {"username": username, "answers": payload}
        # NOTE(review): the course's submission schema is believed to expect
        # an `agent_code` link to the Space's code - confirm against the
        # scoring API's /docs. It is only added when SPACE_ID is set, so the
        # request is unchanged everywhere else.
        space_id = os.getenv("SPACE_ID")
        if space_id:
            submission["agent_code"] = f"https://huggingface.co/spaces/{space_id}/tree/main"
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
        print(f"[DEBUG] Submission response: {data!r}")  # debug

        # 5) Build status text
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)
    except Exception as e:
        # Top-level boundary for the UI callback: log the full traceback and
        # show a short message instead of letting the event handler crash.
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return (f"❌ Unexpected error:\n{e}", pd.DataFrame())
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    login = gr.LoginButton()
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)
    # No `inputs` here: Gradio supplies OAuth arguments itself, based on the
    # handler's type hints. Listing the login button as an input would pass
    # its value as an extra positional argument and the call would fail with
    # an argument-count mismatch.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status, table_df],
    )

if __name__ == "__main__":
    demo.launch()