# app.py
import os
import time

import requests
import pandas as pd
import gradio as gr
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    PythonInterpreterTool,
    InferenceClientModel
)

# --- Configuration ---
# Scoring server for the GAIA benchmark; override via env var for local testing.
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID")  # e.g. "your-username/your-space"
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Hugging Face token

# No need for HF_USERNAME—Gradio OAuthProfile provides it
# Fail fast at startup rather than erroring mid-run if config is missing.
if not all([SPACE_ID, HF_TOKEN]):
    raise RuntimeError(
        "Please set the following environment variables in your Space settings:\n"
        " • SPACE_ID\n"
        " • HUGGINGFACEHUB_API_TOKEN"
    )

# Markdown shown at the top of the UI.
WELCOME_TEXT = """
## Welcome to the GAIA Benchmark Runner 🎉
This challenge is your final hands-on project:
- Build an agent and evaluate it on a subset of the GAIA benchmark.
- You need **≥30%** to earn your Certificate of Completion. 🏅
- Submit your score and see how you stack up on the Student Leaderboard!
"""
# --- Agent Definition ---
class GAIAAgent: | |
def __init__(self, model_id="meta-llama/Llama-3-70B-Instruct"): | |
# Initialize HF Inference client | |
self.model = InferenceClientModel( | |
model_id=model_id, | |
token=HF_TOKEN, | |
provider="hf-inference", | |
timeout=120, | |
temperature=0.2 | |
) | |
# Attach search + code execution tools | |
tools = [ | |
DuckDuckGoSearchTool(), | |
PythonInterpreterTool() | |
] | |
self.agent = CodeAgent( | |
tools=tools, | |
model=self.model, | |
executor_type="local" | |
) | |
def answer(self, question: str, task_file: str = None) -> str: | |
prompt = question | |
if task_file: | |
try: | |
with open(task_file, "r") as f: | |
content = f.read() | |
prompt += f"\n\nAttached file:\n```\n{content}\n```" | |
except: | |
pass | |
return self.agent.run(prompt) | |
# --- Runner & Submission ---
def run_and_submit_all(profile: "gr.OAuthProfile | None"):
    """Fetch all GAIA questions, answer each with the agent, and submit.

    Args:
        profile: The Gradio OAuth profile of the logged-in user, or None
            when nobody is logged in. (Annotation is lazy/stringified;
            Gradio resolves it via typing.get_type_hints for OAuth injection.)

    Returns:
        A ``(status_markdown, results_dataframe)`` tuple matching the two
        UI outputs. Network failures are reported in the status string
        rather than raised, so the UI never shows a raw traceback.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()
    username = profile.username

    # 1) Fetch GAIA questions; report failures as a friendly status.
    try:
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
    except requests.RequestException as e:
        return f"❌ Failed to fetch questions: {e}", pd.DataFrame()
    if not questions:
        return "❌ No questions returned; check your API_URL.", pd.DataFrame()

    # 2) Initialize your agent
    agent = GAIAAgent()

    # 3) Run agent on each question
    results, payload = [], []
    for item in questions:
        task_id = item.get("task_id")
        question = item.get("question", "")
        file_path = item.get("task_file_path")  # optional attachment
        try:
            answer = agent.answer(question, file_path)
        except Exception as e:
            # Record the failure but keep going: a partial submission
            # still scores the questions that succeeded.
            answer = f"ERROR: {e}"
        results.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": answer
        })
        payload.append({
            "task_id": task_id,
            "submitted_answer": answer
        })
        time.sleep(0.5)  # throttle requests

    # 4) Submit all answers; keep the results table even if submission fails.
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co./spaces/{SPACE_ID}/tree/main",
        "answers": payload
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except requests.RequestException as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(results)

    # 5) Build status message
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )
    return status, pd.DataFrame(results)
# --- Gradio Interface ---
with gr.Blocks() as demo:
    # Intro banner plus the login control (passing the login button as an
    # input lets Gradio supply the OAuth profile to the click handler).
    gr.Markdown(WELCOME_TEXT)
    login = gr.LoginButton()

    # Single action: run the whole benchmark and submit the answers.
    run_btn = gr.Button("▶️ Run Benchmark & Submit")

    # Outputs: a markdown status banner and the per-question results table.
    status_out = gr.Markdown()
    table_out = gr.Dataframe(
        headers=["Task ID", "Question", "Answer"],
        wrap=True,
    )

    run_btn.click(
        fn=run_and_submit_all,
        inputs=[login],
        outputs=[status_out, table_out],
    )

if __name__ == "__main__":
    demo.launch()