Spaces:
Restarting
Restarting
File size: 4,417 Bytes
5f30ad7 950d883 10e9b7d 950d883 eccf8e4 3c4371f 950d883 10e9b7d 950d883 5f30ad7 950d883 5f30ad7 950d883 5f30ad7 950d883 5f30ad7 950d883 5f30ad7 950d883 e80aab9 950d883 e80aab9 5f30ad7 950d883 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# app.py
import os
import time
import requests
import pandas as pd
import gradio as gr
from smolagents import (
CodeAgent,
DuckDuckGoSearchTool,
PythonInterpreterTool,
InferenceClientModel
)
# --- Configuration ---
# Scoring-server endpoint; overridable for local testing via API_URL.
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID")  # e.g. "your-username/your-space"
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # Hugging Face token
# No need for HF_USERNAME—Gradio OAuthProfile provides it

# Fail fast at startup when the required Space secrets are missing or empty.
if not (SPACE_ID and HF_TOKEN):
    raise RuntimeError(
        "Please set the following environment variables in your Space settings:\n"
        " • SPACE_ID\n"
        " • HUGGINGFACEHUB_API_TOKEN"
    )

# Markdown shown at the top of the Gradio page.
WELCOME_TEXT = """
## Welcome to the GAIA Benchmark Runner 🎉
This challenge is your final hands-on project:
- Build an agent and evaluate it on a subset of the GAIA benchmark.
- You need **≥30%** to earn your Certificate of Completion. 🏅
- Submit your score and see how you stack up on the Student Leaderboard!
"""
# --- Agent Definition ---
class GAIAAgent:
def __init__(self, model_id="meta-llama/Llama-3-70B-Instruct"):
# Initialize HF Inference client
self.model = InferenceClientModel(
model_id=model_id,
token=HF_TOKEN,
provider="hf-inference",
timeout=120,
temperature=0.2
)
# Attach search + code execution tools
tools = [
DuckDuckGoSearchTool(),
PythonInterpreterTool()
]
self.agent = CodeAgent(
tools=tools,
model=self.model,
executor_type="local"
)
def answer(self, question: str, task_file: str = None) -> str:
prompt = question
if task_file:
try:
with open(task_file, "r") as f:
content = f.read()
prompt += f"\n\nAttached file:\n```\n{content}\n```"
except:
pass
return self.agent.run(prompt)
# --- Runner & Submission ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch GAIA questions, answer each with the agent, and submit results.

    Args:
        profile: Gradio OAuth profile of the logged-in user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status_markdown, results_dataframe)`` tuple for the UI.
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()
    username = profile.username

    # 1) Fetch GAIA questions. Surface network/HTTP errors as a friendly
    #    status message instead of an unhandled traceback in the UI.
    try:
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
    except requests.RequestException as e:
        return f"❌ Failed to fetch questions: {e}", pd.DataFrame()
    if not questions:
        return "❌ No questions returned; check your API_URL.", pd.DataFrame()

    # 2) Initialize your agent
    agent = GAIAAgent()

    # 3) Run agent on each question
    results, payload = [], []
    for item in questions:
        task_id = item.get("task_id")
        question = item.get("question", "")
        file_path = item.get("task_file_path")  # optional
        try:
            answer = agent.answer(question, file_path)
        except Exception as e:
            # Record the failure but keep going: one bad task should not
            # sink the whole submission.
            answer = f"ERROR: {e}"
        results.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": answer
        })
        payload.append({
            "task_id": task_id,
            "submitted_answer": answer
        })
        time.sleep(0.5)  # throttle requests

    # 4) Submit all answers
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co./spaces/{SPACE_ID}/tree/main",
        "answers": payload
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except requests.RequestException as e:
        # Answers were computed; show them even though submission failed.
        return f"❌ Submission failed: {e}", pd.DataFrame(results)

    # 5) Build status message
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )
    return status, pd.DataFrame(results)
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME_TEXT)
    login_button = gr.LoginButton()
    run_button = gr.Button("▶️ Run Benchmark & Submit")
    status_box = gr.Markdown()
    results_table = gr.Dataframe(
        headers=["Task ID", "Question", "Answer"],
        wrap=True,
    )
    # Clicking the button runs the full benchmark + submission pipeline;
    # the login component supplies the OAuthProfile argument.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[login_button],
        outputs=[status_box, results_table],
    )

if __name__ == "__main__":
    demo.launch()
|