framsouza committed on
Commit
950d883
·
verified ·
1 Parent(s): 5f30ad7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -54
app.py CHANGED
@@ -1,71 +1,149 @@
1
  # app.py
 
2
  import os
3
- from dotenv import load_dotenv
4
- import gradio as gr
5
  import requests
6
  import pandas as pd
 
7
 
8
- from agent import GeminiAgent
9
-
10
# Load environment variables
load_dotenv()

# Base URL of the scoring service; the default is used when API_URL is unset.
API_URL = os.environ.get("API_URL", "https://agents-course-unit4-scoring.hf.space")
# Space identifier, interpolated into the `agent_code` link of the submission.
SPACE_ID = os.environ.get("SPACE_ID")
# Key handed to BasicAgent; BasicAgent raises ValueError when it is missing.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
16
-
17
class BasicAgent:
    """Callable wrapper that delegates every question to a ``GeminiAgent``."""

    def __init__(self, api_key: str):
        """Create the wrapped ``GeminiAgent``.

        Args:
            api_key: Key passed straight to ``GeminiAgent``.

        Raises:
            ValueError: If ``api_key`` is falsy (empty string or ``None``).
        """
        if api_key:
            self.agent = GeminiAgent(api_key)
            return
        raise ValueError("GOOGLE_API_KEY environment variable not set.")

    def __call__(self, question: str) -> str:
        """Return whatever the wrapped agent's ``run`` yields for ``question``."""
        reply = self.agent.run(question)
        return reply
25
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
def run_and_submit(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``BasicAgent``, and submit them.

    The ``gr.OAuthProfile | None`` annotation is what lets Gradio inject the
    logged-in user's profile; the click handler wires no explicit inputs, so
    without the annotation nothing would be passed for ``profile``.

    Args:
        profile: Profile of the logged-in user, or ``None`` when logged out.

    Returns:
        A ``(status, table)`` tuple: a status string and a DataFrame with one
        row per question (``None`` for the table when nobody is logged in).

    Raises:
        requests.HTTPError: If the scoring service answers with an error
            status on either request.
    """
    if not profile:
        return "Please log in to Hugging Face.", None
    username = profile.username

    # Fetch questions
    resp = requests.get(f"{API_URL}/questions", timeout=15)
    resp.raise_for_status()
    questions = resp.json() or []

    # Run agent
    basic = BasicAgent(GOOGLE_API_KEY)
    results, answers = [], []
    for q in questions:
        task_id = q.get("task_id")
        ans = basic(q.get("question", ""))
        results.append({"Task ID": task_id, "Question": q.get("question"), "Answer": ans})
        answers.append({"task_id": task_id, "submitted_answer": ans})

    # Submit answers
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
        "answers": answers,
    }
    resp2 = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
    resp2.raise_for_status()
    data = resp2.json()
    status = f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})"

    return status, pd.DataFrame(results)
57
 
58
# Gradio UI: login button, a run button, and the two outputs it fills.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Run Status", lines=5)
    table_out = gr.DataFrame(label="Results")
    # The handler's two return values map onto these outputs in order.
    run_btn.click(
        run_and_submit,
        outputs=[status_out, table_out],
    )

if __name__ == "__main__":
    demo.launch(debug=True)
69
-
70
-
71
-
 
1
  # app.py
2
+
3
  import os
4
+ import time
 
5
  import requests
6
  import pandas as pd
7
+ import gradio as gr
8
 
9
+ from smolagents import (
10
+ CodeAgent,
11
+ DuckDuckGoSearchTool,
12
+ PythonInterpreterTool,
13
+ InferenceClientModel
14
+ )
15
+
16
# --- Configuration ---
# Base URL of the scoring service; the default is used when API_URL is unset.
API_URL = os.environ.get("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.environ.get("SPACE_ID")  # e.g. "your-username/your-space"
HF_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")  # Hugging Face token
# No HF_USERNAME setting: the username is read from the Gradio OAuth profile.

# Fail fast at import time when either required setting is missing or empty.
if not (SPACE_ID and HF_TOKEN):
    raise RuntimeError(
        "Please set the following environment variables in your Space settings:\n"
        " • SPACE_ID\n"
        " • HUGGINGFACEHUB_API_TOKEN"
    )

# Markdown rendered at the top of the Gradio page.
WELCOME_TEXT = """
## Welcome to the GAIA Benchmark Runner 🎉

This challenge is your final hands-on project:
- Build an agent and evaluate it on a subset of the GAIA benchmark.
- You need **≥30%** to earn your Certificate of Completion. 🏅
- Submit your score and see how you stack up on the Student Leaderboard!
"""
37
+
38
+ # --- Agent Definition ---
39
+ class GAIAAgent:
40
+ def __init__(self, model_id="meta-llama/Llama-3-70B-Instruct"):
41
+ # Initialize HF Inference client
42
+ self.model = InferenceClientModel(
43
+ model_id=model_id,
44
+ token=HF_TOKEN,
45
+ provider="hf-inference",
46
+ timeout=120,
47
+ temperature=0.2
48
+ )
49
+ # Attach search + code execution tools
50
+ tools = [
51
+ DuckDuckGoSearchTool(),
52
+ PythonInterpreterTool()
53
+ ]
54
+ self.agent = CodeAgent(
55
+ tools=tools,
56
+ model=self.model,
57
+ executor_type="local"
58
+ )
59
+
60
+ def answer(self, question: str, task_file: str = None) -> str:
61
+ prompt = question
62
+ if task_file:
63
+ try:
64
+ with open(task_file, "r") as f:
65
+ content = f.read()
66
+ prompt += f"\n\nAttached file:\n```\n{content}\n```"
67
+ except:
68
+ pass
69
+ return self.agent.run(prompt)
70
+
71
+ # --- Runner & Submission ---
72
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``GAIAAgent``, and submit the batch.

    Network and JSON-decoding failures are reported in the returned status
    instead of being raised, so answers already computed stay visible in the
    table when the final submission fails.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, table)`` tuple: a Markdown status string and a DataFrame
        with one row per attempted question (empty when the run stops before
        any question is attempted).
    """
    if profile is None:
        return "⚠️ Please log in with your Hugging Face account.", pd.DataFrame()

    username = profile.username

    # 1) Fetch GAIA questions
    try:
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
    except (requests.RequestException, ValueError) as err:
        return f"❌ Could not fetch questions: {err}", pd.DataFrame()
    if not questions:
        return "❌ No questions returned; check your API_URL.", pd.DataFrame()

    # 2) Initialize your agent
    agent = GAIAAgent()

    # 3) Run agent on each question
    results, payload = [], []
    for item in questions:
        task_id = item.get("task_id")
        question = item.get("question", "")
        file_path = item.get("task_file_path")  # optional

        try:
            answer = agent.answer(question, file_path)
        except Exception as e:
            # One failing task must not abort the whole run; the error text
            # is submitted as that task's answer.
            answer = f"ERROR: {e}"

        results.append({
            "Task ID": task_id,
            "Question": question,
            "Answer": answer,
        })
        payload.append({
            "task_id": task_id,
            "submitted_answer": answer,
        })

        time.sleep(0.5)  # throttle requests

    # 4) Submit all answers
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
        "answers": payload,
    }
    try:
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()
    except (requests.RequestException, ValueError) as err:
        # Keep the computed answers on screen even though scoring failed.
        return f"❌ Submission failed: {err}", pd.DataFrame(results)

    # 5) Build status message
    status = (
        f"✅ **Submission Successful!**\n\n"
        f"**User:** {data.get('username')}\n"
        f"**Score:** {data.get('score')}% "
        f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
        f"**Message:** {data.get('message')}"
    )

    return status, pd.DataFrame(results)
132
 
133
+
134
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown(WELCOME_TEXT)
    gr.LoginButton()
    run_btn = gr.Button("▶️ Run Benchmark & Submit")
    status_out = gr.Markdown()
    table_out = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    # No `inputs` here: the handler's only parameter is annotated with
    # gr.OAuthProfile, which Gradio fills in itself. Listing the login button
    # as an input would pass an extra positional value to the handler.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, table_out],
    )

if __name__ == "__main__":
    demo.launch()