MrArray22 committed on
Commit
9ccb506
·
verified ·
1 Parent(s): 8d51314

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -165
app.py CHANGED
@@ -1,146 +1,44 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- from dotenv import load_dotenv
7
- from openai import OpenAI
8
- from tenacity import retry, stop_after_attempt, wait_exponential
9
-
10
- # Load environment variables
11
- load_dotenv()
12
-
13
  # (Keep Constants as is)
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
- OPENAI_MODEL = "openai/gpt-4.1" # or "gpt-3.5-turbo" based on your preference
17
 
18
 
19
  # --- Basic Agent Definition ---
20
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
21
  class BasicAgent:
22
  def __init__(self):
23
- """Initialize the agent with OpenAI client and setup."""
24
- print("BasicAgent initializing...")
25
- self.client = OpenAI(
26
- api_key=os.environ["API_KEY"],
27
- base_url="https://models.github.ai/inference",
 
 
 
 
 
28
  )
29
- print("BasicAgent initialized successfully.")
30
-
31
- @retry(
32
- stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)
33
- )
34
- def _get_completion(self, prompt: str) -> str:
35
- """Get completion from OpenAI with retry logic."""
36
- try:
37
- response = self.client.chat.completions.create(
38
- model=OPENAI_MODEL,
39
- messages=[
40
- {
41
- "role": "developer",
42
- "content": """
43
- You are an expert research assistant that provides precise, accurate answers. Before responding, use this hidden planning phase (which will not be shown to users):
44
-
45
- ```
46
- <planning>
47
- 1. Classify the question type:
48
- - Arithmetic/mathematical calculation
49
- - Factual lookup (dates, codes, definitions)
50
- - Complex knowledge (requires synthesis of multiple facts)
51
- - Subjective/opinion-based (requires reasoning with caveats)
52
-
53
- 2. For each type:
54
- - Arithmetic: Calculate step-by-step to ensure accuracy
55
- - Factual lookup: Identify the specific data point needed
56
- - Complex knowledge: Break down into key components and relationships
57
- - Subjective: Note major perspectives and evidence for each
58
-
59
- 3. Check for potential ambiguities or misinterpretations
60
- 4. Formulate the most precise answer possible
61
- </planning>
62
- ```
63
-
64
- ## Response Format
65
-
66
- After your planning, provide your answer in this format:
67
-
68
- **Answer:** [Your concise, precise answer]
69
-
70
- For factual questions, include only the exact information requested - no extra text.
71
- For complex questions, provide a concise, well-structured response focused on accuracy.
72
-
73
- ## Examples
74
-
75
- **Q: What is 493 × 27?**
76
- <planning>Arithmetic calculation: 493 × 27 = (493 × 20) + (493 × 7) = 9,860 + 3,451 = 13,311</planning>
77
- **Answer:** 13,311
78
-
79
- **Q: Which country has the smallest land area in South America?**
80
- <planning>Factual lookup: South American countries by land area. Smallest is Suriname at 63,251 square miles.</planning>
81
- **Answer:** Suriname
82
-
83
- **Q: How does atmospheric carbon dioxide affect ocean acidity?**
84
- <planning>Complex knowledge question requiring synthesis of chemistry concepts...</planning>
85
- **Answer:** Atmospheric CO₂ dissolves in seawater forming carbonic acid (H₂CO₃), which releases hydrogen ions and lowers pH. This process, called ocean acidification, has increased ocean acidity by approximately 30% since the Industrial Revolution.""",
86
- },
87
- {"role": "user", "content": prompt},
88
- ],
89
- temperature=0.5, # Lower temperature for more consistent outputs
90
- # max_tokens=1000,
91
- )
92
- return response.choices[0].message.content.strip()
93
- except Exception as e:
94
- print(f"Error in OpenAI API call: {e}")
95
- raise
96
-
97
- def _preprocess_question(self, question: str) -> str:
98
- """Preprocess the question to enhance clarity and context."""
99
- enhanced_prompt = f"""Please analyze and answer the following question from the GAIA benchmark.
100
- Question: {question}
101
-
102
- Provide a clear, specific answer that can be evaluated through exact matching.
103
- If the question requires multiple steps, please show your reasoning but ensure the final answer is clearly stated.
104
- """
105
- return enhanced_prompt
106
-
107
  def __call__(self, question: str) -> str:
108
- """Process the question and return an answer."""
109
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
 
110
 
111
- try:
112
- # Preprocess the question
113
- enhanced_prompt = self._preprocess_question(question)
114
-
115
- # Get completion from OpenAI
116
- response = self._get_completion(enhanced_prompt)
117
-
118
- # Extract the final answer
119
- # If the response contains multiple lines or explanations,
120
- # we'll try to extract just the final answer
121
- answer_lines = response.strip().split("\n")
122
- final_answer = answer_lines[-1].strip()
123
-
124
- # Log the response for debugging
125
- print(f"Agent generated answer: {final_answer[:100]}...")
126
-
127
- return final_answer
128
-
129
- except Exception as e:
130
- print(f"Error processing question: {e}")
131
- return f"Error: {str(e)}"
132
-
133
-
134
- def run_and_submit_all(profile: gr.OAuthProfile | None):
135
  """
136
  Fetches all questions, runs the BasicAgent on them, submits all answers,
137
  and displays the results.
138
  """
139
  # --- Determine HF Space Runtime URL and Repo URL ---
140
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
141
 
142
  if profile:
143
- username = f"{profile.username}"
144
  print(f"User logged in: {username}")
145
  else:
146
  print("User not logged in.")
@@ -167,16 +65,16 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
167
  response.raise_for_status()
168
  questions_data = response.json()
169
  if not questions_data:
170
- print("Fetched questions list is empty.")
171
- return "Fetched questions list is empty or invalid format.", None
172
  print(f"Fetched {len(questions_data)} questions.")
173
  except requests.exceptions.RequestException as e:
174
  print(f"Error fetching questions: {e}")
175
  return f"Error fetching questions: {e}", None
176
  except requests.exceptions.JSONDecodeError as e:
177
- print(f"Error decoding JSON response from questions endpoint: {e}")
178
- print(f"Response text: {response.text[:500]}")
179
- return f"Error decoding server response for questions: {e}", None
180
  except Exception as e:
181
  print(f"An unexpected error occurred fetching questions: {e}")
182
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -193,36 +91,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
193
  continue
194
  try:
195
  submitted_answer = agent(question_text)
196
- answers_payload.append(
197
- {"task_id": task_id, "submitted_answer": submitted_answer}
198
- )
199
- results_log.append(
200
- {
201
- "Task ID": task_id,
202
- "Question": question_text,
203
- "Submitted Answer": submitted_answer,
204
- }
205
- )
206
  except Exception as e:
207
- print(f"Error running agent on task {task_id}: {e}")
208
- results_log.append(
209
- {
210
- "Task ID": task_id,
211
- "Question": question_text,
212
- "Submitted Answer": f"AGENT ERROR: {e}",
213
- }
214
- )
215
 
216
  if not answers_payload:
217
  print("Agent did not produce any answers to submit.")
218
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
219
 
220
- # 4. Prepare Submission
221
- submission_data = {
222
- "username": username.strip(),
223
- "agent_code": agent_code,
224
- "answers": answers_payload,
225
- }
226
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
227
  print(status_update)
228
 
@@ -276,11 +156,9 @@ with gr.Blocks() as demo:
276
  gr.Markdown(
277
  """
278
  **Instructions:**
279
-
280
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
281
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
282
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
283
-
284
  ---
285
  **Disclaimers:**
286
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
@@ -292,19 +170,20 @@ with gr.Blocks() as demo:
292
 
293
  run_button = gr.Button("Run Evaluation & Submit All Answers")
294
 
295
- status_output = gr.Textbox(
296
- label="Run Status / Submission Result", lines=5, interactive=False
297
- )
298
  # Removed max_rows=10 from DataFrame constructor
299
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
300
 
301
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
302
 
303
  if __name__ == "__main__":
304
- print("\n" + "-" * 30 + " App Starting " + "-" * 30)
305
  # Check for SPACE_HOST and SPACE_ID at startup for information
306
  space_host_startup = os.getenv("SPACE_HOST")
307
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
308
 
309
  if space_host_startup:
310
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -312,18 +191,14 @@ if __name__ == "__main__":
312
  else:
313
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
314
 
315
- if space_id_startup: # Print repo URLs if SPACE_ID is found
316
  print(f"✅ SPACE_ID found: {space_id_startup}")
317
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
318
- print(
319
- f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
320
- )
321
  else:
322
- print(
323
- "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
324
- )
325
 
326
- print("-" * (60 + len(" App Starting ")) + "\n")
327
 
328
  print("Launching Gradio Interface for Basic Agent Evaluation...")
329
- demo.launch(debug=True, share=False)
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel
 
 
 
 
 
 
6
  # (Keep Constants as is)
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
9
 
10
 
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    """Question-answering agent built on a smolagents ``CodeAgent``.

    The wrapped agent is given a chat model served through an
    OpenAI-style endpoint and a DuckDuckGo search tool. Calling the
    instance with a question runs the agent and returns its answer as text.
    """

    def __init__(
        self,
        model_id: str = "openai/gpt-4.1",
        api_base: str = "https://models.github.ai/inference",
    ):
        """Create the model, the search tool and the underlying agent.

        Args:
            model_id: Identifier of the model requested from the endpoint.
            api_base: Base URL of the inference endpoint.

        Raises:
            KeyError: If the ``API_KEY`` environment variable is not set.
        """
        print("BasicAgent initialized.")
        # The API key is read from the environment so it never lives in the code.
        model = OpenAIServerModel(
            model_id=model_id,
            api_key=os.environ["API_KEY"],
            api_base=api_base,
        )
        search_tool = DuckDuckGoSearchTool()
        self.agent = CodeAgent(model=model, tools=[search_tool])

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        Args:
            question: The question text to hand to the agent.

        Returns:
            The agent's final answer, converted to ``str``.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = self.agent.run(question)
        print(f"Agent returning fixed answer: {answer}")
        # The agent may hand back a non-string object (e.g. a number); the
        # declared return type is ``str``, so normalise it here.
        return str(answer)
31
 
32
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  """
34
  Fetches all questions, runs the BasicAgent on them, submits all answers,
35
  and displays the results.
36
  """
37
  # --- Determine HF Space Runtime URL and Repo URL ---
38
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
39
 
40
  if profile:
41
+ username= f"{profile.username}"
42
  print(f"User logged in: {username}")
43
  else:
44
  print("User not logged in.")
 
65
  response.raise_for_status()
66
  questions_data = response.json()
67
  if not questions_data:
68
+ print("Fetched questions list is empty.")
69
+ return "Fetched questions list is empty or invalid format.", None
70
  print(f"Fetched {len(questions_data)} questions.")
71
  except requests.exceptions.RequestException as e:
72
  print(f"Error fetching questions: {e}")
73
  return f"Error fetching questions: {e}", None
74
  except requests.exceptions.JSONDecodeError as e:
75
+ print(f"Error decoding JSON response from questions endpoint: {e}")
76
+ print(f"Response text: {response.text[:500]}")
77
+ return f"Error decoding server response for questions: {e}", None
78
  except Exception as e:
79
  print(f"An unexpected error occurred fetching questions: {e}")
80
  return f"An unexpected error occurred fetching questions: {e}", None
 
91
  continue
92
  try:
93
  submitted_answer = agent(question_text)
94
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
95
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
96
  except Exception as e:
97
+ print(f"Error running agent on task {task_id}: {e}")
98
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
99
 
100
  if not answers_payload:
101
  print("Agent did not produce any answers to submit.")
102
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
103
 
104
+ # 4. Prepare Submission
105
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
106
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
107
  print(status_update)
108
 
 
156
  gr.Markdown(
157
  """
158
  **Instructions:**
 
159
  1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
160
  2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
161
  3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
162
  ---
163
  **Disclaimers:**
164
  Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
 
170
 
171
  run_button = gr.Button("Run Evaluation & Submit All Answers")
172
 
173
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
 
174
  # Removed max_rows=10 from DataFrame constructor
175
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
176
 
177
+ run_button.click(
178
+ fn=run_and_submit_all,
179
+ outputs=[status_output, results_table]
180
+ )
181
 
182
  if __name__ == "__main__":
183
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
184
  # Check for SPACE_HOST and SPACE_ID at startup for information
185
  space_host_startup = os.getenv("SPACE_HOST")
186
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
187
 
188
  if space_host_startup:
189
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
191
  else:
192
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
193
 
194
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
195
  print(f"✅ SPACE_ID found: {space_id_startup}")
196
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
197
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
198
  else:
199
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
200
 
201
+ print("-"*(60 + len(" App Starting ")) + "\n")
202
 
203
  print("Launching Gradio Interface for Basic Agent Evaluation...")
204
+ demo.launch(debug=True, share=False)