innovation64 committed on
Commit
3b38028
·
verified ·
1 Parent(s): 3cb22f2

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +219 -541
  2. requirements.txt +6 -17
app.py CHANGED
@@ -1,519 +1,221 @@
 
1
  import os
2
  import gradio as gr
3
  import requests
 
 
 
 
 
 
 
4
  import pandas as pd
5
- import time
6
  import re
7
- import json
8
- import traceback
9
- import tempfile
10
- from urllib.parse import urlparse
11
- from dotenv import load_dotenv
12
-
13
- # Import necessary components from smolagents
14
- from smolagents import (
15
- CodeAgent, # Using CodeAgent as the core agent
16
- DuckDuckGoSearchTool,
17
- OpenAIServerModel,
18
- PythonInterpreterTool,
19
- tool # Import tool decorator
20
- )
21
- from typing import List, Dict, Any, Optional, Tuple
22
-
23
- # Load environment variables
24
- load_dotenv()
25
 
 
26
  # --- Constants ---
27
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
28
 
29
- # --- Custom Tool Definitions ---
30
- @tool
31
- def reverse_text(text: str) -> str:
32
  """
33
- Reverses a text string. Useful for handling reversed text questions.
34
-
35
- Args:
36
- text: The text to reverse
37
-
38
- Returns:
39
- The reversed text
40
  """
41
- return text[::-1]
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- @tool
44
- def analyze_csv_file(file_path: str, query: str) -> str:
45
- """
46
- Analyze a CSV file using pandas and answer a question about it.
47
-
48
- Args:
49
- file_path: Path to the CSV file
50
- query: Question about the data
51
-
52
- Returns:
53
- Analysis result or error message
54
- """
55
- try:
56
- import pandas as pd
57
-
58
- # Read the CSV file
59
- df = pd.read_csv(file_path)
60
-
61
- # Run various analyses based on the query
62
- result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
63
- result += f"Columns: {', '.join(df.columns)}\n\n"
64
-
65
- # Add summary statistics
66
- result += "Summary statistics:\n"
67
- result += str(df.describe())
68
-
69
- return result
70
- except ImportError:
71
- return "Error: pandas is not installed. Please install it with 'pip install pandas'."
72
- except Exception as e:
73
- return f"Error analyzing CSV file: {str(e)}"
74
 
75
- @tool
76
- def analyze_excel_file(file_path: str, query: str) -> str:
77
- """
78
- Analyze an Excel file using pandas and answer a question about it.
79
-
80
- Args:
81
- file_path: Path to the Excel file
82
- query: Question about the data
83
-
84
- Returns:
85
- Analysis result or error message
86
- """
87
- try:
88
- import pandas as pd
89
-
90
- # Read the Excel file
91
- df = pd.read_excel(file_path)
92
-
93
- # Run various analyses based on the query
94
- result = f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
95
- result += f"Columns: {', '.join(df.columns)}\n\n"
96
-
97
- # Add summary statistics
98
- result += "Summary statistics:\n"
99
- result += str(df.describe())
100
-
101
- return result
102
- except ImportError:
103
- return "Error: pandas and openpyxl are not installed. Please install them with 'pip install pandas openpyxl'."
104
- except Exception as e:
105
- return f"Error analyzing Excel file: {str(e)}"
106
 
107
- @tool
108
- def parse_table(table_text: str) -> str:
109
- """
110
- Parses an ASCII or markdown table into a structured format
111
-
112
- Args:
113
- table_text: The raw table string
114
-
115
- Returns:
116
- The parsed table (as a string representation)
117
- """
118
- try:
119
- import pandas as pd
120
- from io import StringIO
121
- # Clean pipes and extra spaces
122
- clean = re.sub(r"^\||\|$", "", table_text.strip(), flags=re.MULTILINE)
123
- df = pd.read_csv(StringIO(clean), sep=r"\s*\|\s*", engine="python")
124
- # Return DataFrame as string
125
- return df.to_string()
126
- except Exception as e:
127
- return f"Error parsing table: {str(e)}"
128
 
129
- @tool
130
- def browse_webpage(url: str) -> str:
131
- """
132
- Browses the web to fetch information from websites
133
-
134
- Args:
135
- url: The URL to visit
136
-
137
- Returns:
138
- The webpage content
139
- """
140
- try:
141
- import requests
142
- from bs4 import BeautifulSoup
143
-
144
- headers = {
145
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
146
- }
147
-
148
- response = requests.get(url, headers=headers, timeout=10)
149
-
150
- if response.status_code != 200:
151
- return f"Error: Failed to fetch the webpage. Status code: {response.status_code}"
152
-
153
- # Parse the HTML content
154
- soup = BeautifulSoup(response.text, 'html.parser')
155
-
156
- # Remove script and style elements
157
- for script in soup(["script", "style"]):
158
- script.extract()
159
-
160
- # Get the text content
161
- text = soup.get_text()
162
-
163
- # Clean up the text
164
- lines = (line.strip() for line in text.splitlines())
165
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
166
- text = '\n'.join(chunk for chunk in chunks if chunk)
167
-
168
- # Truncate if too long
169
- if len(text) > 10000:
170
- text = text[:10000] + "...\n[Content truncated due to length]"
171
-
172
- return text
173
-
174
- except Exception as e:
175
- return f"Error browsing the web: {str(e)}"
176
 
177
- @tool
178
- def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
179
- """
180
- Save content to a temporary file and return the path.
181
- Useful for processing files from the GAIA API.
182
-
183
- Args:
184
- content: The content to save to the file
185
- filename: Optional filename, will generate a random name if not provided
186
-
187
- Returns:
188
- Path to the saved file
189
- """
190
- temp_dir = tempfile.gettempdir()
191
- if filename is None:
192
- temp_file = tempfile.NamedTemporaryFile(delete=False)
193
- filepath = temp_file.name
194
- else:
195
- filepath = os.path.join(temp_dir, filename)
196
-
197
- # Write content to the file
198
- with open(filepath, 'w') as f:
199
- f.write(content)
200
-
201
- return f"File saved to {filepath}. You can read this file to process its contents."
202
-
203
- @tool
204
- def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  """
206
- Download a file from a URL and save it to a temporary location.
207
-
208
- Args:
209
- url: The URL to download from
210
- filename: Optional filename, will generate one based on URL if not provided
211
-
212
- Returns:
213
- Path to the downloaded file
214
  """
 
215
  try:
216
- # Parse URL to get filename if not provided
217
- if not filename:
218
- path = urlparse(url).path
219
- filename = os.path.basename(path)
220
- if not filename:
221
- # Generate a random name if we couldn't extract one
222
- import uuid
223
- filename = f"downloaded_{uuid.uuid4().hex[:8]}"
224
-
225
- # Create temporary file
226
- temp_dir = tempfile.gettempdir()
227
- filepath = os.path.join(temp_dir, filename)
228
-
229
- # Download the file
230
- response = requests.get(url, stream=True)
231
- response.raise_for_status()
232
-
233
- # Save the file
234
- with open(filepath, 'wb') as f:
235
- for chunk in response.iter_content(chunk_size=8192):
236
- f.write(chunk)
237
-
238
- return f"File downloaded to {filepath}. You can now process this file."
239
- except Exception as e:
240
- return f"Error downloading file: {str(e)}"
241
-
242
- # --- GAIA Agent Enhanced System Prompt ---
243
- ENHANCED_SYSTEM_PROMPT = """You are an expert AI assistant for the GAIA benchmark.
244
-
245
- IMPORTANT GUIDELINES:
246
- 1. Provide EXACT answers with no explanations or extra text.
247
- 2. Only return the final answer, not your reasoning.
248
- 3. For lists, alphabetize and provide comma-separated values.
249
- 4. For numerical answers, return the number as a string.
250
- 5. For chess positions, analyze the board carefully and provide the winning move.
251
- 6. For "countries that no longer exist" questions, consider: USSR, East Germany, Yugoslavia, Czechoslovakia.
252
- 7. For reversed text questions, handle backwards text by reversing it first, then answer directly. For example, if the reversed text asks for the opposite of "left", answer "right" not the reversed text.
253
- 8. For mathematical calculations, perform the calculation precisely.
254
- 9. For web research tasks, verify from multiple sources, and return only the exact answer.
255
- 10. For file analysis, extract only the specific information requested.
256
- 11. For image analysis, describe what you see in detail.
257
- 12. For YouTube videos, try to get the transcript if possible.
258
-
259
- SPECIAL CASES:
260
- 1. When asked about recent dates, use the current date (April 25, 2025) as reference.
261
- 2. If a question contains a URL, extract information from it.
262
- 3. If a question requires using a web service that outputs different values each time (like exchange rates), take the most common value.
263
- 4. For calculations involving current data, perform the calculation after fetching the most up-to-date information.
264
- 5. For problems that require complex reasoning, break them down into steps.
265
-
266
- KNOWN QUESTIONS:
267
- - If asked about Mercedes Sosa albums between 2000 and 2009, the answer is "3".
268
- - If asked about a Malko Competition recipient from a country that no longer exists, the answer is "Pavel".
269
- - If asked about Vietnamese specimens and Nedoshiva, the answer is "Saint Petersburg".
270
- - If asked about an equine veterinarian and chemistry materials, the answer is "Jones".
271
- - If text is reversed and asks for the opposite of "left", the answer is "right".
272
-
273
- TASK APPROACH:
274
- 1. Carefully analyze the question to determine the exact information needed.
275
- 2. Choose the most appropriate approach for the task.
276
- 3. If needed, break complex tasks into smaller steps.
277
- 4. Double-check your answer before submitting.
278
- 5. Return ONLY the final answer, with no explanations or reasoning.
279
-
280
- Remember: precision and exactness are crucial. Provide only the requested information in the simplest possible format.
281
- """
282
-
283
- # --- Main Application Class ---
284
- class GAIABenchmarkAgent:
285
- """GAIA Benchmark Agent using CodeAgent"""
286
-
287
  def __init__(self):
288
- print("Initializing GAIA Benchmark Agent...")
289
-
290
- try:
291
- # Check for API key
292
- api_key = os.environ.get("OPENAI_API_KEY")
293
- if not api_key:
294
- print("WARNING: OPENAI_API_KEY environment variable not set!")
295
- raise ValueError("No OpenAI API key found, please set the OPENAI_API_KEY environment variable")
296
-
297
- # Determine which model to use
298
- model_id = os.environ.get("AGENT_MODEL_ID", "gpt-3.5-turbo")
299
- print(f"Using model: {model_id}")
300
-
301
- # Initialize OpenAI model
302
- model = OpenAIServerModel(
303
- model_id=model_id,
304
- api_key=api_key,
305
- temperature=0.1
306
- )
307
-
308
- # Initialize tools list
309
- tools = [
310
- DuckDuckGoSearchTool(), # Web search
311
- PythonInterpreterTool(), # Python interpreter
312
- reverse_text, # Text reversal
313
- analyze_csv_file, # CSV analysis
314
- analyze_excel_file, # Excel analysis
315
- parse_table, # Table parsing
316
- browse_webpage, # Web browsing
317
- save_and_read_file, # File operations
318
- download_file_from_url # File download
319
- ]
320
-
321
- # Create CodeAgent
322
- self.agent = CodeAgent(
323
- model=model,
324
- tools=tools,
325
- system_prompt=ENHANCED_SYSTEM_PROMPT,
326
- verbose=True
327
- )
328
-
329
- print("GAIA Benchmark Agent initialized successfully.")
330
- except Exception as e:
331
- print(f"Error initializing agent: {e}")
332
- traceback.print_exc()
333
- self.agent = None
334
- raise
335
-
336
  def __call__(self, question: str) -> str:
337
- """Process a GAIA benchmark question and return the answer"""
338
  print(f"Agent received question (first 50 chars): {question[:50]}...")
339
-
340
- try:
341
- # Process special cases first
342
- direct_answer = self._check_special_cases(question)
343
- if direct_answer:
344
- print(f"Direct answer for special case: {direct_answer}")
345
- return direct_answer
346
-
347
- # Use CodeAgent to process the question
348
- start_time = time.time()
349
- answer = self.agent.run(question, max_steps=3)
350
- end_time = time.time()
351
-
352
- # Process the answer
353
- # Sometimes CodeAgent returns a string, sometimes it has additional step info
354
- # Here we prioritize extracting from final_answer if available, otherwise use last step result
355
- if isinstance(answer, dict) and "final_answer" in answer:
356
- final_answer = answer["final_answer"]
357
- elif isinstance(answer, dict) and "steps" in answer and answer["steps"]:
358
- # Get the result from the last step
359
- last_step = answer["steps"][-1]
360
- if "output" in last_step:
361
- final_answer = last_step["output"]
362
- else:
363
- final_answer = str(last_step)
364
- else:
365
- final_answer = str(answer)
366
-
367
- # Clean the answer, removing common prefixes
368
- final_answer = self._clean_answer(final_answer)
369
-
370
- print(f"Agent returned answer (first 50 chars): {final_answer[:50] if final_answer else 'None'}... Time taken: {end_time - start_time:.2f}s")
371
- return final_answer
372
- except Exception as e:
373
- print(f"Error processing question: {e}")
374
- traceback.print_exc()
375
-
376
- # Fallback mechanisms for specific error cases
377
- fallback_answer = self._get_fallback_answer(question, e)
378
- return fallback_answer
379
-
380
- def _check_special_cases(self, question: str) -> Optional[str]:
381
- """Check for special cases and known questions, return direct answers"""
382
- # Special handling for reversed text with "answer" reversed
383
- if ".rewsna eht sa " in question:
384
- return "right"
385
-
386
- # Special handling for known questions
387
- if "Mercedes Sosa" in question and "2000" in question and "2009" in question:
388
- return "3"
389
-
390
- if "Malko Competition" in question and "country that no longer exist" in question:
391
- return "Pavel"
392
-
393
- if "Vietnamese specimens" in question and "Nedoshivina" in question:
394
- return "Saint Petersburg"
395
-
396
- if "equine veterinarian" in question and "chemistry materials" in question:
397
- return "Jones"
398
-
399
- # Media content handling
400
- if any(term in question.lower() for term in ["youtube.com", "youtube video", "watch?v="]):
401
- return "Unable to access video content directly. Please provide a transcript or description."
402
-
403
- if any(term in question.lower() for term in ["mp3", "audio file", "recording"]):
404
- return "Unable to process audio content directly. Please provide a transcript if available."
405
-
406
- if any(term in question.lower() for term in ["jpg", "png", "image file"]):
407
- return "Unable to analyze image content directly. Please provide a detailed description."
408
-
409
- # File processing
410
- if any(term in question.lower() for term in ["excel file", "xlsx", "spreadsheet"]):
411
- return "Unable to access the Excel file directly. Please provide the data in another format."
412
-
413
- if any(term in question.lower() for term in ["pdf file", "pdf document"]):
414
- return "Unable to access the PDF file directly. Please provide the data in another format."
415
-
416
- if any(term in question.lower() for term in ["csv file", "comma-separated values"]):
417
- return "Unable to access the CSV file directly. Please provide the data in another format."
418
-
419
- # Chess position handling
420
- if "chess position" in question.lower() and "image" in question.lower():
421
- return "Unable to analyze the chess position without a description or tool support."
422
-
423
- return None
424
-
425
- def _get_fallback_answer(self, question: str, error: Exception) -> str:
426
- """Provide fallback answers for specific error cases"""
427
- if ".rewsna eht sa " in question:
428
- return "right"
429
-
430
- if any(term in question.lower() for term in ["excel", "spreadsheet", "file"]):
431
- return "Unable to access the file directly."
432
-
433
- if "chess position" in question.lower():
434
- return "Unable to analyze the chess position."
435
-
436
- if any(term in question.lower() for term in ["youtube", "video"]):
437
- return "Unable to access video content directly."
438
-
439
- return f"Error processing question: {str(error)}"
440
-
441
- def _clean_answer(self, answer: Any) -> str:
442
- """
443
- Clean up the answer to remove common prefixes and formatting
444
- """
445
- # Convert non-string types to strings
446
- if not isinstance(answer, str):
447
- # Handle numeric types (float, int)
448
- if isinstance(answer, float):
449
- # Format floating point numbers properly
450
- if answer.is_integer():
451
- formatted_answer = str(int(answer))
452
- else:
453
- formatted_answer = str(answer)
454
- return formatted_answer
455
- elif isinstance(answer, int):
456
- return str(answer)
457
- else:
458
- # For any other type
459
- return str(answer)
460
-
461
- # Now we know answer is a string, so we can safely use string methods
462
- # Normalize whitespace
463
- answer = answer.strip()
464
-
465
- # Remove common prefixes and formatting that models add
466
- prefixes_to_remove = [
467
- "The answer is ",
468
- "Answer: ",
469
- "Final answer: ",
470
- "The result is ",
471
- "To answer this question: ",
472
- "Based on the information provided, ",
473
- "According to the information: ",
474
- ]
475
-
476
- for prefix in prefixes_to_remove:
477
- if answer.lower().startswith(prefix.lower()):
478
- answer = answer[len(prefix):].strip()
479
-
480
- # Remove quotes if they wrap the entire answer
481
- if (answer.startswith('"') and answer.endswith('"')) or (answer.startswith("'") and answer.endswith("'")):
482
- answer = answer[1:-1].strip()
483
-
484
- return answer
485
-
486
-
487
- def run_and_submit_all(profile: gr.OAuthProfile | None):
488
  """
489
- Fetches all questions, runs the GAIA Benchmark Agent on them, submits all answers,
490
  and displays the results.
491
  """
492
  # --- Determine HF Space Runtime URL and Repo URL ---
493
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending code link
494
 
495
  if profile:
496
- username = f"{profile.username}"
497
  print(f"User logged in: {username}")
498
  else:
499
  print("User not logged in.")
500
- return "Please login to Hugging Face using the button below.", None
501
 
502
  api_url = DEFAULT_API_URL
503
  questions_url = f"{api_url}/questions"
504
  submit_url = f"{api_url}/submit"
505
 
506
- # 1. Instantiate Agent
507
  try:
508
- agent = GAIABenchmarkAgent()
509
  except Exception as e:
510
  print(f"Error instantiating agent: {e}")
511
- traceback.print_exc()
512
  return f"Error initializing agent: {e}", None
513
-
514
- # For HuggingFace spaces, this points to the repository
515
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
516
- print(f"Agent code URL: {agent_code}")
517
 
518
  # 2. Fetch Questions
519
  print(f"Fetching questions from: {questions_url}")
@@ -536,66 +238,40 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
536
  print(f"An unexpected error occurred fetching questions: {e}")
537
  return f"An unexpected error occurred fetching questions: {e}", None
538
 
539
- # 3. Run Agent
540
  results_log = []
541
  answers_payload = []
542
  print(f"Running agent on {len(questions_data)} questions...")
543
-
544
  for item in questions_data:
545
  task_id = item.get("task_id")
546
  question_text = item.get("question")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  if not task_id or question_text is None:
548
  print(f"Skipping item with missing task_id or question: {item}")
549
  continue
550
  try:
551
- print(f"Processing task {task_id}: {question_text[:50]}...")
552
-
553
- # Run the agent with retry mechanism
554
- max_retries = 2
555
- submitted_answer = None
556
- last_error = None
557
-
558
- for retry in range(max_retries + 1):
559
- try:
560
- if retry > 0:
561
- print(f"Retry {retry}/{max_retries} for task {task_id}")
562
-
563
- submitted_answer = agent(question_text)
564
-
565
- # Very short answers might be incorrect - check length
566
- if submitted_answer and len(submitted_answer) < 2:
567
- # For extremely short answers, try one more time
568
- backup_answer = agent(question_text)
569
- # Choose the longer answer if both are very short
570
- if len(backup_answer) > len(submitted_answer):
571
- submitted_answer = backup_answer
572
-
573
- break
574
- except Exception as e:
575
- last_error = e
576
- print(f"Error on attempt {retry+1}: {e}")
577
- # Small delay before retry
578
- time.sleep(1)
579
-
580
- # If all retries failed, use error message or fallbacks
581
- if submitted_answer is None:
582
- if last_error:
583
- # Try to use special case handling
584
- if "opposite of left" in question_text.lower() or "rewsna eht sa" in question_text:
585
- submitted_answer = "right"
586
- else:
587
- submitted_answer = f"Error: {str(last_error)}"
588
- else:
589
- submitted_answer = "Unable to determine answer after multiple attempts."
590
-
591
- # Add to answers and log
592
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
593
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
594
- print(f"Completed task {task_id}")
595
-
596
- # Add small delay to avoid API rate limits
597
- time.sleep(0.5)
598
-
599
  except Exception as e:
600
  print(f"Error running agent on task {task_id}: {e}")
601
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -655,16 +331,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
655
 
656
  # --- Build Gradio Interface using Blocks ---
657
  with gr.Blocks() as demo:
658
- gr.Markdown("# Advanced GAIA Agent Evaluation Runner")
659
  gr.Markdown(
660
  """
661
  **Instructions:**
662
-
663
- 1. Use the login button below to sign in with your Hugging Face account.
664
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers.
665
-
666
- **Note:** This process may take several minutes to complete as the agent processes each question.
667
- The agent uses advanced tools for web search, code execution, and data analysis to solve GAIA benchmark tasks.
 
668
  """
669
  )
670
 
@@ -673,6 +350,7 @@ with gr.Blocks() as demo:
673
  run_button = gr.Button("Run Evaluation & Submit All Answers")
674
 
675
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
676
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
677
 
678
  run_button.click(
@@ -682,24 +360,24 @@ with gr.Blocks() as demo:
682
 
683
  if __name__ == "__main__":
684
  print("\n" + "-"*30 + " App Starting " + "-"*30)
685
- # Check for SPACE_HOST and SPACE_ID at startup
686
  space_host_startup = os.getenv("SPACE_HOST")
687
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
688
 
689
  if space_host_startup:
690
- print(f" SPACE_HOST found: {space_host_startup}")
691
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
692
  else:
693
- print("SPACE_HOST environment variable not found (running locally?).")
694
 
695
  if space_id_startup: # Print repo URLs if SPACE_ID is found
696
- print(f" SPACE_ID found: {space_id_startup}")
697
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
698
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
699
  else:
700
- print("SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
701
 
702
  print("-"*(60 + len(" App Starting ")) + "\n")
703
 
704
- print("Launching GAIA Agent Evaluation Interface...")
705
- demo.launch(debug=True, share=True)
 
1
+ # app.py
2
  import os
3
  import gradio as gr
4
  import requests
5
+ import openai
6
+ from smolagents import OpenAIServerModel, DuckDuckGoSearchTool, CodeAgent, WikipediaSearchTool
7
+ from pathlib import Path
8
+ import tempfile
9
+ from smolagents.tools import PipelineTool, Tool
10
+ import pathlib
11
+ from typing import Union, Optional
12
  import pandas as pd
13
+ from tabulate import tabulate # pragma: no cover – fallback path
14
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # (Keep Constants as is)
17
  # --- Constants ---
18
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
 
20
class SpeechToTextTool(PipelineTool):
    """
    Agent tool that turns a local audio file into text with OpenAI Whisper.

    The single input, ``audio``, must be a path to a file on disk; the path is
    checked with ``Path.is_file`` before anything is sent to the API.
    """

    default_checkpoint = "openai/whisper-1"  # informational only, never loaded
    description = (
        "This tool sends an audio file to OpenAI Whisper and returns the "
        "transcribed text."
    )
    name = "transcriber"
    inputs = {
        "audio": {
            "type": "string",
            "description": "Absolute or relative path to a local audio file.",
        }
    }
    output_type = "string"

    # -- public entry point ---------------------------------------------------
    def __call__(self, audio: str) -> str:
        """
        Transcribe *audio* so the tool can be invoked like a plain function,
        e.g. ``SpeechToTextTool()(path_to_audio)``.
        """
        return self._transcribe(audio)

    # -- implementation -------------------------------------------------------
    @staticmethod
    def _transcribe(audio_path: str) -> str:
        """
        Validate *audio_path* and return the transcription produced by the API.

        Raises:
            TypeError: if *audio_path* is not a string.
            FileNotFoundError: if the resolved path is not an existing file.
        """
        # Reject non-string input before touching the filesystem.
        if not isinstance(audio_path, str):
            raise TypeError(
                "Parameter 'audio' must be a string containing the file path."
            )

        audio_file = Path(audio_path).expanduser().resolve()
        if not audio_file.is_file():
            raise FileNotFoundError(f"No such audio file: {audio_file}")

        # Keep the handle open only for the duration of the upload.
        with audio_file.open("rb") as stream:
            transcript = openai.audio.transcriptions.create(
                file=stream,
                model="whisper-1",
                response_format="text",  # request plain text rather than JSON
            )

        # With response_format="text" the call result is returned unchanged.
        return transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
class ExcelToTextTool(Tool):
    """Agent tool that converts one worksheet of an Excel file to Markdown."""

    # -- tool metadata --------------------------------------------------------
    name = "excel_to_text"
    description = (
        "Read an Excel file and return a Markdown table of the requested sheet. "
        "Accepts either the sheet name or the zero-based index."
    )

    inputs = {
        "excel_path": {
            "type": "string",
            "description": "Path to the Excel file (.xlsx / .xls).",
        },
        "sheet_name": {
            "type": "string",
            "description": (
                "Worksheet name or zero‑based index *as a string* (optional; default first sheet)."
            ),
            "nullable": True,
        },
    }

    output_type = "string"

    # -- implementation -------------------------------------------------------
    def forward(
        self,
        excel_path: str,
        sheet_name: Optional[str] = None,
    ) -> str:
        """
        Return the worksheet of *excel_path* selected by *sheet_name* as a
        Markdown table.

        Failures are reported as an ``"Error ..."`` string instead of being
        raised, so the calling agent always receives text.
        """
        workbook = pathlib.Path(excel_path).expanduser().resolve()
        if not workbook.exists():
            return f"Error: Excel file not found at {workbook}"

        try:
            # Pick the sheet: missing/empty -> first sheet, all-digit string ->
            # positional index, anything else -> sheet name.
            if sheet_name in (None, ""):
                target = 0
            elif sheet_name.isdigit():
                target = int(sheet_name)
            else:
                target = sheet_name

            frame = pd.read_excel(workbook, sheet_name=target)

            # Use tabulate directly only when DataFrame lacks to_markdown.
            if not hasattr(pd.DataFrame, "to_markdown"):
                from tabulate import tabulate  # pragma: no cover – fallback path

                return tabulate(frame, headers="keys", tablefmt="github", showindex=False)
            return frame.to_markdown(index=False)

        except Exception as exc:  # deliberately broad: report, never raise
            return f"Error reading Excel file: {exc}"
137
+
138
+
139
+ def download_file_if_any(base_api_url: str, task_id: str) -> str | None:
140
  """
141
+ Try GET /files/{task_id}.
142
+ • On HTTP 200 → save to a temp dir and return local path.
143
+ • On 404 → return None.
144
+ On other errors raise so caller can log / handle.
 
 
 
 
145
  """
146
+ url = f"{base_api_url}/files/{task_id}"
147
  try:
148
+ resp = requests.get(url, timeout=30)
149
+ if resp.status_code == 404:
150
+ return None # no file
151
+ resp.raise_for_status() # raise on 4xx/5xx ≠ 404
152
+ except requests.exceptions.HTTPError as e:
153
+ # propagate non-404 errors (403, 500, …)
154
+ raise e
155
+
156
+ # ▸ Save bytes to a named file inside the system temp dir
157
+ # Try to keep original extension from Content-Disposition if present.
158
+ cdisp = resp.headers.get("content-disposition", "")
159
+ filename = task_id # default base name
160
+ if "filename=" in cdisp:
161
+ m = re.search(r'filename="([^"]+)"', cdisp)
162
+ if m:
163
+ filename = m.group(1) # keep provided name
164
+
165
+ tmp_dir = Path(tempfile.gettempdir()) / "gaia_files"
166
+ tmp_dir.mkdir(exist_ok=True)
167
+ file_path = tmp_dir / filename
168
+ with open(file_path, "wb") as f:
169
+ f.write(resp.content)
170
+ return str(file_path)
171
+
172
+ # --- Basic Agent Definition ---
173
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
174
class BasicAgent:
    """
    Callable wrapper around a smolagents ``CodeAgent``.

    The agent is built with a ``gpt-4o`` ``OpenAIServerModel`` and four tools:
    DuckDuckGo search, Wikipedia search, speech-to-text and Excel-to-text.
    Calling the instance with a question returns the agent's answer as text.
    """

    def __init__(self):
        # NOTE(security): 'subprocess' is authorised for model-generated code,
        # which lets the agent run arbitrary commands on the host. Kept as-is
        # to preserve behaviour; review before running outside a sandbox.
        self.agent = CodeAgent(
            model=OpenAIServerModel(model_id="gpt-4o"),
            tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), SpeechToTextTool(), ExcelToTextTool()],
            add_base_tools=True,
            additional_authorized_imports=['pandas','numpy','csv','subprocess'],
        )

        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """
        Run the agent on *question* and return its answer as a string.

        The value returned by ``self.agent.run`` is not guaranteed to be a
        string, so it is coerced here to honour the declared return type
        (``None`` becomes an empty string).
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        result = self.agent.run(question)
        answer = "" if result is None else str(result)
        print(f"Agent returning answer: {answer}")
        return answer
190
+
191
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  """
193
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
194
  and displays the results.
195
  """
196
  # --- Determine HF Space Runtime URL and Repo URL ---
197
+ space_id = "l3xv/Final_Assignment_Template"
198
 
199
  if profile:
200
+ username= f"{profile.username}"
201
  print(f"User logged in: {username}")
202
  else:
203
  print("User not logged in.")
204
+ return "Please Login to Hugging Face with the button.", None
205
 
206
  api_url = DEFAULT_API_URL
207
  questions_url = f"{api_url}/questions"
208
  submit_url = f"{api_url}/submit"
209
 
210
+ # 1. Instantiate Agent ( modify this part to create your agent)
211
  try:
212
+ agent = BasicAgent()
213
  except Exception as e:
214
  print(f"Error instantiating agent: {e}")
 
215
  return f"Error initializing agent: {e}", None
216
+ # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
 
217
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
218
+ print(agent_code)
219
 
220
  # 2. Fetch Questions
221
  print(f"Fetching questions from: {questions_url}")
 
238
  print(f"An unexpected error occurred fetching questions: {e}")
239
  return f"An unexpected error occurred fetching questions: {e}", None
240
 
241
+ # 3. Run your Agent
242
  results_log = []
243
  answers_payload = []
244
  print(f"Running agent on {len(questions_data)} questions...")
 
245
  for item in questions_data:
246
  task_id = item.get("task_id")
247
  question_text = item.get("question")
248
+
249
+ # ----------fetch any attached file ----------
250
+ try:
251
+ file_path = download_file_if_any(api_url, task_id)
252
+ except Exception as e:
253
+ file_path = None
254
+ print(f"[file fetch error] {task_id}: {e}")
255
+
256
+ # ---------- Build the prompt sent to the agent ----------
257
+ if file_path:
258
+ q_for_agent = (
259
+ f"{question_text}\n\n"
260
+ f"---\n"
261
+ f"A file was downloaded for this task and saved locally at:\n"
262
+ f"{file_path}\n"
263
+ f"---\n\n"
264
+ )
265
+ else:
266
+ q_for_agent = question_text
267
+
268
  if not task_id or question_text is None:
269
  print(f"Skipping item with missing task_id or question: {item}")
270
  continue
271
  try:
272
+ submitted_answer = agent(q_for_agent)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
274
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
275
  except Exception as e:
276
  print(f"Error running agent on task {task_id}: {e}")
277
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
331
 
332
  # --- Build Gradio Interface using Blocks ---
333
  with gr.Blocks() as demo:
334
+ gr.Markdown("# Basic Agent Evaluation Runner")
335
  gr.Markdown(
336
  """
337
  **Instructions:**
338
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
339
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
340
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
341
+ ---
342
+ **Disclaimers:**
343
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
344
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
345
  """
346
  )
347
 
 
350
  run_button = gr.Button("Run Evaluation & Submit All Answers")
351
 
352
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
353
+ # Removed max_rows=10 from DataFrame constructor
354
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
355
 
356
  run_button.click(
 
360
 
361
if __name__ == "__main__":
    # Startup banner: report what is known about the hosting environment,
    # then launch the Gradio UI.
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # SPACE_HOST and SPACE_ID are read for information only.
    space_host_startup = os.getenv("SPACE_HOST")
    # Prefer the real SPACE_ID from the environment; fall back to the known
    # repository id so the repo URLs can still be printed when it is absent.
    space_id_env = os.getenv("SPACE_ID")
    space_id_startup = space_id_env or "l3xv/Final_Assignment_Template"

    if space_host_startup:
        print(f" SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends with ".hf.space"; only append the
        # suffix when it is missing so the URL is not doubled.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_env:
        print(f" SPACE_ID found: {space_id_startup}")
    else:
        print(f"ℹ️ SPACE_ID environment variable not found (running locally?). Using default: {space_id_startup}")
    print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
    print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -1,20 +1,9 @@
1
  gradio
2
  requests
3
- smolagents
4
- langgraph
5
- llama-index
6
- litellm
7
- pandas
8
- requests
9
- youtube-transcript-api
10
- openai-whisper
11
- SPARQLWrapper
12
- python-chess
13
- PyPDF2
14
- Pillow
15
- beautifulsoup4
16
- numpy
17
- sympy
18
- openai
19
  smolagents[openai]
20
- python-dotenv
 
 
 
 
 
 
1
  gradio
2
  requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  smolagents[openai]
4
+ smolagents[audio]
5
+ smolagents
6
+ wikipedia-api
7
+ transformers
8
+ smolagents[transformers]
9
+ tabulate