mobrobro commited on
Commit
0070112
·
verified ·
1 Parent(s): 519da76

Update app.py

Browse files

Key improvements in this implementation:

Robust Rate Limit Handling:

Uses a manual retry system with exponential backoff
Starts with a much longer base wait time (20-30 seconds)
Doubles wait time on each retry


Two-level Caching:

Caches questions to avoid repeated API calls
Also caches answers for each task ID to allow resuming interrupted runs
Saves cache after each question to preserve progress


Separate Retry Mechanisms:

Customized retry approach for questions endpoint
Another retry mechanism for the submission endpoint
Different wait times for different endpoints


Error Handling:

Better error detection and reporting
Continues processing even if file checks fail



This implementation significantly improves reliability in two ways: it uses much longer wait times between retries (often necessary under severe rate limiting), and it implements a complete caching system that preserves all work completed so far, allowing interrupted runs to resume.

Files changed (1) hide show
  1. app.py +222 -123
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
6
  import sys
7
  from pathlib import Path
8
 
@@ -219,30 +221,62 @@ def extract_final_answer(agent_response):
219
 
220
  return "Unable to determine"
221
 
222
- # Replace BasicAgent with your SmolaAgent in the run_and_submit_all function
223
- import backoff
224
- import time
225
- import json
226
- import os
227
-
228
- # Add backoff decorator for API requests
229
- @backoff.on_exception(
230
- backoff.expo,
231
- requests.exceptions.HTTPError,
232
- max_tries=5,
233
- giveup=lambda e: e.response.status_code != 429,
234
- factor=2
235
- )
236
- def rate_limited_request(method, url, **kwargs):
237
- """Make a request with automatic backoff for rate limited requests"""
238
- response = requests.request(method, url, **kwargs)
239
- response.raise_for_status()
240
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
  def run_and_submit_all(profile: gr.OAuthProfile | None):
243
  """
244
  Fetches all questions, runs the SmolaAgent on them, submits all answers,
245
- and displays the results with rate limit handling.
246
  """
247
  # --- Determine HF Space Runtime URL and Repo URL ---
248
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
@@ -269,104 +303,150 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
269
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
270
  print(agent_code)
271
 
272
- # 2. Try to load cached questions or fetch from API with rate limit handling
273
- cached_questions_path = "cached_questions.json"
274
 
275
- if os.path.exists(cached_questions_path) and os.path.getsize(cached_questions_path) > 2:
276
- print(f"Loading cached questions from {cached_questions_path}")
 
277
  try:
278
- with open(cached_questions_path, "r") as f:
279
  questions_data = json.load(f)
280
  print(f"Loaded {len(questions_data)} questions from cache")
281
  except Exception as e:
282
  print(f"Error loading cached questions: {e}")
283
- return f"Error loading cached questions: {e}", None
284
  else:
285
- # Fetch questions from API with rate limit handling
286
- print(f"No cached questions found. Fetching from: {questions_url}")
 
 
 
287
  try:
288
- response = rate_limited_request("GET", questions_url, timeout=15)
289
- questions_data = response.json()
 
290
 
291
- # Cache the questions for future runs
292
- if questions_data:
 
293
  try:
294
- with open(cached_questions_path, "w") as f:
295
- json.dump(questions_data, f)
296
- print(f"Cached {len(questions_data)} questions to {cached_questions_path}")
297
- except Exception as e:
298
- print(f"Warning: Failed to cache questions: {e}")
299
- except requests.exceptions.RequestException as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  print(f"Error fetching questions: {e}")
301
  return f"Error fetching questions: {e}", None
302
- except requests.exceptions.JSONDecodeError as e:
303
- print(f"Error decoding JSON response from questions endpoint: {e}")
304
- print(f"Response text: {response.text[:500]}")
305
- return f"Error decoding server response for questions: {e}", None
306
- except Exception as e:
307
- print(f"An unexpected error occurred fetching questions: {e}")
308
- return f"An unexpected error occurred fetching questions: {e}", None
309
-
310
- if not questions_data:
311
- print("Questions list is empty.")
312
- return "Questions list is empty or invalid format.", None
313
-
314
- print(f"Processing {len(questions_data)} questions...")
315
 
316
  # 3. Run your Agent
317
  results_log = []
318
  answers_payload = []
 
 
 
 
 
 
 
 
 
 
 
 
319
  print(f"Running agent on {len(questions_data)} questions...")
320
  for item in questions_data:
321
  task_id = item.get("task_id")
322
  question_text = item.get("question")
323
 
324
- # Check if there are files associated with this task with rate limit handling
325
- try:
326
- files_url = f"{api_url}/files/{task_id}"
327
- try:
328
- files_response = rate_limited_request("GET", files_url, timeout=15)
329
- if files_response.status_code == 200:
330
- print(f"Task {task_id} has associated files")
331
- # Handle files if needed
332
- except Exception as e:
333
- print(f"Error checking for files for task {task_id}: {e}")
334
- # Continue even if file check fails
335
- except Exception as e:
336
- print(f"Error checking for files for task {task_id}: {e}")
337
- # Continue even if file check fails
338
-
339
  if not task_id or question_text is None:
340
  print(f"Skipping item with missing task_id or question: {item}")
341
  continue
342
 
343
- try:
344
- # Get full agent response
345
- full_response = agent(question_text)
346
-
347
- # Extract just the final answer part for submission
348
- submitted_answer = extract_final_answer(full_response)
 
 
 
 
 
 
 
 
 
 
349
 
350
- # Add to submission payload
351
- answers_payload.append({
352
- "task_id": task_id,
353
- "submitted_answer": submitted_answer,
354
- "reasoning_trace": full_response # Optional: include full reasoning
355
- })
356
-
357
- # Log for display
358
- results_log.append({
359
- "Task ID": task_id,
360
- "Question": question_text,
361
- "Submitted Answer": submitted_answer,
362
- "Full Response": full_response
363
- })
364
-
365
- print(f"Processed task {task_id}, answer: {submitted_answer}")
366
-
367
- except Exception as e:
368
- print(f"Error running agent on task {task_id}: {e}")
369
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
  if not answers_payload:
372
  print("Agent did not produce any answers to submit.")
@@ -377,42 +457,61 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
377
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
378
  print(status_update)
379
 
380
- # 5. Submit with rate limit handling
381
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
382
  try:
383
- response = rate_limited_request("POST", submit_url, json=submission_data, timeout=60)
384
- result_data = response.json()
385
- final_status = (
386
- f"Submission Successful!\n"
387
- f"User: {result_data.get('username')}\n"
388
- f"Overall Score: {result_data.get('score', 'N/A')}% "
389
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
390
- f"Message: {result_data.get('message', 'No message received.')}"
391
- )
392
- print("Submission successful.")
393
- results_df = pd.DataFrame(results_log)
394
- return final_status, results_df
395
- except requests.exceptions.HTTPError as e:
396
- error_detail = f"Server responded with status {e.response.status_code}."
397
- try:
398
- error_json = e.response.json()
399
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
400
- except requests.exceptions.JSONDecodeError:
401
- error_detail += f" Response: {e.response.text[:500]}"
402
- status_message = f"Submission Failed: {error_detail}"
403
- print(status_message)
404
- results_df = pd.DataFrame(results_log)
405
- return status_message, results_df
406
- except requests.exceptions.Timeout:
407
- status_message = "Submission Failed: The request timed out."
408
- print(status_message)
409
- results_df = pd.DataFrame(results_log)
410
- return status_message, results_df
411
- except requests.exceptions.RequestException as e:
412
- status_message = f"Submission Failed: Network error - {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  print(status_message)
414
  results_df = pd.DataFrame(results_log)
415
  return status_message, results_df
 
416
  except Exception as e:
417
  status_message = f"An unexpected error occurred during submission: {e}"
418
  print(status_message)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ import json
7
+ import time
8
  import sys
9
  from pathlib import Path
10
 
 
221
 
222
  return "Unable to determine"
223
 
224
# Simple rate-limited request function with retry
def make_rate_limited_request(url, method="GET", max_retries=5, initial_wait=5, **kwargs):
    """
    Make an HTTP request with automatic handling of rate limits (429)
    and transient network errors, using exponential backoff between retries.

    Args:
        url: The URL to request
        method: HTTP method (GET, POST, etc.)
        max_retries: Maximum number of attempts before giving up
        initial_wait: Initial wait time in seconds, doubled on each retry
        **kwargs: Additional arguments to pass to requests.request

    Returns:
        requests.Response object on success (any non-429 status is returned
        as-is; callers decide how to handle non-2xx statuses)

    Raises:
        Exception: if max_retries is exceeded without an acceptable response
    """
    wait_time = initial_wait

    for attempt in range(max_retries):
        wait_seconds = None
        try:
            response = requests.request(method, url, **kwargs)

            # If not rate limited, return the response
            if response.status_code != 429:
                return response

            # Handle rate limiting: prefer the server-specified Retry-After
            # delay when present and numeric. Retry-After may also be an
            # HTTP-date string, which int() cannot parse -- fall back to
            # exponential backoff instead of crashing in that case.
            retry_after = response.headers.get('Retry-After')
            if retry_after:
                try:
                    wait_seconds = int(retry_after)
                    print(f"Rate limited. Server requested wait of {wait_seconds} seconds.")
                except ValueError:
                    wait_seconds = None
            if wait_seconds is None:
                # Use exponential backoff
                wait_seconds = wait_time
                wait_time *= 2  # Double the wait time for next attempt
                print(f"Rate limited. Using exponential backoff: waiting {wait_seconds} seconds.")

        except requests.exceptions.RequestException as e:
            # For connection errors, wait and retry
            print(f"Request error: {e}")
            wait_seconds = wait_time
            wait_time *= 2

        # Only sleep if another attempt remains; sleeping after the final
        # failure would just delay the raised error for no benefit.
        if attempt < max_retries - 1:
            time.sleep(wait_seconds)

    # If we get here, we've exceeded max_retries
    raise Exception(f"Failed to get a valid response after {max_retries} attempts")
275
 
276
  def run_and_submit_all(profile: gr.OAuthProfile | None):
277
  """
278
  Fetches all questions, runs the SmolaAgent on them, submits all answers,
279
+ and displays the results. Uses caching and handles rate limits.
280
  """
281
  # --- Determine HF Space Runtime URL and Repo URL ---
282
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
303
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
304
  print(agent_code)
305
 
306
+ # 2. Use cached questions or fetch with rate limiting
307
+ cache_file = "cached_questions.json"
308
 
309
+ # Try to load from cache first
310
+ if os.path.exists(cache_file) and os.path.getsize(cache_file) > 10:
311
+ print(f"Loading cached questions from {cache_file}")
312
  try:
313
+ with open(cache_file, 'r') as f:
314
  questions_data = json.load(f)
315
  print(f"Loaded {len(questions_data)} questions from cache")
316
  except Exception as e:
317
  print(f"Error loading cached questions: {e}")
318
+ questions_data = None
319
  else:
320
+ questions_data = None
321
+
322
+ # Fetch if not cached
323
+ if not questions_data:
324
+ print("Fetching questions with rate limit handling...")
325
  try:
326
+ # Manually implement a retry with long waits
327
+ max_attempts = 5
328
+ base_wait = 20 # Start with a long wait time
329
 
330
+ for attempt in range(max_attempts):
331
+ print(f"Attempt {attempt+1}/{max_attempts} to fetch questions")
332
+
333
  try:
334
+ response = requests.get(questions_url, timeout=15)
335
+
336
+ if response.status_code == 200:
337
+ questions_data = response.json()
338
+ print(f"Successfully fetched {len(questions_data)} questions")
339
+
340
+ # Cache for future use
341
+ try:
342
+ with open(cache_file, 'w') as f:
343
+ json.dump(questions_data, f)
344
+ print(f"Cached {len(questions_data)} questions to {cache_file}")
345
+ except Exception as e:
346
+ print(f"Warning: Failed to cache questions: {e}")
347
+
348
+ break # Success, exit retry loop
349
+
350
+ elif response.status_code == 429:
351
+ wait_time = base_wait * (2 ** attempt)
352
+ print(f"Rate limited (429). Waiting {wait_time} seconds before retry...")
353
+ time.sleep(wait_time)
354
+ else:
355
+ print(f"Unexpected status code: {response.status_code}")
356
+ time.sleep(base_wait)
357
+
358
+ except requests.exceptions.RequestException as e:
359
+ print(f"Request error: {e}")
360
+ time.sleep(base_wait)
361
+
362
+ if not questions_data:
363
+ return "Failed to fetch questions after multiple attempts. Please try again later.", None
364
+
365
+ except Exception as e:
366
  print(f"Error fetching questions: {e}")
367
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
  # 3. Run your Agent
370
  results_log = []
371
  answers_payload = []
372
+ answers_cache_file = "cached_answers.json"
373
+
374
+ # Try to load cached answers
375
+ cached_answers = {}
376
+ if os.path.exists(answers_cache_file):
377
+ try:
378
+ with open(answers_cache_file, 'r') as f:
379
+ cached_answers = json.load(f)
380
+ print(f"Loaded {len(cached_answers)} cached answers")
381
+ except Exception as e:
382
+ print(f"Error loading cached answers: {e}")
383
+
384
  print(f"Running agent on {len(questions_data)} questions...")
385
  for item in questions_data:
386
  task_id = item.get("task_id")
387
  question_text = item.get("question")
388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
  if not task_id or question_text is None:
390
  print(f"Skipping item with missing task_id or question: {item}")
391
  continue
392
 
393
+ # Check if we already have a cached answer for this task
394
+ if task_id in cached_answers:
395
+ print(f"Using cached answer for task {task_id}")
396
+ full_response = cached_answers[task_id]['full_response']
397
+ submitted_answer = cached_answers[task_id]['submitted_answer']
398
+ else:
399
+ try:
400
+ # Check for associated files with manual retry
401
+ try:
402
+ files_url = f"{api_url}/files/{task_id}"
403
+ files_response = requests.get(files_url, timeout=15)
404
+ if files_response.status_code == 200:
405
+ print(f"Task {task_id} has associated files")
406
+ # Handle files if needed
407
+ except Exception as e:
408
+ print(f"Error checking for files for task {task_id}: {e}")
409
 
410
+ # Get agent response
411
+ full_response = agent(question_text)
412
+
413
+ # Extract final answer
414
+ submitted_answer = extract_final_answer(full_response)
415
+
416
+ # Cache this answer
417
+ cached_answers[task_id] = {
418
+ 'full_response': full_response,
419
+ 'submitted_answer': submitted_answer
420
+ }
421
+
422
+ # Save to cache after each question to avoid losing progress
423
+ try:
424
+ with open(answers_cache_file, 'w') as f:
425
+ json.dump(cached_answers, f)
426
+ except Exception as e:
427
+ print(f"Warning: Failed to save answer cache: {e}")
428
+
429
+ except Exception as e:
430
+ print(f"Error running agent on task {task_id}: {e}")
431
+ full_response = f"AGENT ERROR: {e}"
432
+ submitted_answer = "Unable to determine"
433
+
434
+ # Add to submission payload
435
+ answers_payload.append({
436
+ "task_id": task_id,
437
+ "submitted_answer": submitted_answer,
438
+ "reasoning_trace": full_response
439
+ })
440
+
441
+ # Log for display
442
+ results_log.append({
443
+ "Task ID": task_id,
444
+ "Question": question_text,
445
+ "Submitted Answer": submitted_answer,
446
+ "Full Response": full_response
447
+ })
448
+
449
+ print(f"Processed task {task_id}, answer: {submitted_answer}")
450
 
451
  if not answers_payload:
452
  print("Agent did not produce any answers to submit.")
 
457
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
458
  print(status_update)
459
 
460
+ # 5. Submit with robust retry mechanism
461
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
462
  try:
463
+ # Use manual retry for submission
464
+ max_attempts = 5
465
+ base_wait = 30 # Start with a long wait time
466
+
467
+ for attempt in range(max_attempts):
468
+ print(f"Submission attempt {attempt+1}/{max_attempts}")
469
+
470
+ try:
471
+ response = requests.post(submit_url, json=submission_data, timeout=60)
472
+
473
+ if response.status_code == 200:
474
+ result_data = response.json()
475
+ final_status = (
476
+ f"Submission Successful!\n"
477
+ f"User: {result_data.get('username')}\n"
478
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
479
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
480
+ f"Message: {result_data.get('message', 'No message received.')}"
481
+ )
482
+ print("Submission successful.")
483
+ results_df = pd.DataFrame(results_log)
484
+ return final_status, results_df
485
+
486
+ elif response.status_code == 429:
487
+ wait_time = base_wait * (2 ** attempt)
488
+ print(f"Rate limited (429). Waiting {wait_time} seconds before retry...")
489
+ time.sleep(wait_time)
490
+ else:
491
+ print(f"Submission failed with status code: {response.status_code}")
492
+ error_detail = f"Server responded with status {response.status_code}."
493
+ try:
494
+ error_json = response.json()
495
+ error_detail += f" Detail: {error_json.get('detail', response.text)}"
496
+ except:
497
+ error_detail += f" Response: {response.text[:500]}"
498
+
499
+ # For non-429 errors, don't retry
500
+ status_message = f"Submission Failed: {error_detail}"
501
+ print(status_message)
502
+ results_df = pd.DataFrame(results_log)
503
+ return status_message, results_df
504
+
505
+ except requests.exceptions.RequestException as e:
506
+ print(f"Request error during submission: {e}")
507
+ time.sleep(base_wait)
508
+
509
+ # If we get here, all attempts failed
510
+ status_message = f"Submission Failed: Maximum retry attempts exceeded."
511
  print(status_message)
512
  results_df = pd.DataFrame(results_log)
513
  return status_message, results_df
514
+
515
  except Exception as e:
516
  status_message = f"An unexpected error occurred during submission: {e}"
517
  print(status_message)