Final_Assignment_Template

Running

App Files Files Community

mobrobro commited on 3 days ago

Commit

0070112

verified ·

1 Parent(s): 519da76

Update app.py

Browse files

Key improvements in this implementation:

Robust Rate Limit Handling:

Uses a manual retry system with exponential backoff
Starts with a much longer base wait time (20-30 seconds)
Doubles wait time on each retry

Two-level Caching:

Caches questions to avoid repeated API calls
Also caches answers for each task ID to allow resuming interrupted runs
Saves cache after each question to preserve progress

Separate Retry Mechanisms:

Customized retry approach for questions endpoint
Another retry mechanism for the submission endpoint
Different wait times for different endpoints

Error Handling:

Better error detection and reporting
Continues processing even if file checks fail

This implementation significantly improves reliability by using much longer wait times between retries (which is often what's needed for severe rate limiting) and implementing a complete caching system that preserves all work done so far.

Files changed (1) hide show

app.py +222 -123

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
 import sys
 from pathlib import Path
@@ -219,30 +221,62 @@ def extract_final_answer(agent_response):
     return "Unable to determine"
-# Replace BasicAgent with your SmolaAgent in the run_and_submit_all function
-import backoff
-import time
-import json
-import os
-# Add backoff decorator for API requests
-@backoff.on_exception(
-    backoff.expo,
-    requests.exceptions.HTTPError,
-    max_tries=5,
-    giveup=lambda e: e.response.status_code != 429,
-    factor=2
-)
-def rate_limited_request(method, url, **kwargs):
-    """Make a request with automatic backoff for rate limited requests"""
-    response = requests.request(method, url, **kwargs)
-    response.raise_for_status()
-    return response
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the SmolaAgent on them, submits all answers,
-    and displays the results with rate limit handling.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
@@ -269,104 +303,150 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
-    # 2. Try to load cached questions or fetch from API with rate limit handling
-    cached_questions_path = "cached_questions.json"
-    if os.path.exists(cached_questions_path) and os.path.getsize(cached_questions_path) > 2:
-        print(f"Loading cached questions from {cached_questions_path}")
         try:
-            with open(cached_questions_path, "r") as f:
                 questions_data = json.load(f)
             print(f"Loaded {len(questions_data)} questions from cache")
         except Exception as e:
             print(f"Error loading cached questions: {e}")
-            return f"Error loading cached questions: {e}", None
     else:
-        # Fetch questions from API with rate limit handling
-        print(f"No cached questions found. Fetching from: {questions_url}")
         try:
-            response = rate_limited_request("GET", questions_url, timeout=15)
-            questions_data = response.json()
-            # Cache the questions for future runs
-            if questions_data:
                 try:
-                    with open(cached_questions_path, "w") as f:
-                        json.dump(questions_data, f)
-                    print(f"Cached {len(questions_data)} questions to {cached_questions_path}")
-                except Exception as e:
-                    print(f"Warning: Failed to cache questions: {e}")
-        except requests.exceptions.RequestException as e:
             print(f"Error fetching questions: {e}")
             return f"Error fetching questions: {e}", None
-        except requests.exceptions.JSONDecodeError as e:
-            print(f"Error decoding JSON response from questions endpoint: {e}")
-            print(f"Response text: {response.text[:500]}")
-            return f"Error decoding server response for questions: {e}", None
-        except Exception as e:
-            print(f"An unexpected error occurred fetching questions: {e}")
-            return f"An unexpected error occurred fetching questions: {e}", None
-    if not questions_data:
-        print("Questions list is empty.")
-        return "Questions list is empty or invalid format.", None
-    print(f"Processing {len(questions_data)} questions...")
     # 3. Run your Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
-        # Check if there are files associated with this task with rate limit handling
-        try:
-            files_url = f"{api_url}/files/{task_id}"
-            try:
-                files_response = rate_limited_request("GET", files_url, timeout=15)
-                if files_response.status_code == 200:
-                    print(f"Task {task_id} has associated files")
-                    # Handle files if needed
-            except Exception as e:
-                print(f"Error checking for files for task {task_id}: {e}")
-                # Continue even if file check fails
-        except Exception as e:
-            print(f"Error checking for files for task {task_id}: {e}")
-            # Continue even if file check fails
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
-        try:
-            # Get full agent response
-            full_response = agent(question_text)
-            # Extract just the final answer part for submission
-            submitted_answer = extract_final_answer(full_response)
-            # Add to submission payload
-            answers_payload.append({
-                "task_id": task_id,
-                "submitted_answer": submitted_answer,
-                "reasoning_trace": full_response  # Optional: include full reasoning
-            })
-            # Log for display
-            results_log.append({
-                "Task ID": task_id,
-                "Question": question_text,
-                "Submitted Answer": submitted_answer,
-                "Full Response": full_response
-            })
-            print(f"Processed task {task_id}, answer: {submitted_answer}")
-        except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
@@ -377,42 +457,61 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
-    # 5. Submit with rate limit handling
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = rate_limited_request("POST", submit_url, json=submission_data, timeout=60)
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
-            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
-        )
-        print("Submission successful.")
-        results_df = pd.DataFrame(results_log)
-        return final_status, results_df
-    except requests.exceptions.HTTPError as e:
-        error_detail = f"Server responded with status {e.response.status_code}."
-        try:
-            error_json = e.response.json()
-            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-        except requests.exceptions.JSONDecodeError:
-            error_detail += f" Response: {e.response.text[:500]}"
-        status_message = f"Submission Failed: {error_detail}"
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.Timeout:
-        status_message = "Submission Failed: The request timed out."
-        print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
-    except requests.exceptions.RequestException as e:
-        status_message = f"Submission Failed: Network error - {e}"
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)

 import requests
 import inspect
 import pandas as pd
+import json
+import time
 import sys
 from pathlib import Path
     return "Unable to determine"
+# Simple rate-limited request function with retry
+def make_rate_limited_request(url, method="GET", max_retries=5, initial_wait=5, **kwargs):
+    """
+    Makes HTTP requests with automatic handling of rate limits (429)
+    Args:
+        url: The URL to request
+        method: HTTP method (GET, POST, etc.)
+        max_retries: Maximum number of retries for rate limit errors
+        initial_wait: Initial wait time in seconds, doubled on each retry
+        **kwargs: Additional arguments to pass to requests.request
+    Returns:
+        requests.Response object on success
+    Raises:
+        Exception if max_retries is exceeded
+    """
+    wait_time = initial_wait
+    for attempt in range(max_retries):
+        try:
+            response = requests.request(method, url, **kwargs)
+            # If not rate limited, return the response
+            if response.status_code != 429:
+                return response
+            # Handle rate limiting
+            retry_after = response.headers.get('Retry-After')
+            if retry_after:
+                # If server specified wait time, use that
+                wait_seconds = int(retry_after)
+                print(f"Rate limited. Server requested wait of {wait_seconds} seconds.")
+            else:
+                # Otherwise use exponential backoff
+                wait_seconds = wait_time
+                wait_time *= 2  # Double the wait time for next attempt
+                print(f"Rate limited. Using exponential backoff: waiting {wait_seconds} seconds.")
+            # Sleep and retry
+            time.sleep(wait_seconds)
+        except requests.exceptions.RequestException as e:
+            print(f"Request error: {e}")
+            # For connection errors, wait and retry
+            time.sleep(wait_time)
+            wait_time *= 2
+    # If we get here, we've exceeded max_retries
+    raise Exception(f"Failed to get a valid response after {max_retries} attempts")
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the SmolaAgent on them, submits all answers,
+    and displays the results. Uses caching and handles rate limits.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
+    # 2. Use cached questions or fetch with rate limiting
+    cache_file = "cached_questions.json"
+    # Try to load from cache first
+    if os.path.exists(cache_file) and os.path.getsize(cache_file) > 10:
+        print(f"Loading cached questions from {cache_file}")
         try:
+            with open(cache_file, 'r') as f:
                 questions_data = json.load(f)
             print(f"Loaded {len(questions_data)} questions from cache")
         except Exception as e:
             print(f"Error loading cached questions: {e}")
+            questions_data = None
     else:
+        questions_data = None
+    # Fetch if not cached
+    if not questions_data:
+        print("Fetching questions with rate limit handling...")
         try:
+            # Manually implement a retry with long waits
+            max_attempts = 5
+            base_wait = 20  # Start with a long wait time
+            for attempt in range(max_attempts):
+                print(f"Attempt {attempt+1}/{max_attempts} to fetch questions")
                 try:
+                    response = requests.get(questions_url, timeout=15)
+                    if response.status_code == 200:
+                        questions_data = response.json()
+                        print(f"Successfully fetched {len(questions_data)} questions")
+                        # Cache for future use
+                        try:
+                            with open(cache_file, 'w') as f:
+                                json.dump(questions_data, f)
+                            print(f"Cached {len(questions_data)} questions to {cache_file}")
+                        except Exception as e:
+                            print(f"Warning: Failed to cache questions: {e}")
+                        break  # Success, exit retry loop
+                    elif response.status_code == 429:
+                        wait_time = base_wait * (2 ** attempt)
+                        print(f"Rate limited (429). Waiting {wait_time} seconds before retry...")
+                        time.sleep(wait_time)
+                    else:
+                        print(f"Unexpected status code: {response.status_code}")
+                        time.sleep(base_wait)
+                except requests.exceptions.RequestException as e:
+                    print(f"Request error: {e}")
+                    time.sleep(base_wait)
+            if not questions_data:
+                return "Failed to fetch questions after multiple attempts. Please try again later.", None
+        except Exception as e:
             print(f"Error fetching questions: {e}")
             return f"Error fetching questions: {e}", None
     # 3. Run your Agent
     results_log = []
     answers_payload = []
+    answers_cache_file = "cached_answers.json"
+    # Try to load cached answers
+    cached_answers = {}
+    if os.path.exists(answers_cache_file):
+        try:
+            with open(answers_cache_file, 'r') as f:
+                cached_answers = json.load(f)
+            print(f"Loaded {len(cached_answers)} cached answers")
+        except Exception as e:
+            print(f"Error loading cached answers: {e}")
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+        # Check if we already have a cached answer for this task
+        if task_id in cached_answers:
+            print(f"Using cached answer for task {task_id}")
+            full_response = cached_answers[task_id]['full_response']
+            submitted_answer = cached_answers[task_id]['submitted_answer']
+        else:
+            try:
+                # Check for associated files with manual retry
+                try:
+                    files_url = f"{api_url}/files/{task_id}"
+                    files_response = requests.get(files_url, timeout=15)
+                    if files_response.status_code == 200:
+                        print(f"Task {task_id} has associated files")
+                        # Handle files if needed
+                except Exception as e:
+                    print(f"Error checking for files for task {task_id}: {e}")
+                # Get agent response
+                full_response = agent(question_text)
+                # Extract final answer
+                submitted_answer = extract_final_answer(full_response)
+                # Cache this answer
+                cached_answers[task_id] = {
+                    'full_response': full_response,
+                    'submitted_answer': submitted_answer
+                }
+                # Save to cache after each question to avoid losing progress
+                try:
+                    with open(answers_cache_file, 'w') as f:
+                        json.dump(cached_answers, f)
+                except Exception as e:
+                    print(f"Warning: Failed to save answer cache: {e}")
+            except Exception as e:
+                print(f"Error running agent on task {task_id}: {e}")
+                full_response = f"AGENT ERROR: {e}"
+                submitted_answer = "Unable to determine"
+        # Add to submission payload
+        answers_payload.append({
+            "task_id": task_id,
+            "submitted_answer": submitted_answer,
+            "reasoning_trace": full_response
+        })
+        # Log for display
+        results_log.append({
+            "Task ID": task_id,
+            "Question": question_text,
+            "Submitted Answer": submitted_answer,
+            "Full Response": full_response
+        })
+        print(f"Processed task {task_id}, answer: {submitted_answer}")
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
+    # 5. Submit with robust retry mechanism
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        # Use manual retry for submission
+        max_attempts = 5
+        base_wait = 30  # Start with a long wait time
+        for attempt in range(max_attempts):
+            print(f"Submission attempt {attempt+1}/{max_attempts}")
+            try:
+                response = requests.post(submit_url, json=submission_data, timeout=60)
+                if response.status_code == 200:
+                    result_data = response.json()
+                    final_status = (
+                        f"Submission Successful!\n"
+                        f"User: {result_data.get('username')}\n"
+                        f"Overall Score: {result_data.get('score', 'N/A')}% "
+                        f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+                        f"Message: {result_data.get('message', 'No message received.')}"
+                    )
+                    print("Submission successful.")
+                    results_df = pd.DataFrame(results_log)
+                    return final_status, results_df
+                elif response.status_code == 429:
+                    wait_time = base_wait * (2 ** attempt)
+                    print(f"Rate limited (429). Waiting {wait_time} seconds before retry...")
+                    time.sleep(wait_time)
+                else:
+                    print(f"Submission failed with status code: {response.status_code}")
+                    error_detail = f"Server responded with status {response.status_code}."
+                    try:
+                        error_json = response.json()
+                        error_detail += f" Detail: {error_json.get('detail', response.text)}"
+                    except:
+                        error_detail += f" Response: {response.text[:500]}"
+                    # For non-429 errors, don't retry
+                    status_message = f"Submission Failed: {error_detail}"
+                    print(status_message)
+                    results_df = pd.DataFrame(results_log)
+                    return status_message, results_df
+            except requests.exceptions.RequestException as e:
+                print(f"Request error during submission: {e}")
+                time.sleep(base_wait)
+        # If we get here, all attempts failed
+        status_message = f"Submission Failed: Maximum retry attempts exceeded."
         print(status_message)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)