Jofthomas committed on
Commit
3227abd
·
verified ·
1 Parent(s): 3f5f3ef

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +134 -126
main.py CHANGED
@@ -1,4 +1,3 @@
1
- # Import necessary libraries (ensure all required imports are at the top)
2
  import os
3
  import pandas as pd
4
  from fastapi import FastAPI, HTTPException, Body
@@ -28,7 +27,6 @@ filtered_dataset = None
28
 
29
  ALLOWED_CACHE_BASE = os.path.abspath("/app/.cache")
30
 
31
- # --- Define ErrorResponse if not already defined ---
32
  class ErrorResponse(BaseModel):
33
  detail: str
34
 
@@ -42,22 +40,19 @@ def load_questions():
42
  global filtered_dataset
43
  global questions_for_api
44
  global ground_truth_answers
45
- global task_file_paths # Declare modification of global
46
 
47
  tempo_filtered = []
48
- # Clear existing data from previous runs or restarts
49
  questions_for_api.clear()
50
  ground_truth_answers.clear()
51
- task_file_paths.clear() # Clear the file path mapping
52
 
53
  logger.info("Starting to load and filter GAIA dataset (validation split)...")
54
  try:
55
- # Load the specified split
56
  dataset = load_dataset("gaia-benchmark/GAIA", "2023_level1", split="validation", trust_remote_code=True)
57
  logger.info(f"GAIA dataset validation split loaded. Features: {dataset.features}")
58
  except Exception as e:
59
  logger.error(f"Failed to load GAIA dataset: {e}", exc_info=True)
60
- # Depending on requirements, you might want to exit or raise a more specific error
61
  raise RuntimeError("Could not load the primary GAIA dataset.") from e
62
 
63
  # --- Filtering Logic based on Annotator Metadata ---
@@ -72,44 +67,37 @@ def load_questions():
72
  try:
73
  num_tools = int(num_tools_str)
74
  num_steps = int(num_steps_str)
75
- # Apply filter conditions
76
  if num_tools < tool_threshold and num_steps < step_threshold:
77
- tempo_filtered.append(item) # Add the original item if it matches filter
78
  except ValueError:
79
  logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - Could not convert tool/step count in metadata: tools='{num_tools_str}', steps='{num_steps_str}'.")
80
  else:
81
  logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - 'Number of tools' or 'Number of steps' missing in Metadata.")
82
  else:
83
- # If metadata is essential for filtering, you might want to skip items without it
84
  logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - Missing 'Annotator Metadata'.")
85
 
86
- filtered_dataset = tempo_filtered # Store the list of filtered original dataset items
87
  logger.info(f"Found {len(filtered_dataset)} questions matching the criteria (tools < {tool_threshold}, steps < {step_threshold}).")
88
 
89
  processed_count = 0
90
- # --- Process filtered items for API and File Mapping ---
91
  for item in filtered_dataset:
92
  # Extract data from the dataset item
93
  task_id = item.get('task_id')
94
  original_question_text = item.get('Question')
95
  final_answer = item.get('Final answer')
96
- local_file_path = item.get('file_path') # Server-local path from dataset
97
- file_name = item.get('file_name') # Filename from dataset
98
 
99
- # Validate essential fields needed for processing & ground truth
100
- # Note: We proceed even if file path/name are missing, just won't map the file.
101
  if task_id and original_question_text and final_answer is not None:
102
 
103
  # 1. Create the dictionary to be exposed via the API
104
  # (Includes 'file_name' for info, but excludes 'file_path')
105
  processed_item = {
106
  "task_id": str(task_id),
107
- "question": str(original_question_text), # Rename 'Question' -> 'question'
108
- # Include other desired fields, using .get() for safety
109
  "Level": item.get("Level"),
110
- "file_name": file_name, # Include filename for client info
111
  }
112
- # Optional: Remove keys with None values if you prefer cleaner JSON
113
  processed_item = {k: v for k, v in processed_item.items() if v is not None}
114
 
115
  questions_for_api.append(processed_item)
@@ -122,21 +110,15 @@ def load_questions():
122
  # Log if the path from the dataset isn't absolute (might indicate issues)
123
  if not os.path.isabs(local_file_path):
124
  logger.warning(f"Task {task_id}: Path '{local_file_path}' from dataset is not absolute. This might cause issues finding the file on the server.")
125
- # Depending on dataset guarantees, you might try making it absolute:
126
- # Assuming WORKDIR is /app as per Dockerfile if paths are relative
127
- # local_file_path = os.path.abspath(os.path.join("/app", local_file_path))
128
 
129
- # Check if the file actually exists at the path ON THE SERVER
130
  if os.path.exists(local_file_path) and os.path.isfile(local_file_path):
131
- # Path exists, store the mapping
132
  task_file_paths[str(task_id)] = local_file_path
133
  logger.debug(f"Stored file path mapping for task_id {task_id}: {local_file_path}")
134
  else:
135
- # Path does *not* exist or is not a file on server filesystem
136
  logger.warning(f"File path '{local_file_path}' for task_id {task_id} does NOT exist or is not a file on server. Mapping skipped.")
137
- # Log if file info was missing in the first place
138
- elif task_id: # Log only if we have a task_id to reference
139
- # Check which specific part was missing for better debugging
140
  if not local_file_path and not file_name:
141
  logger.debug(f"Task {task_id}: No 'file_path' or 'file_name' found in dataset item. No file mapping stored.")
142
  elif not local_file_path:
@@ -147,17 +129,14 @@ def load_questions():
147
 
148
  processed_count += 1
149
  else:
150
- # Log skipping due to missing core fields (task_id, Question, Final answer)
151
  logger.warning(f"Skipping item processing due to missing essential fields: task_id={task_id}, has_question={original_question_text is not None}, has_answer={final_answer is not None}")
152
 
153
- # Final summary logging
154
  logger.info(f"Successfully processed {processed_count} questions for the API.")
155
  logger.info(f"Stored file path mappings for {len(task_file_paths)} tasks.")
156
 
157
  if not questions_for_api:
158
  logger.error("CRITICAL: No valid questions were loaded after filtering and processing. API endpoints like /questions will fail.")
159
- # Consider raising an error if the application cannot function without questions
160
- # raise RuntimeError("Failed to load mandatory question data after filtering.")
161
 
162
 
163
 
@@ -165,8 +144,7 @@ class Question(BaseModel):
165
  task_id: str
166
  question: str
167
  Level: Optional[str] = None
168
- file_name: Optional[str] = None # Keep filename for info
169
- # file_path: Optional[str] = None # REMOVE file_path from the response model
170
 
171
 
172
  # --- The rest of your Pydantic models remain the same ---
@@ -176,7 +154,7 @@ class AnswerItem(BaseModel):
176
 
177
  class Submission(BaseModel):
178
  username: str = Field(..., description="Hugging Face username", min_length=1)
179
- agent_code: str = Field(..., description="The Python class code for the agent", min_length=10) # Basic check
180
  answers: List[AnswerItem] = Field(..., description="List of answers submitted by the agent")
181
 
182
  class ScoreResponse(BaseModel):
@@ -190,15 +168,13 @@ class ScoreResponse(BaseModel):
190
  class ErrorResponse(BaseModel):
191
  detail: str
192
 
193
- # Keep other models as they are (AnswerItem, Submission, ScoreResponse, ErrorResponse)
194
- # ... (rest of the Pydantic models remain the same) ...
195
  class AnswerItem(BaseModel):
196
  task_id: str
197
  submitted_answer: str = Field(..., description="The agent's answer for the task_id")
198
 
199
  class Submission(BaseModel):
200
  username: str = Field(..., description="Hugging Face username", min_length=1)
201
- agent_code: str = Field(..., description="The Python class code for the agent", min_length=10) # Basic check
202
  answers: List[AnswerItem] = Field(..., description="List of answers submitted by the agent")
203
 
204
  class ScoreResponse(BaseModel):
@@ -231,18 +207,16 @@ async def startup_event():
231
  logger.info(f"Successfully loaded {len(questions_for_api)} questions.")
232
  except Exception as e:
233
  logger.error(f"CRITICAL ERROR DURING STARTUP while loading questions: {e}", exc_info=True)
234
- # import sys
235
- # sys.exit(1) # Consider exiting if questions are critical
236
 
237
 
238
- # --- Your Endpoints ---
239
  @app.get("/files/{task_id}",
240
  summary="Get Associated File by Task ID",
241
  description="Downloads the file associated with the given task_id, if one exists and is mapped.",
242
  responses={
243
  200: {
244
  "description": "File content.",
245
- "content": {"*/*": {}} # Indicates response can be any file type
246
  },
247
  403: {"model": ErrorResponse, "description": "Access denied (e.g., path traversal attempt)."},
248
  404: {"model": ErrorResponse, "description": "Task ID not found, no file associated, or file missing on server."},
@@ -265,80 +239,93 @@ async def get_task_file(task_id: str):
265
 
266
  # --- CRUCIAL SECURITY CHECK ---
267
  try:
268
- # Resolve to absolute paths to prevent '..' tricks
269
- # --- local_file_path IS NOW DEFINED before being used ---
270
  abs_file_path = os.path.abspath(local_file_path)
271
  abs_base_path = ALLOWED_CACHE_BASE # Already absolute
272
 
273
- # Check if the resolved file path starts with the allowed base directory
274
  if not abs_file_path.startswith(abs_base_path):
275
  logger.error(f"SECURITY ALERT: Path traversal attempt denied for task_id '{task_id}'. Path '{local_file_path}' resolves outside base '{abs_base_path}'.")
276
  raise HTTPException(status_code=403, detail="File access denied.")
277
 
278
- # Check if the file exists at the resolved, validated path
279
  if not os.path.exists(abs_file_path) or not os.path.isfile(abs_file_path):
280
  logger.error(f"File not found on server for task_id '{task_id}' at expected path: {abs_file_path}")
281
  raise HTTPException(status_code=404, detail=f"File associated with task_id {task_id} not found on server disk.")
282
 
283
  except HTTPException as http_exc:
284
- raise http_exc # Re-raise our own security/404 exceptions
285
  except Exception as path_err:
286
  logger.error(f"Error resolving or checking path '{local_file_path}' for task_id '{task_id}': {path_err}", exc_info=True)
287
  raise HTTPException(status_code=500, detail="Server error validating file path.")
288
- # --- END SECURITY CHECK ---
289
 
290
- # Determine MIME type for the Content-Type header
291
- mime_type, _ = mimetypes.guess_type(abs_file_path) # Ensure 'import mimetypes' is at the top
292
- media_type = mime_type if mime_type else "application/octet-stream" # Default if unknown
293
 
294
- # Extract filename for the Content-Disposition header (suggests filename to browser/client)
 
 
295
  file_name_for_download = os.path.basename(abs_file_path)
296
 
297
  logger.info(f"Serving file '{file_name_for_download}' (type: {media_type}) for task_id '{task_id}' from path: {abs_file_path}")
298
 
299
- # Use FileResponse to efficiently stream the file
300
  return FileResponse(path=abs_file_path, media_type=media_type, filename=file_name_for_download)
301
- def update_huggingface_dataset(username: str, score: float):
302
- """Loads the dataset, updates the score if higher, and pushes back."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  try:
304
- # 1. Load the dataset
305
- logger.info(f"Loading dataset '{HF_DATASET_ID}'...")
 
 
 
 
 
 
 
 
306
  ds_dict = None
 
307
  try:
308
- # Use hf_hub_download to check if the parquet file exists, avoiding full dataset load error if empty
309
- # This assumes the dataset uses the default 'train' split and parquet format. Adjust if needed.
310
- hf_hub_download(repo_id=HF_DATASET_ID, filename="data/train-00000-of-00001.parquet", repo_type="dataset")
311
- ds_dict = load_dataset(HF_DATASET_ID)
312
  logger.info("Dataset loaded successfully.")
313
- if "train" not in ds_dict:
314
- logger.warning(f"Dataset '{HF_DATASET_ID}' does not contain a 'train' split. Creating one.")
315
- df = pd.DataFrame({'username': pd.Series(dtype='str'),
316
- 'score': pd.Series(dtype='float'),
317
- 'timestamp': pd.Series(dtype='str')})
318
  else:
319
- # Convert the 'train' split to a pandas DataFrame for easier manipulation
320
- df = ds_dict['train'].to_pandas()
321
-
322
- except Exception as load_error: # Catch broad exception for file not found or other loading issues
323
- logger.warning(f"Could not load dataset '{HF_DATASET_ID}' or it might be empty/new ({load_error}). Creating structure.")
324
- # Create an empty DataFrame with the correct schema
325
- df = pd.DataFrame({'username': pd.Series(dtype='str'),
326
- 'score': pd.Series(dtype='float'),
327
- 'timestamp': pd.Series(dtype='str')})
328
-
329
-
330
- # Ensure columns exist, add if they don't
331
- for col, dtype in [('username', 'str'), ('score', 'float'), ('timestamp', 'str')]:
332
- if col not in df.columns:
333
- logger.warning(f"Column '{col}' not found in dataset. Adding it.")
334
- df[col] = pd.Series(dtype=dtype)
335
-
336
-
337
- # Convert score column to numeric, coercing errors
338
- df['score'] = pd.to_numeric(df['score'], errors='coerce')
339
- # Fill potential NaN values in score with 0.0 before comparison/aggregation
340
- df['score'] = df['score'].fillna(0.0)
341
-
342
 
343
  # 2. Find existing score for the user
344
  existing_entries = df[df['username'] == username]
@@ -347,51 +334,75 @@ def update_huggingface_dataset(username: str, score: float):
347
 
348
  if not existing_entries.empty:
349
  # User exists, find their highest score
350
- # Handle potential NaN scores from coercion or previous bad data (though fillna above should help)
351
- max_existing_score = existing_entries['score'].max()
352
  if score > max_existing_score:
353
- logger.info(f"New score {score} is higher than existing max {max_existing_score} for {username}. Updating.")
354
- # Remove old entries for this user
355
- df = df[df['username'] != username]
356
- # Add new entry
357
- new_entry = pd.DataFrame([{'username': username, 'score': score, 'timestamp': current_timestamp}])
 
 
 
 
 
358
  df = pd.concat([df, new_entry], ignore_index=True)
359
  needs_update = True
360
  else:
361
  logger.info(f"New score {score} is not higher than existing max {max_existing_score} for {username}. No update needed.")
362
  else:
363
  # User does not exist, add them
364
- logger.info(f"User {username} not found. Adding new entry.")
365
- new_entry = pd.DataFrame([{'username': username, 'score': score, 'timestamp': current_timestamp}])
 
 
 
 
 
366
  df = pd.concat([df, new_entry], ignore_index=True)
367
  needs_update = True
368
 
369
  # 3. Push updated data back to Hugging Face Hub if changes were made
370
  if needs_update:
371
- logger.info(f"Pushing updated dataset to '{HF_DATASET_ID}'...")
372
- # Convert potentially modified DataFrame back to a Dataset object
373
- # Ensure the schema matches if columns were added/modified.
374
- # Use 'train' split convention.
375
- # Make sure the dtypes are correct before creating the Dataset
376
- df['username'] = df['username'].astype(str)
377
- df['score'] = df['score'].astype(float)
378
- df['timestamp'] = df['timestamp'].astype(str)
379
-
380
- updated_ds = DatasetDict({'train': Dataset.from_pandas(df)})
381
- logger.info(f"Dataset to push: {updated_ds}") # Log the dataset structure
382
- updated_ds.push_to_hub(HF_DATASET_ID) # Uncomment this line to enable leaderboard updates
383
- logger.warning("Dataset push to hub is currently commented out. Uncomment the line above to enable leaderboard updates.") # REMINDER
384
- logger.info("Dataset push simulated/attempted.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  return True
386
  else:
 
387
  return False # No update was pushed
388
 
389
  except Exception as e:
390
  logger.error(f"Error interacting with Hugging Face dataset '{HF_DATASET_ID}': {e}", exc_info=True)
391
- # Re-raise the exception to be caught by the endpoint handler
 
392
  raise HTTPException(status_code=500, detail=f"Failed to update Hugging Face dataset: {e}")
393
 
394
- # --- API Endpoints (Modified response_model) ---
395
 
396
  @app.get("/questions",
397
  # Return a list of dictionaries with arbitrary keys/values
@@ -464,27 +475,26 @@ async def submit_answers(submission: Submission = Body(...)):
464
 
465
  correct_count = 0
466
  total_attempted_in_payload = len(submission.answers)
467
- valid_attempted_count = 0 # Count attempts where task_id was valid
468
  processed_ids = set()
469
 
470
  for answer_item in submission.answers:
471
- task_id = str(answer_item.task_id) # Ensure string comparison
472
- submitted = str(answer_item.submitted_answer) # Ensure string comparison
473
 
474
- # Prevent duplicate task_id submissions in the same request
475
  if task_id in processed_ids:
476
  logger.warning(f"Duplicate task_id '{task_id}' in submission from {submission.username}. Skipping.")
477
- continue # Don't count this as an attempt for scoring
478
  processed_ids.add(task_id)
479
 
480
 
481
- # Check if task_id is valid (exists in our loaded ground truth)
482
  if task_id not in ground_truth_answers:
483
  logger.warning(f"Task ID '{task_id}' submitted by {submission.username} not found in ground truth list. Skipping this answer.")
484
- # Don't count this as a valid attempt for score calculation
485
  continue
486
 
487
- # If we reach here, the task_id is valid
488
  valid_attempted_count += 1
489
  ground_truth = ground_truth_answers[task_id]
490
  # Compare answers (case-insensitive, strip whitespace)
@@ -494,8 +504,6 @@ async def submit_answers(submission: Submission = Body(...)):
494
  else:
495
  logger.debug(f"Incorrect answer for {task_id} from {submission.username}. Submitted: '{submitted}', Expected: '{ground_truth}'")
496
 
497
-
498
- # Calculate score based on valid attempts AND total number of questions available
499
  if valid_attempted_count == 0:
500
  score = 0.0
501
  message = f"Submission received, but no valid/matching task IDs were found in the {total_attempted_in_payload} answers provided."
@@ -515,7 +523,7 @@ async def submit_answers(submission: Submission = Body(...)):
515
 
516
  # Update Hugging Face dataset
517
  try:
518
- updated = update_huggingface_dataset(submission.username, score)
519
  if updated:
520
  message += " High score updated on leaderboard."
521
  logger.info(f"Leaderboard updated for {submission.username}.")
 
 
1
  import os
2
  import pandas as pd
3
  from fastapi import FastAPI, HTTPException, Body
 
27
 
28
  ALLOWED_CACHE_BASE = os.path.abspath("/app/.cache")
29
 
 
30
  class ErrorResponse(BaseModel):
31
  detail: str
32
 
 
40
  global filtered_dataset
41
  global questions_for_api
42
  global ground_truth_answers
43
+ global task_file_paths
44
 
45
  tempo_filtered = []
 
46
  questions_for_api.clear()
47
  ground_truth_answers.clear()
48
+ task_file_paths.clear()
49
 
50
  logger.info("Starting to load and filter GAIA dataset (validation split)...")
51
  try:
 
52
  dataset = load_dataset("gaia-benchmark/GAIA", "2023_level1", split="validation", trust_remote_code=True)
53
  logger.info(f"GAIA dataset validation split loaded. Features: {dataset.features}")
54
  except Exception as e:
55
  logger.error(f"Failed to load GAIA dataset: {e}", exc_info=True)
 
56
  raise RuntimeError("Could not load the primary GAIA dataset.") from e
57
 
58
  # --- Filtering Logic based on Annotator Metadata ---
 
67
  try:
68
  num_tools = int(num_tools_str)
69
  num_steps = int(num_steps_str)
 
70
  if num_tools < tool_threshold and num_steps < step_threshold:
71
+ tempo_filtered.append(item)
72
  except ValueError:
73
  logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - Could not convert tool/step count in metadata: tools='{num_tools_str}', steps='{num_steps_str}'.")
74
  else:
75
  logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - 'Number of tools' or 'Number of steps' missing in Metadata.")
76
  else:
 
77
  logger.warning(f"Skipping Task ID: {item.get('task_id', 'N/A')} - Missing 'Annotator Metadata'.")
78
 
79
+ filtered_dataset = tempo_filtered
80
  logger.info(f"Found {len(filtered_dataset)} questions matching the criteria (tools < {tool_threshold}, steps < {step_threshold}).")
81
 
82
  processed_count = 0
 
83
  for item in filtered_dataset:
84
  # Extract data from the dataset item
85
  task_id = item.get('task_id')
86
  original_question_text = item.get('Question')
87
  final_answer = item.get('Final answer')
88
+ local_file_path = item.get('file_path')
89
+ file_name = item.get('file_name')
90
 
 
 
91
  if task_id and original_question_text and final_answer is not None:
92
 
93
  # 1. Create the dictionary to be exposed via the API
94
  # (Includes 'file_name' for info, but excludes 'file_path')
95
  processed_item = {
96
  "task_id": str(task_id),
97
+ "question": str(original_question_text),
 
98
  "Level": item.get("Level"),
99
+ "file_name": file_name,
100
  }
 
101
  processed_item = {k: v for k, v in processed_item.items() if v is not None}
102
 
103
  questions_for_api.append(processed_item)
 
110
  # Log if the path from the dataset isn't absolute (might indicate issues)
111
  if not os.path.isabs(local_file_path):
112
  logger.warning(f"Task {task_id}: Path '{local_file_path}' from dataset is not absolute. This might cause issues finding the file on the server.")
113
+
 
 
114
 
 
115
  if os.path.exists(local_file_path) and os.path.isfile(local_file_path):
 
116
  task_file_paths[str(task_id)] = local_file_path
117
  logger.debug(f"Stored file path mapping for task_id {task_id}: {local_file_path}")
118
  else:
 
119
  logger.warning(f"File path '{local_file_path}' for task_id {task_id} does NOT exist or is not a file on server. Mapping skipped.")
120
+ elif task_id:
121
+
 
122
  if not local_file_path and not file_name:
123
  logger.debug(f"Task {task_id}: No 'file_path' or 'file_name' found in dataset item. No file mapping stored.")
124
  elif not local_file_path:
 
129
 
130
  processed_count += 1
131
  else:
 
132
  logger.warning(f"Skipping item processing due to missing essential fields: task_id={task_id}, has_question={original_question_text is not None}, has_answer={final_answer is not None}")
133
 
 
134
  logger.info(f"Successfully processed {processed_count} questions for the API.")
135
  logger.info(f"Stored file path mappings for {len(task_file_paths)} tasks.")
136
 
137
  if not questions_for_api:
138
  logger.error("CRITICAL: No valid questions were loaded after filtering and processing. API endpoints like /questions will fail.")
139
+
 
140
 
141
 
142
 
 
144
  task_id: str
145
  question: str
146
  Level: Optional[str] = None
147
+ file_name: Optional[str] = None
 
148
 
149
 
150
  # --- The rest of your Pydantic models remain the same ---
 
154
 
155
  class Submission(BaseModel):
156
  username: str = Field(..., description="Hugging Face username", min_length=1)
157
+ agent_code: str = Field(..., description="The Python class code for the agent")
158
  answers: List[AnswerItem] = Field(..., description="List of answers submitted by the agent")
159
 
160
  class ScoreResponse(BaseModel):
 
168
  class ErrorResponse(BaseModel):
169
  detail: str
170
 
 
 
171
  class AnswerItem(BaseModel):
172
  task_id: str
173
  submitted_answer: str = Field(..., description="The agent's answer for the task_id")
174
 
175
  class Submission(BaseModel):
176
  username: str = Field(..., description="Hugging Face username", min_length=1)
177
+ agent_code: str = Field(..., description="The Python class code for the agent", min_length=10)
178
  answers: List[AnswerItem] = Field(..., description="List of answers submitted by the agent")
179
 
180
  class ScoreResponse(BaseModel):
 
207
  logger.info(f"Successfully loaded {len(questions_for_api)} questions.")
208
  except Exception as e:
209
  logger.error(f"CRITICAL ERROR DURING STARTUP while loading questions: {e}", exc_info=True)
210
+
 
211
 
212
 
 
213
  @app.get("/files/{task_id}",
214
  summary="Get Associated File by Task ID",
215
  description="Downloads the file associated with the given task_id, if one exists and is mapped.",
216
  responses={
217
  200: {
218
  "description": "File content.",
219
+ "content": {"*/*": {}}
220
  },
221
  403: {"model": ErrorResponse, "description": "Access denied (e.g., path traversal attempt)."},
222
  404: {"model": ErrorResponse, "description": "Task ID not found, no file associated, or file missing on server."},
 
239
 
240
  # --- CRUCIAL SECURITY CHECK ---
241
  try:
242
+
 
243
  abs_file_path = os.path.abspath(local_file_path)
244
  abs_base_path = ALLOWED_CACHE_BASE # Already absolute
245
 
 
246
  if not abs_file_path.startswith(abs_base_path):
247
  logger.error(f"SECURITY ALERT: Path traversal attempt denied for task_id '{task_id}'. Path '{local_file_path}' resolves outside base '{abs_base_path}'.")
248
  raise HTTPException(status_code=403, detail="File access denied.")
249
 
 
250
  if not os.path.exists(abs_file_path) or not os.path.isfile(abs_file_path):
251
  logger.error(f"File not found on server for task_id '{task_id}' at expected path: {abs_file_path}")
252
  raise HTTPException(status_code=404, detail=f"File associated with task_id {task_id} not found on server disk.")
253
 
254
  except HTTPException as http_exc:
255
+ raise http_exc
256
  except Exception as path_err:
257
  logger.error(f"Error resolving or checking path '{local_file_path}' for task_id '{task_id}': {path_err}", exc_info=True)
258
  raise HTTPException(status_code=500, detail="Server error validating file path.")
 
259
 
 
 
 
260
 
261
+ mime_type, _ = mimetypes.guess_type(abs_file_path)
262
+ media_type = mime_type if mime_type else "application/octet-stream"
263
+
264
  file_name_for_download = os.path.basename(abs_file_path)
265
 
266
  logger.info(f"Serving file '{file_name_for_download}' (type: {media_type}) for task_id '{task_id}' from path: {abs_file_path}")
267
 
 
268
  return FileResponse(path=abs_file_path, media_type=media_type, filename=file_name_for_download)
269
+
270
+ def update_huggingface_dataset(username: str, score: float, code_link: str):
271
+ """
272
+ Loads the dataset, updates the score and code link if the score is higher,
273
+ and pushes back to the Hugging Face Hub.
274
+
275
+ Args:
276
+ username: The username of the participant.
277
+ score: The new score achieved by the participant.
278
+ code_link: The link to the code submission associated with this score.
279
+
280
+ Returns:
281
+ True if the dataset was updated and pushed, False otherwise.
282
+
283
+ Raises:
284
+ HTTPException: If there's an error interacting with the dataset.
285
+ """
286
  try:
287
+ # Define the expected schema including the 'code' column
288
+ expected_columns = {
289
+ 'username': 'str',
290
+ 'score': 'float',
291
+ 'timestamp': 'str',
292
+ 'code': 'str' # Added the code column
293
+ }
294
+
295
+ # 1. Attempt to load the dataset
296
+ logger.info(f"Attempting to load dataset '{HF_DATASET_ID}'...")
297
  ds_dict = None
298
+ df = None
299
  try:
300
+ # Try downloading a file first to check existence without loading full dataset if large
301
+ # This might not be necessary if load_dataset handles non-existence gracefully
302
+ # hf_hub_download(repo_id=HF_DATASET_ID, filename="data/train-00000-of-00001.parquet", repo_type="dataset")
303
+ ds_dict = load_dataset(HF_DATASET_ID, trust_remote_code=True) # Added trust_remote_code=True if needed
304
  logger.info("Dataset loaded successfully.")
305
+ if "train" in ds_dict:
306
+ df = ds_dict['train'].to_pandas()
 
 
 
307
  else:
308
+ logger.warning(f"Dataset '{HF_DATASET_ID}' loaded but no 'train' split found. Creating structure.")
309
+ df = pd.DataFrame({col: pd.Series(dtype=dtype) for col, dtype in expected_columns.items()})
310
+
311
+ except Exception as load_error:
312
+ logger.warning(f"Could not load dataset '{HF_DATASET_ID}' or it's empty/new ({load_error}). Will create structure.")
313
+ # Create an empty DataFrame with the correct schema if loading failed
314
+ df = pd.DataFrame({col: pd.Series(dtype=dtype) for col, dtype in expected_columns.items()})
315
+
316
+ # Ensure all expected columns exist, add if they don't
317
+ for col, dtype in expected_columns.items():
318
+ if col not in df.columns:
319
+ logger.warning(f"Column '{col}' not found in loaded data. Adding it.")
320
+ # Use appropriate default based on dtype if needed, though concat handles it
321
+ df[col] = pd.Series(dtype=dtype)
322
+
323
+ # Convert score column to numeric, coercing errors, and fill NaNs
324
+ df['score'] = pd.to_numeric(df['score'], errors='coerce').fillna(0.0)
325
+ # Ensure other columns have correct types, fill NaNs for string columns
326
+ df['username'] = df['username'].astype(str).fillna('')
327
+ df['timestamp'] = df['timestamp'].astype(str).fillna('')
328
+ df['code'] = df['code'].astype(str).fillna('') # Ensure code column is string
 
 
329
 
330
  # 2. Find existing score for the user
331
  existing_entries = df[df['username'] == username]
 
334
 
335
  if not existing_entries.empty:
336
  # User exists, find their highest score
337
+ max_existing_score = existing_entries['score'].max() # Already numeric
 
338
  if score > max_existing_score:
339
+ logger.info(f"New score {score} is higher than existing max {max_existing_score} for {username}. Updating entry.")
340
+ # Remove *all* old entries for this user to replace with the single best one
341
+ df = df[df['username'] != username].copy() # Use .copy() to avoid SettingWithCopyWarning
342
+ # Add new entry with score and code link
343
+ new_entry = pd.DataFrame([{
344
+ 'username': username,
345
+ 'score': score,
346
+ 'timestamp': current_timestamp,
347
+ 'code': code_link # Add the code link here
348
+ }])
349
  df = pd.concat([df, new_entry], ignore_index=True)
350
  needs_update = True
351
  else:
352
  logger.info(f"New score {score} is not higher than existing max {max_existing_score} for {username}. No update needed.")
353
  else:
354
  # User does not exist, add them
355
+ logger.info(f"User {username} not found. Adding new entry with score {score}.")
356
+ new_entry = pd.DataFrame([{
357
+ 'username': username,
358
+ 'score': score,
359
+ 'timestamp': current_timestamp,
360
+ 'code': code_link # Add the code link here
361
+ }])
362
  df = pd.concat([df, new_entry], ignore_index=True)
363
  needs_update = True
364
 
365
  # 3. Push updated data back to Hugging Face Hub if changes were made
366
  if needs_update:
367
+ logger.info(f"Preparing to push updated dataset to '{HF_DATASET_ID}'...")
368
+
369
+ # Ensure final DataFrame columns match the expected schema exactly before converting
370
+ # Select and order columns just in case
371
+ df = df[list(expected_columns.keys())]
372
+ # Explicitly cast types again before creating Dataset object
373
+ for col, dtype in expected_columns.items():
374
+ # Handle potential pandas nullable types if necessary, default to standard types
375
+ if dtype == 'str':
376
+ df[col] = df[col].astype(str).fillna('')
377
+ elif dtype == 'float':
378
+ df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0.0) # Ensure float conversion
379
+ # Add other type handling if needed
380
+
381
+ logger.info(f"Final DataFrame columns and types:\n{df.dtypes}")
382
+ logger.info(f"Sample data before push:\n{df.head().to_string()}")
383
+
384
+ # Create the Dataset object from the final DataFrame
385
+ updated_ds = Dataset.from_pandas(df)
386
+ # Wrap it in a DatasetDict (standard practice)
387
+ final_ds_dict = DatasetDict({'train': updated_ds})
388
+
389
+ logger.info(f"Dataset structure to push: {final_ds_dict}")
390
+
391
+ # *** UNCOMMENT THIS LINE TO ACTUALLY PUSH THE DATA ***
392
+ # final_ds_dict.push_to_hub(HF_DATASET_ID)
393
+ # logger.info(f"Successfully pushed updated dataset to '{HF_DATASET_ID}'.")
394
+ logger.warning("Dataset push to hub is currently commented out in the code. Uncomment the 'push_to_hub' line to enable leaderboard updates.")
395
  return True
396
  else:
397
+ logger.info("No changes needed, dataset not pushed.")
398
  return False # No update was pushed
399
 
400
  except Exception as e:
401
  logger.error(f"Error interacting with Hugging Face dataset '{HF_DATASET_ID}': {e}", exc_info=True)
402
+ # Re-raise the exception to be caught by the endpoint handler or calling function
403
+ # Adjust the exception type if not using FastAPI's HTTPException
404
  raise HTTPException(status_code=500, detail=f"Failed to update Hugging Face dataset: {e}")
405
 
 
406
 
407
  @app.get("/questions",
408
  # Return a list of dictionaries with arbitrary keys/values
 
475
 
476
  correct_count = 0
477
  total_attempted_in_payload = len(submission.answers)
478
+ valid_attempted_count = 0
479
  processed_ids = set()
480
 
481
  for answer_item in submission.answers:
482
+ task_id = str(answer_item.task_id)
483
+ submitted = str(answer_item.submitted_answer)
484
 
485
+
486
  if task_id in processed_ids:
487
  logger.warning(f"Duplicate task_id '{task_id}' in submission from {submission.username}. Skipping.")
488
+ continue
489
  processed_ids.add(task_id)
490
 
491
 
492
+
493
  if task_id not in ground_truth_answers:
494
  logger.warning(f"Task ID '{task_id}' submitted by {submission.username} not found in ground truth list. Skipping this answer.")
 
495
  continue
496
 
497
+
498
  valid_attempted_count += 1
499
  ground_truth = ground_truth_answers[task_id]
500
  # Compare answers (case-insensitive, strip whitespace)
 
504
  else:
505
  logger.debug(f"Incorrect answer for {task_id} from {submission.username}. Submitted: '{submitted}', Expected: '{ground_truth}'")
506
 
 
 
507
  if valid_attempted_count == 0:
508
  score = 0.0
509
  message = f"Submission received, but no valid/matching task IDs were found in the {total_attempted_in_payload} answers provided."
 
523
 
524
  # Update Hugging Face dataset
525
  try:
526
+ updated = update_huggingface_dataset(submission.username, score, submission.agent_code)
527
  if updated:
528
  message += " High score updated on leaderboard."
529
  logger.info(f"Leaderboard updated for {submission.username}.")