import logging
import time

import pandas as pd
from datasets import load_dataset
from fastapi import HTTPException

logger = logging.getLogger(__name__)

DATASET_NAME = "agents-course/unit4-students-scores"
CACHE_DURATION_SECONDS = 60  # Cache data for 60 seconds

# Simple in-memory cache
cached_data = None
last_cache_time = 0


def get_sorted_leaderboard_data():
    """Return the leaderboard rows, sorted, using a short-lived in-memory cache.

    On a cache miss the ``DATASET_NAME`` dataset ("train" split) is loaded,
    rows whose timestamp cannot be parsed are dropped, and the remainder is
    sorted by ``score`` (descending) and then by parsed timestamp (ascending).
    The result is stored in the module-level cache and reused for
    ``CACHE_DURATION_SECONDS`` seconds.

    Returns:
        A list of dictionaries with the keys ``username``, ``score``,
        ``timestamp`` and ``code``. The ``timestamp`` value is the original
        dataset value, not the parsed datetime. The same list object is
        returned on cache hits, so callers should not mutate it.

    Raises:
        HTTPException: status 500 if loading or processing the dataset fails
            for any reason, including missing required columns.
    """
    global cached_data, last_cache_time

    current_time = time.time()

    # Check cache validity
    if cached_data is not None and (current_time - last_cache_time) < CACHE_DURATION_SECONDS:
        logger.info("Returning cached leaderboard data.")
        return cached_data

    logger.info("Cache expired or empty. Fetching fresh data from %s...", DATASET_NAME)

    try:
        # Load the dataset and convert to a DataFrame for easier sorting
        dataset = load_dataset(DATASET_NAME, split="train")
        df = pd.DataFrame(dataset)

        # Ensure required columns exist
        required_columns = ['username', 'score', 'timestamp', 'code']
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise ValueError(f"Dataset missing required columns: {missing}")

        # Parse timestamps for sorting only. Unparseable values become NaT
        # (errors='coerce'); utc=True normalises every value to UTC so that
        # naive and offset-aware timestamps remain mutually comparable.
        df['timestamp_dt'] = pd.to_datetime(df['timestamp'], errors='coerce', utc=True)

        # Drop rows where timestamp conversion failed
        df = df.dropna(subset=['timestamp_dt'])

        # Sort by score (descending) and then by timestamp (ascending)
        df_sorted = df.sort_values(by=['score', 'timestamp_dt'], ascending=[False, True])

        # Keep only the columns needed by the frontend (+ code); the original
        # timestamp value is returned for display, the parsed one is discarded.
        leaderboard = df_sorted[required_columns].to_dict(orient='records')

        # Update cache
        cached_data = leaderboard
        last_cache_time = current_time
        logger.info("Successfully fetched and cached data. %d entries.", len(leaderboard))

        return cached_data

    except Exception as e:
        logger.error("Error loading or processing dataset %s: %s", DATASET_NAME, e, exc_info=True)
        # Re-raise as HTTPException so FastAPI returns a proper error response;
        # chain the original exception so the cause stays visible in tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"Failed to load or process leaderboard data: {e}",
        ) from e


# Optional: Add an __init__.py file in the app directory
# Create an empty file named app/__init__.py