# Hugging Face Hub page header (kept for provenance; not part of the program):
#   Jofthomas's picture
#   Create app/app.py
#   e332da4 verified
import time
import pandas as pd
from datasets import load_dataset
from fastapi import HTTPException
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Dataset identifier passed to `load_dataset` by get_sorted_leaderboard_data().
DATASET_NAME = "agents-course/unit4-students-scores"
CACHE_DURATION_SECONDS = 60 # Cache data for 60 seconds
# Simple in-memory cache, read and rebound by get_sorted_leaderboard_data().
# `cached_data` holds the last successfully built leaderboard (a list of row
# dicts) and stays None until the first successful load.
cached_data = None
# `time.time()` value captured at the start of the call that last filled the
# cache; 0 means the cache has never been filled.
last_cache_time = 0
def _build_leaderboard(df):
    """Validate, sort and serialise the raw scores frame.

    Args:
        df: DataFrame expected to contain the columns ``username``,
            ``score``, ``timestamp`` and ``code``. It is not modified.

    Returns:
        A list of dicts, one per row, with the keys ``username``, ``score``,
        ``timestamp`` and ``code``. Rows are ordered by descending score;
        ties are broken by ascending parsed timestamp. Rows whose timestamp
        cannot be parsed are omitted.

    Raises:
        ValueError: If any of the required columns is missing.
    """
    required_columns = ['username', 'score', 'timestamp', 'code']
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise ValueError(f"Dataset missing required columns: {missing}")

    # Parse timestamps purely for ordering: unparseable values become NaT and
    # those rows are dropped. `assign` returns a new frame, so the caller's
    # DataFrame is left untouched.
    parsed = df.assign(timestamp_dt=pd.to_datetime(df['timestamp'], errors='coerce'))
    parsed = parsed.dropna(subset=['timestamp_dt'])

    ranked = parsed.sort_values(by=['score', 'timestamp_dt'], ascending=[False, True])

    # Emit the original timestamp values (not the parsed helper column) so the
    # output keeps the representation stored in the dataset.
    return ranked[required_columns].to_dict(orient='records')


def get_sorted_leaderboard_data():
    """Return the sorted leaderboard, using a short-lived in-memory cache.

    If the module-level cache was filled less than ``CACHE_DURATION_SECONDS``
    ago, the cached list is returned as-is. Otherwise the ``train`` split of
    ``DATASET_NAME`` is loaded, converted to a DataFrame, sorted (see
    ``_build_leaderboard``) and stored in the cache.

    Returns:
        The leaderboard as a list of dictionaries. The same list object is
        shared with the cache, so callers should treat it as read-only.

    Raises:
        HTTPException: With status 500 if loading or processing the dataset
            fails for any reason; the original exception is attached as the
            cause.
    """
    global cached_data, last_cache_time

    current_time = time.time()
    cache_is_fresh = (
        cached_data is not None
        and (current_time - last_cache_time) < CACHE_DURATION_SECONDS
    )
    if cache_is_fresh:
        logger.info("Returning cached leaderboard data.")
        return cached_data

    logger.info("Cache expired or empty. Fetching fresh data from %s...", DATASET_NAME)
    try:
        dataset = load_dataset(DATASET_NAME, split="train")
        leaderboard = _build_leaderboard(pd.DataFrame(dataset))
    except Exception as e:
        # Boundary handler: log with traceback, then surface the failure as an
        # HTTP 500 so FastAPI returns a proper error response.
        logger.exception("Error loading or processing dataset %s: %s", DATASET_NAME, e)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to load or process leaderboard data: {e}",
        ) from e

    # Only a fully successful load replaces the cache. The timestamp is the one
    # taken before the fetch started.
    cached_data = leaderboard
    last_cache_time = current_time
    logger.info("Successfully fetched and cached data. %d entries.", len(leaderboard))
    return cached_data
# Optional: add an empty file named app/__init__.py in the app directory.