import logging
import time

import pandas as pd
from datasets import load_dataset
from fastapi import HTTPException

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Dataset identifier passed to load_dataset() when the cache is refreshed.
DATASET_NAME = "agents-course/unit4-students-scores"

# Maximum age, in seconds, of the cached leaderboard before it is rebuilt.
CACHE_DURATION_SECONDS = 60

# In-process cache, rebound by get_sorted_leaderboard_data().
# cached_data is None until the first successful load; afterwards it holds the
# sorted leaderboard rows (a list of dictionaries).
cached_data = None

# time.time() value recorded for the refresh that produced cached_data.
last_cache_time = 0


def _build_leaderboard(df):
    """Validate, sort and convert a raw scores DataFrame into leaderboard rows.

    Rows whose ``timestamp`` cannot be parsed as a datetime are dropped. The
    remaining rows are ordered by ``score`` descending, with ties broken by
    the earlier parsed timestamp first.

    Returns:
        A list of dictionaries with the keys ``username``, ``score``,
        ``timestamp`` and ``code`` (the original, unparsed timestamp value).

    Raises:
        ValueError: if any of the required columns is absent from ``df``.
    """
    required_columns = ['username', 'score', 'timestamp', 'code']

    # Compute the missing columns once; an empty list means the frame is usable.
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise ValueError(f"Dataset missing required columns: {missing}")

    # errors='coerce' turns unparseable timestamps into NaT so they can be
    # dropped instead of aborting the whole refresh.
    parsed_timestamps = pd.to_datetime(df['timestamp'], errors='coerce')

    ranked = (
        df.assign(timestamp_dt=parsed_timestamps)
        .dropna(subset=['timestamp_dt'])
        .sort_values(by=['score', 'timestamp_dt'], ascending=[False, True])
    )

    # The helper column is only a sort key; expose the original columns.
    return ranked[required_columns].to_dict(orient='records')


def get_sorted_leaderboard_data():
    """
    Loads data from Hugging Face dataset, sorts it, and caches the result.

    The cached list is returned as long as it is younger than
    CACHE_DURATION_SECONDS; otherwise the dataset is fetched again, sorted
    (highest score first, earlier timestamp first on ties) and the module
    level cache (``cached_data`` / ``last_cache_time``) is replaced.

    Returns:
        The sorted data as a list of dictionaries. The same list object is
        handed to every caller while the cache is valid.

    Raises:
        HTTPException: status 500 if loading or processing the dataset fails
            for any reason; the original exception is chained as the cause.
    """
    global cached_data, last_cache_time

    # NOTE(review): time.time() is wall-clock time; a clock adjustment would
    # shift cache expiry. Kept because last_cache_time is module state.
    current_time = time.time()

    cache_is_fresh = (
        cached_data is not None
        and (current_time - last_cache_time) < CACHE_DURATION_SECONDS
    )
    if cache_is_fresh:
        logger.info("Returning cached leaderboard data.")
        return cached_data

    logger.info("Cache expired or empty. Fetching fresh data from %s...", DATASET_NAME)

    try:
        dataset = load_dataset(DATASET_NAME, split="train")
        leaderboard = _build_leaderboard(pd.DataFrame(dataset))
    except Exception as e:
        # Boundary handler: log the traceback, then surface an HTTP 500 to the
        # caller while keeping the original exception as the explicit cause.
        logger.exception("Error loading or processing dataset %s: %s", DATASET_NAME, e)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to load or process leaderboard data: {e}",
        ) from e

    # Only replace the cache after a fully successful refresh.
    cached_data = leaderboard
    last_cache_time = current_time
    logger.info("Successfully fetched and cached data. %d entries.", len(leaderboard))

    return cached_data