import os
os.environ['HF_HOME'] = '/tmp/.cache/huggingface'  # Use /tmp in Spaces
os.makedirs(os.environ['HF_HOME'], exist_ok=True)  # Ensure directory exists

from fastapi import FastAPI
from qwen_classifier.predict import predict_single  # Your existing function
from qwen_classifier.evaluate import evaluate_batch  # Your existing function
import torch
from huggingface_hub import login
from qwen_classifier.model import QwenClassifier
from pydantic import BaseModel

app = FastAPI(title="Qwen Classifier")
hf_repo = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446'

@app.on_event("startup")
async def load_model():
    # Warm up GPU (skip on CPU-only Spaces)
    if torch.cuda.is_available():
        torch.zeros(1).cuda()

    # Read HF_TOKEN from Hugging Face Space secrets
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables")

    # Authenticate
    login(token=hf_token)

    # Load model (will cache under HF_HOME, i.e. /tmp/.cache/huggingface)
    app.state.model = QwenClassifier.from_pretrained(
        hf_repo,
    )
    print("Model loaded successfully!")

class PredictionRequest(BaseModel):
    text: str  # ← Request body must provide 'text' as a string

@app.post("/predict")
async def predict(request: PredictionRequest):  # ← Validates input automatically
    return predict_single(request.text, hf_repo, backend="local")

@app.post("/evaluate")
async def evaluate(request: PredictionRequest):  # ← Validates input automatically
    return evaluate_batch(request.text, backend="local")
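# A minimal sketch for smoke-testing the two endpoints from a client, assuming
# the app is served locally (e.g. `uvicorn main:app --port 8000`); the module
# name, port, and sample text below are assumptions, not part of the Space setup.
import requests

BASE_URL = "http://localhost:8000"  # Assumed local dev URL, not the deployed Space URL

# Both endpoints expect a JSON body matching PredictionRequest, i.e. {"text": "..."}
resp = requests.post(f"{BASE_URL}/predict", json={"text": "Example input to classify"})
print(resp.status_code, resp.json())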