"""HTTP service exposing the Qwen classifier through a single FastAPI endpoint."""

import torch
from fastapi import FastAPI

from qwen_classifier.predict import predict_single  # Your existing function

app = FastAPI(title="Qwen Classifier")


@app.on_event("startup")
async def load_model() -> None:
    """Warm up the GPU once when the application starts.

    Despite its name, this hook does not load any model itself; it only
    allocates a one-element tensor on the CUDA device so that CUDA
    initialisation happens at startup rather than during a request.
    The warm-up is skipped when CUDA is unavailable so the service can still
    start on such hosts.
    """
    if torch.cuda.is_available():
        # Warm up GPU
        torch.zeros(1).cuda()


@app.post("/predict")
async def predict(text: str):
    """Classify ``text`` and return the result of ``predict_single``.

    ``text`` is declared as a plain ``str`` parameter, so FastAPI reads it
    from the query string (``POST /predict?text=...``), not from the request
    body.

    Args:
        text: The input text to classify.

    Returns:
        Whatever ``predict_single(text, backend="local")`` returns; FastAPI
        serialises it as the response.
    """
    # NOTE(review): predict_single is called synchronously inside an async
    # handler; if it is blocking (presumably it runs model inference), it
    # will stall the event loop for the duration of the call. Consider
    # offloading it to a thread pool once its thread-safety is confirmed.
    return predict_single(text, backend="local")