KeivanR committed
Commit 748a976 · 1 Parent(s): c4ad33b

readme and shared global model and tokenizer

Files changed (4)
  1. README.md +88 -11
  2. app.py +7 -1
  3. qwen_classifier/evaluate.py +8 -7
  4. qwen_classifier/predict.py +7 -10
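
The three code modules below all import `model` and `tokenizer` from a new `qwen_classifier/globals.py`, which is not itself shown in this commit. A minimal sketch of what that module presumably contains, assuming it only declares the shared placeholders:

```python
# qwen_classifier/globals.py -- assumed contents, not shown in this diff.
# Placeholder slots for the model and tokenizer shared across app.py,
# predict.py, and evaluate.py; filled lazily on first use or at startup.
model = None
tokenizer = None
```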
README.md CHANGED
@@ -1,11 +1,88 @@
- ---
- title: Qwen Classifier Demo
- emoji: 🏢
- colorFrom: green
- colorTo: gray
- sdk: docker
- pinned: false
- short_description: Fine tuned Qwen to classify coding exercizes
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Qwen Multi-label Text Classifier
+
+ ## Overview
+ A multi-label text classifier based on Qwen-1.5B, fine-tuned for coding exercise classification. Supports:
+ - Local CPU/GPU inference
+ - Hugging Face API deployment
+ - Batch evaluation
+ - REST API via FastAPI
+ - Docker deployment
+
+ ## Features
+ - **9-Label Classification**: Predicts multiple tags per text
+ - **CLI Interface**: Run predictions/evaluations from the terminal
+ - **Dual Backend**: Choose between local or HF inference
+ - **GPU Optimized**: CUDA support via Docker
+
+ ## Installation
+ ```bash
+ git clone https://github.com/your-username/qwen-classifier
+ cd qwen-classifier
+ python3 -m venv .venv
+ source .venv/bin/activate
+ pip install -e .
+ ```
+
+ ## Usage
+
+ ### CLI Prediction
+ ```bash
+ # Local inference
+ qwen-clf predict "Your coding exercise text" --backend local
+
+ # HF Space inference
+ qwen-clf predict "Your text" --backend hf --hf-token YOUR_TOKEN
+ ```
+
+ ### Batch Evaluation
+ ```bash
+ qwen-clf evaluate dataset.zip --backend local
+ ```
+
+ ### API Server
+ ```bash
+ uvicorn app:app --host 0.0.0.0 --port 7860
+ ```
+
+ #### API Endpoints
+ | Endpoint | Method | Description |
+ |----------|--------|-------------|
+ | `/` | GET | Documentation |
+ | `/predict` | POST | Single text prediction |
+ | `/evaluate` | POST | Batch evaluation (ZIP) |
+ | `/health` | GET | Service status |
+
+ ## Docker Deployment
+ ```bash
+ # Build with GPU support
+ docker build -t qwen-classifier .
+
+ # Run container
+ docker run -p 7860:7860 --gpus all qwen-classifier
+ ```
+
+ ## Project Structure
+ ```
+ .
+ ├── app.py               # FastAPI entry point
+ ├── Dockerfile           # GPU-optimized container setup
+ ├── qwen_classifier/     # Core package
+ │   ├── cli.py           # Command line interface
+ │   ├── model.py         # Qwen classifier implementation
+ │   ├── predict.py       # Inference logic
+ │   └── evaluate.py      # Batch evaluation
+ └── requirements.txt     # Python dependencies
+ ```
+
+ ## Configuration
+ Edit `qwen_classifier/config.py` to set:
+ - `TAG_NAMES`: List of 9 classification tags
+ - `HF_REPO`: Default Hugging Face model repo
+ - `DEVICE`: Auto-detected CUDA/CPU
+
+ ## Hugging Face Space
+ Live demo:
+ [![HF Space](https://img.shields.io/badge/🤗%20Hugging%20Face-Space-blue)](https://huggingface.co/spaces/KeivanR/qwen-classifier-demo)
+
+ ## License
+ Apache 2.0 © Keivan Razban
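
The new README documents a `/predict` POST endpoint. A hypothetical client call, assuming the request body is a single `text` field (the actual schema is defined by the pydantic model in app.py, which this diff does not show):

```python
# Sketch of a client request to the /predict endpoint; the {"text": ...}
# payload shape is an assumption, not confirmed by this diff.
import requests

resp = requests.post(
    "http://localhost:7860/predict",
    json={"text": "Given an array of integers, return the indices of two numbers that sum to a target."},
    timeout=30,
)
resp.raise_for_status()
print(resp.json())  # predicted tags for the exercise text
```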
app.py CHANGED
@@ -7,12 +7,15 @@ from fastapi import FastAPI
  from fastapi.responses import HTMLResponse
  from qwen_classifier.predict import predict_single  # Your existing function
  from qwen_classifier.evaluate import evaluate_batch  # Your existing function
+ from qwen_classifier.globals import model, tokenizer
  import torch
+ from transformers import AutoTokenizer
  from huggingface_hub import login
  from qwen_classifier.model import QwenClassifier
  from qwen_classifier.config import HF_REPO
  from pydantic import BaseModel
 
+
  app = FastAPI(title="Qwen Classifier")
  hf_repo = os.getenv("HF_REPO")
  if not hf_repo:
@@ -41,6 +44,7 @@ def home():
 
  @app.on_event("startup")
  async def load_model():
+     global model, tokenizer
      # Warm up GPU
      torch.zeros(1).cuda()
      # Read HF_TOKEN from Hugging Face Space secrets
@@ -52,9 +56,11 @@ async def load_model():
      login(token=hf_token)
 
      # Load model (will cache in /home/user/.cache/huggingface)
-     app.state.model = QwenClassifier.from_pretrained(
+
+     model = QwenClassifier.from_pretrained(
          hf_repo,
      )
+     tokenizer = AutoTokenizer.from_pretrained(hf_repo)
      print("Model loaded successfully!")
 
 
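
Pieced together from the hunks above, the startup handler after this commit reads roughly as follows (a reconstruction; the token-reading lines elided by the diff are summarized in a comment):

```python
@app.on_event("startup")
async def load_model():
    global model, tokenizer
    # Warm up GPU
    torch.zeros(1).cuda()
    # ... read HF_TOKEN from Space secrets and login(token=hf_token) ...
    # Load model (will cache in /home/user/.cache/huggingface)
    model = QwenClassifier.from_pretrained(
        hf_repo,
    )
    tokenizer = AutoTokenizer.from_pretrained(hf_repo)
    print("Model loaded successfully!")
```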
qwen_classifier/evaluate.py CHANGED
@@ -10,6 +10,7 @@ from datasets import Dataset
  from torch.utils.data import DataLoader
  import requests
  from .config import TAG_NAMES, DEVICE, SPACE_URL
+ from .globals import model, tokenizer
 
  def load_data(test_data_path):
      # zip file handler
@@ -72,15 +73,15 @@ def evaluate_batch(file_path, hf_repo, backend="local", hf_token=None):
      raise ValueError(f"Unknown backend: {backend}")
 
  def _evaluate_local(test_data_path, hf_repo):
-     global local_model, local_tokenizer
+     global model, tokenizer
 
      # Lazy-loading to avoid slow startup
-     if local_model is None:
+     if model is None:
          from .model import QwenClassifier
          from transformers import AutoTokenizer
 
-         local_model = QwenClassifier.from_pretrained(hf_repo).eval()
-         local_tokenizer = AutoTokenizer.from_pretrained(hf_repo)
+         model = QwenClassifier.from_pretrained(hf_repo).eval()
+         tokenizer = AutoTokenizer.from_pretrained(hf_repo)
      df = load_data(test_data_path)
      df = preprocessing(df)
 
@@ -88,7 +89,7 @@ def _evaluate_local(test_data_path, hf_repo):
 
      # Then apply tokenization
      def tokenize_function(examples):
-         return local_tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
+         return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
 
      dataset = hf_dataset.map(tokenize_function, batched=True)
 
@@ -97,7 +98,7 @@ def _evaluate_local(test_data_path, hf_repo):
      dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
 
 
-     local_model.eval()
+     model.eval()
      all_preds = []
      all_labels = []
 
@@ -106,7 +107,7 @@ def _evaluate_local(test_data_path, hf_repo):
          batch = {k: v.to(DEVICE) for k, v in batch.items()}
          labels = batch["labels"].type(torch.float32)
 
-         logits = local_model(batch["input_ids"], batch["attention_mask"])
+         logits = model(batch["input_ids"], batch["attention_mask"])
 
          preds = torch.sigmoid(logits).cpu().numpy() > 0.5
          labels = labels.cpu().numpy()
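
The loop above collects boolean predictions (`torch.sigmoid(logits) > 0.5`) and ground-truth labels, but the diff ends before the metric computation. One plausible way to score them, assuming scikit-learn is available and `TAG_NAMES` holds the nine tags (an illustration, not necessarily what evaluate.py actually computes):

```python
# Hypothetical scoring step for the accumulated multi-label predictions.
import numpy as np
from sklearn.metrics import classification_report

y_pred = np.vstack(all_preds)    # (n_samples, 9) booleans from the 0.5 threshold
y_true = np.vstack(all_labels)   # (n_samples, 9) 0/1 ground truth
print(classification_report(y_true, y_pred, target_names=TAG_NAMES, zero_division=0))
```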
qwen_classifier/predict.py CHANGED
@@ -1,10 +1,7 @@
  import torch
  import requests
  from .config import TAG_NAMES, SPACE_URL
-
- # Local model setup (only load if needed)
- local_model = None
- local_tokenizer = None
+ from .globals import model, tokenizer
 
  def predict_single(text, hf_repo, backend="local", hf_token=None):
      if backend == "local":
@@ -15,19 +12,19 @@ def predict_single(text, hf_repo, backend="local", hf_token=None):
      raise ValueError(f"Unknown backend: {backend}")
 
  def _predict_local(text, hf_repo):
-     global local_model, local_tokenizer
+     global model, tokenizer
 
      # Lazy-loading to avoid slow startup
-     if local_model is None:
+     if model is None:
          from .model import QwenClassifier
          from transformers import AutoTokenizer
 
-         local_model = QwenClassifier.from_pretrained(hf_repo).eval()
-         local_tokenizer = AutoTokenizer.from_pretrained(hf_repo)
+         model = QwenClassifier.from_pretrained(hf_repo).eval()
+         tokenizer = AutoTokenizer.from_pretrained(hf_repo)
 
-     inputs = local_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
      with torch.no_grad():
-         logits = local_model(**inputs)
+         logits = model(**inputs)
      return _process_output(logits)
 
  def _predict_hf_api(text, hf_token=None):
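
One subtlety of the shared-globals pattern introduced here: `from .globals import model, tokenizer` copies the current (None) bindings into the importing module, so reassigning `model` inside `_predict_local` rebinds only that module's name, not the attribute on `qwen_classifier.globals`. A sketch of the attribute-access variant that keeps a single shared slot across modules (`ensure_loaded` is a hypothetical helper, not part of this commit):

```python
# Read and write the shared slots through the module object itself, so
# every importer observes the same state after the first load.
from qwen_classifier import globals as shared
from qwen_classifier.model import QwenClassifier
from transformers import AutoTokenizer

def ensure_loaded(hf_repo: str) -> None:
    """Populate shared.model/shared.tokenizer once; later callers reuse them."""
    if shared.model is None:
        shared.model = QwenClassifier.from_pretrained(hf_repo).eval()
        shared.tokenizer = AutoTokenizer.from_pretrained(hf_repo)
```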