Spaces:
Sleeping
Sleeping
app front and hf repo in config
Browse files- app.py +30 -4
- qwen_classifier/cli.py +2 -1
- qwen_classifier/config.py +1 -1
- qwen_classifier/evaluate.py +16 -1
app.py
CHANGED
@@ -4,15 +4,37 @@ os.environ['HF_HOME'] = '/tmp/.cache/huggingface' # Use /tmp in Spaces
|
|
4 |
os.makedirs(os.environ['HF_HOME'], exist_ok=True) # Ensure directory exists
|
5 |
|
6 |
from fastapi import FastAPI
|
|
|
7 |
from qwen_classifier.predict import predict_single # Your existing function
|
8 |
from qwen_classifier.evaluate import evaluate_batch # Your existing function
|
9 |
import torch
|
10 |
from huggingface_hub import login
|
11 |
from qwen_classifier.model import QwenClassifier
|
|
|
12 |
from pydantic import BaseModel
|
13 |
|
14 |
app = FastAPI(title="Qwen Classifier")
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
@app.on_event("startup")
|
18 |
async def load_model():
|
@@ -28,7 +50,7 @@ async def load_model():
|
|
28 |
|
29 |
# Load model (will cache in /home/user/.cache/huggingface)
|
30 |
app.state.model = QwenClassifier.from_pretrained(
|
31 |
-
|
32 |
)
|
33 |
print("Model loaded successfully!")
|
34 |
|
@@ -39,8 +61,12 @@ class PredictionRequest(BaseModel):
|
|
39 |
|
40 |
@app.post("/predict")
|
41 |
async def predict(request: PredictionRequest): # ← Validates input automatically
|
42 |
-
return predict_single(request.text,
|
43 |
|
44 |
@app.post("/evaluate")
|
45 |
async def evaluate(request: PredictionRequest): # ← Validates input automatically
|
46 |
-
return evaluate_batch(request.text, backend="local")
|
|
|
|
|
|
|
|
|
|
4 |
os.makedirs(os.environ['HF_HOME'], exist_ok=True) # Ensure directory exists
|
5 |
|
6 |
from fastapi import FastAPI
|
7 |
+
from fastapi.responses import HTMLResponse
|
8 |
from qwen_classifier.predict import predict_single # Your existing function
|
9 |
from qwen_classifier.evaluate import evaluate_batch # Your existing function
|
10 |
import torch
|
11 |
from huggingface_hub import login
|
12 |
from qwen_classifier.model import QwenClassifier
|
13 |
+
from qwen_classifier.config import HF_REPO, SPACE_URL
|
14 |
from pydantic import BaseModel
|
15 |
|
16 |
app = FastAPI(title="Qwen Classifier")
|
17 |
+
|
18 |
+
# Landing page: lists the available API endpoints
|
19 |
+
@app.get("/", response_class=HTMLResponse)
def home():
    """Serve a small HTML landing page that lists the API endpoints.

    Returns:
        An HTML document (sent as ``HTMLResponse``) naming the ``/predict``,
        ``/evaluate`` and ``/health`` routes, with a ``curl`` example that
        targets ``SPACE_URL``.
    """
    # The JSON payload in the curl example contains literal braces. Inside an
    # f-string they must be doubled ({{ }}); otherwise Python parses
    # {"text":"your text"} as a replacement field with an invalid format spec
    # and raises ValueError when the page is rendered.
    return f"""
    <html>
        <head>
            <title>Qwen Classifier</title>
        </head>
        <body>
            <h1>Qwen Classifier API</h1>
            <p>Available endpoints:</p>
            <ul>
                <li><strong>POST /predict</strong> - Classify text</li>
                <li><strong>POST /evaluate</strong> - Evaluate batch text prediction from zip file</li>
                <li><strong>GET /health</strong> - Check API status</li>
            </ul>
            <p>Try it: <code>curl -X POST {SPACE_URL}/predict -H "Content-Type: application/json" -d '{{"text":"your text"}}'</code></p>
        </body>
    </html>
    """
|
38 |
|
39 |
@app.on_event("startup")
|
40 |
async def load_model():
|
|
|
50 |
|
51 |
# Load model (will cache under HF_HOME, set above to /tmp/.cache/huggingface)
|
52 |
app.state.model = QwenClassifier.from_pretrained(
|
53 |
+
HF_REPO,
|
54 |
)
|
55 |
print("Model loaded successfully!")
|
56 |
|
|
|
61 |
|
62 |
@app.post("/predict")
async def predict(request: PredictionRequest):
    """Classify the text supplied in the request body.

    The body is parsed into ``PredictionRequest`` (a pydantic model) before
    this handler runs, so ``request.text`` is already validated input.

    Returns:
        Whatever ``predict_single`` returns for the text, using the
        configured ``HF_REPO`` and the local backend.
    """
    prediction = predict_single(request.text, HF_REPO, backend="local")
    return prediction
|
65 |
|
66 |
@app.post("/evaluate")
async def evaluate(request: PredictionRequest):
    """Run a batch evaluation driven by the request's ``text`` field.

    The body is parsed into ``PredictionRequest`` (a pydantic model) before
    this handler runs.

    Returns:
        Whatever ``evaluate_batch`` returns, using the configured ``HF_REPO``
        and the local backend.
    """
    # NOTE(review): the landing page describes this route as evaluating a zip
    # file, so ``request.text`` is presumably a path to that archive - confirm
    # against evaluate_batch.
    report = evaluate_batch(request.text, HF_REPO, backend="local")
    return report
|
69 |
+
|
70 |
+
@app.get("/health")
def health_check():
    """Return a fixed status payload for health probes.

    Returns:
        A dict with ``status`` set to ``"healthy"`` and ``model`` set to
        ``"loaded"``.
    """
    # NOTE(review): both values are constants; this handler does not inspect
    # the model object, so "loaded" is asserted rather than checked.
    payload = {}
    payload["status"] = "healthy"
    payload["model"] = "loaded"
    return payload
|
qwen_classifier/cli.py
CHANGED
@@ -2,6 +2,7 @@ import click
|
|
2 |
from .predict import predict_single
|
3 |
import warnings
|
4 |
from transformers import logging as hf_logging
|
|
|
5 |
|
6 |
def configure_logging(debug):
|
7 |
"""Configure warning and logging levels based on debug flag"""
|
@@ -24,7 +25,7 @@ def cli(ctx, debug):
|
|
24 |
@cli.command()
|
25 |
@click.argument('text')
|
26 |
@click.option('--hf-token', envvar="HF_TOKEN", help="HF API token (or set HF_TOKEN env variable)")
|
27 |
-
@click.option('--hf-repo', default=
|
28 |
@click.option('--backend',
|
29 |
type=click.Choice(['local', 'hf'], case_sensitive=False),
|
30 |
default='local',
|
|
|
2 |
from .predict import predict_single
|
3 |
import warnings
|
4 |
from transformers import logging as hf_logging
|
5 |
+
from .config import HF_REPO
|
6 |
|
7 |
def configure_logging(debug):
|
8 |
"""Configure warning and logging levels based on debug flag"""
|
|
|
25 |
@cli.command()
|
26 |
@click.argument('text')
|
27 |
@click.option('--hf-token', envvar="HF_TOKEN", help="HF API token (or set HF_TOKEN env variable)")
|
28 |
+
@click.option('--hf-repo', default=HF_REPO, help="Hugging Face model repo")
|
29 |
@click.option('--backend',
|
30 |
type=click.Choice(['local', 'hf'], case_sensitive=False),
|
31 |
default='local',
|
qwen_classifier/config.py
CHANGED
@@ -5,6 +5,7 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
5 |
|
6 |
# HF API config
|
7 |
SPACE_URL = "https://keivanr-qwen-classifier-demo.hf.space"
|
|
|
8 |
|
9 |
TAG_NAMES = [
|
10 |
'games',
|
@@ -16,5 +17,4 @@ TAG_NAMES = [
|
|
16 |
'probabilities',
|
17 |
'strings',
|
18 |
'trees'
|
19 |
-
|
20 |
]
|
|
|
5 |
|
6 |
# HF API config
|
7 |
SPACE_URL = "https://keivanr-qwen-classifier-demo.hf.space"
|
8 |
+
HF_REPO = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_3layer-1743180976'
|
9 |
|
10 |
TAG_NAMES = [
|
11 |
'games',
|
|
|
17 |
'probabilities',
|
18 |
'strings',
|
19 |
'trees'
|
|
|
20 |
]
|
qwen_classifier/evaluate.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import numpy as np
|
2 |
from sklearn.metrics import classification_report
|
3 |
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
|
|
4 |
import zipfile
|
5 |
import json
|
6 |
import pandas as pd
|
@@ -12,7 +13,7 @@ from .config import TAG_NAMES, DEVICE, SPACE_URL
|
|
12 |
|
13 |
def load_data(test_data_path):
|
14 |
# zip file handler
|
15 |
-
zip_file = zipfile.ZipFile(
|
16 |
|
17 |
# list available files in the container
|
18 |
names = zip_file.namelist()
|
@@ -36,6 +37,20 @@ def load_data(test_data_path):
|
|
36 |
return df
|
37 |
|
38 |
def preprocessing(df):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
texts = df["prob_desc_description"].values.tolist()
|
40 |
labels = df[TAG_NAMES].values.tolist()
|
41 |
|
|
|
1 |
import numpy as np
|
2 |
from sklearn.metrics import classification_report
|
3 |
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
4 |
+
from sklearn.preprocessing import MultiLabelBinarizer
|
5 |
import zipfile
|
6 |
import json
|
7 |
import pandas as pd
|
|
|
13 |
|
14 |
def load_data(test_data_path):
|
15 |
# zip file handler
|
16 |
+
zip_file = zipfile.ZipFile(test_data_path)
|
17 |
|
18 |
# list available files in the container
|
19 |
names = zip_file.namelist()
|
|
|
37 |
return df
|
38 |
|
39 |
def preprocessing(df):
|
40 |
+
mlb = MultiLabelBinarizer()
|
41 |
+
tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees', 'geometry', 'games', 'probabilities']
|
42 |
+
|
43 |
+
# Filter tags and one-hot encode
|
44 |
+
df['tags_filtered'] = [[tag for tag in tags if tag in tags_to_encode] for tags in df["tags"]]
|
45 |
+
df.loc[df['tags_filtered'].apply(len) == 0, 'tags_filtered'] = df.loc[df['tags_filtered'].apply(len) == 0, 'tags_filtered'].apply(lambda x: ['other'])
|
46 |
+
encoded_tags = mlb.fit_transform(df['tags_filtered'])
|
47 |
+
|
48 |
+
# Create a new DataFrame with one-hot encoded columns
|
49 |
+
encoded_df = pd.DataFrame(encoded_tags, columns=mlb.classes_)
|
50 |
+
|
51 |
+
# Concatenate the encoded tags with the original DataFrame
|
52 |
+
df = pd.concat([df, encoded_df], axis=1)
|
53 |
+
|
54 |
texts = df["prob_desc_description"].values.tolist()
|
55 |
labels = df[TAG_NAMES].values.tolist()
|
56 |
|