KeivanR committed
Commit 5d27647 · 1 Parent(s): da9a9de

app front and hf repo in config

app.py CHANGED
@@ -4,15 +4,37 @@ os.environ['HF_HOME'] = '/tmp/.cache/huggingface' # Use /tmp in Spaces
 os.makedirs(os.environ['HF_HOME'], exist_ok=True) # Ensure directory exists
 
 from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
 from qwen_classifier.predict import predict_single # Your existing function
 from qwen_classifier.evaluate import evaluate_batch # Your existing function
 import torch
 from huggingface_hub import login
 from qwen_classifier.model import QwenClassifier
+from qwen_classifier.config import HF_REPO, SPACE_URL
 from pydantic import BaseModel
 
 app = FastAPI(title="Qwen Classifier")
-hf_repo = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446'
+
+# Landing page listing the available endpoints
+@app.get("/", response_class=HTMLResponse)
+def home():
+    return f"""
+    <html>
+        <head>
+            <title>Qwen Classifier</title>
+        </head>
+        <body>
+            <h1>Qwen Classifier API</h1>
+            <p>Available endpoints:</p>
+            <ul>
+                <li><strong>POST /predict</strong> - Classify text</li>
+                <li><strong>POST /evaluate</strong> - Evaluate batch text predictions from a zip file</li>
+                <li><strong>GET /health</strong> - Check API status</li>
+            </ul>
+            <p>Try it: <code>curl -X POST {SPACE_URL}/predict -H "Content-Type: application/json" -d '{{"text":"your text"}}'</code></p>
+        </body>
+    </html>
+    """
 
 @app.on_event("startup")
 async def load_model():
@@ -28,7 +50,7 @@ async def load_model():
 
     # Load model (will cache in /home/user/.cache/huggingface)
     app.state.model = QwenClassifier.from_pretrained(
-        hf_repo,
+        HF_REPO,
     )
    print("Model loaded successfully!")
 
@@ -39,8 +61,12 @@ class PredictionRequest(BaseModel):
 
 @app.post("/predict")
 async def predict(request: PredictionRequest): # ← Validates input automatically
-    return predict_single(request.text, hf_repo, backend="local")
+    return predict_single(request.text, HF_REPO, backend="local")
 
 @app.post("/evaluate")
 async def evaluate(request: PredictionRequest): # ← Validates input automatically
-    return evaluate_batch(request.text, backend="local")
+    return evaluate_batch(request.text, HF_REPO, backend="local")
+
+@app.get("/health")
+def health_check():
+    return {"status": "healthy", "model": "loaded"}
qwen_classifier/cli.py CHANGED
@@ -2,6 +2,7 @@ import click
 from .predict import predict_single
 import warnings
 from transformers import logging as hf_logging
+from .config import HF_REPO
 
 def configure_logging(debug):
     """Configure warning and logging levels based on debug flag"""
@@ -24,7 +25,7 @@ def cli(ctx, debug):
 @cli.command()
 @click.argument('text')
 @click.option('--hf-token', envvar="HF_TOKEN", help="HF API token (or set HF_TOKEN env variable)")
-@click.option('--hf-repo', default="KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_lora-1743189446", help="Hugging Face model repo")
+@click.option('--hf-repo', default=HF_REPO, help="Hugging Face model repo")
 @click.option('--backend',
               type=click.Choice(['local', 'hf'], case_sensitive=False),
               default='local',
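With the default now read from config, --hf-repo only needs to be passed to override HF_REPO. A hedged sketch of driving the command through click's test runner follows; the command name "predict" is an assumption, since the decorated function's name is outside this diff's context.

# Sketch, assuming the click group is `cli` and the command shown above is
# registered as `predict` (the actual function name is not visible in this diff).
from click.testing import CliRunner
from qwen_classifier.cli import cli

runner = CliRunner()
# --hf-repo is omitted, so the default from qwen_classifier.config.HF_REPO applies
result = runner.invoke(cli, ["predict", "Count the inversions in an array.", "--backend", "local"])
print(result.output)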
qwen_classifier/config.py CHANGED
@@ -5,6 +5,7 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 # HF API config
 SPACE_URL = "https://keivanr-qwen-classifier-demo.hf.space"
+HF_REPO = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_3layer-1743180976'
 
 TAG_NAMES = [
     'games',
@@ -16,5 +17,4 @@ TAG_NAMES = [
     'probabilities',
     'strings',
     'trees'
-
 ]
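Putting the two hunks together, config.py after this commit reads roughly as below; the TAG_NAMES entries hidden by the diff context are elided rather than guessed, and the torch import is an assumption implied by the DEVICE line in the hunk header.

# qwen_classifier/config.py after this commit (reconstruction, not the full file)
import torch  # assumed: needed for the DEVICE line shown in the hunk header

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# HF API config
SPACE_URL = "https://keivanr-qwen-classifier-demo.hf.space"
HF_REPO = 'KeivanR/Qwen2.5-1.5B-Instruct-MLB-clf_3layer-1743180976'

TAG_NAMES = [
    'games',
    # ... entries not visible in this diff ...
    'probabilities',
    'strings',
    'trees'
]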
qwen_classifier/evaluate.py CHANGED
@@ -1,6 +1,7 @@
 import numpy as np
 from sklearn.metrics import classification_report
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+from sklearn.preprocessing import MultiLabelBinarizer
 import zipfile
 import json
 import pandas as pd
@@ -12,7 +13,7 @@ from .config import TAG_NAMES, DEVICE, SPACE_URL
 
 def load_data(test_data_path):
     # zip file handler
-    zip_file = zipfile.ZipFile('code_classification_dataset.zip')
+    zip_file = zipfile.ZipFile(test_data_path)
 
     # list available files in the container
     names = zip_file.namelist()
@@ -36,6 +37,20 @@ def load_data(test_data_path):
     return df
 
 def preprocessing(df):
+    mlb = MultiLabelBinarizer()
+    tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees', 'geometry', 'games', 'probabilities']
+
+    # Filter tags and one-hot encode
+    df['tags_filtered'] = [[tag for tag in tags if tag in tags_to_encode] for tags in df["tags"]]
+    df.loc[df['tags_filtered'].apply(len) == 0, 'tags_filtered'] = df.loc[df['tags_filtered'].apply(len) == 0, 'tags_filtered'].apply(lambda x: ['other'])
+    encoded_tags = mlb.fit_transform(df['tags_filtered'])
+
+    # Create a new DataFrame with one-hot encoded columns
+    encoded_df = pd.DataFrame(encoded_tags, columns=mlb.classes_)
+
+    # Concatenate the encoded tags with the original DataFrame
+    df = pd.concat([df, encoded_df], axis=1)
+
     texts = df["prob_desc_description"].values.tolist()
     labels = df[TAG_NAMES].values.tolist()
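The new preprocessing() logic filters each problem's tags down to the eight classes of interest, maps rows left with no tags to 'other', and one-hot encodes the result with MultiLabelBinarizer. A standalone toy illustration (the empty-row fallback is written more compactly here than in the diff, but has the same effect):

# Toy data, not the project's dataset; illustrates the tag filtering and
# one-hot encoding added to preprocessing() above.
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

df = pd.DataFrame({"tags": [["math", "dp"], ["graphs", "trees"], ["implementation"]]})
tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees',
                  'geometry', 'games', 'probabilities']

# Keep only the tags the classifier knows; rows with none left fall back to ['other']
df['tags_filtered'] = [[t for t in tags if t in tags_to_encode] for tags in df["tags"]]
df['tags_filtered'] = df['tags_filtered'].apply(lambda tags: tags or ['other'])

mlb = MultiLabelBinarizer()
encoded_df = pd.DataFrame(mlb.fit_transform(df['tags_filtered']), columns=mlb.classes_)
print(encoded_df)
#    graphs  math  other  trees
# 0       0     1      0      0
# 1       1     0      0      1
# 2       0     0      1      0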