evaluate
- qwen_classifier/evaluate.py +77 -4
- qwen_classifier/predict.py +0 -2
- setup.py +3 -1
qwen_classifier/evaluate.py
CHANGED
@@ -1,9 +1,14 @@
 import numpy as np
 from sklearn.metrics import classification_report
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 import zipfile
 import json
 import pandas as pd
-
+import torch
+from datasets import Dataset
+from torch.utils.data import DataLoader
+from .config import TAG_NAMES, DEVICE
+from .predict import predict_batch
 
 def load_data(test_data_path):
     # zip file handler
@@ -31,7 +36,6 @@ def load_data(test_data_path):
     return df
 
 def preprocessing(df):
-    # Example dataset
     texts = df["prob_desc_description"].values.tolist()
     labels = df[TAG_NAMES].values.tolist()
 
@@ -40,9 +44,78 @@ def preprocessing(df):
     # labels = [[0,1,0,0,1,0,1,1,0], [0,1,1,0,0,0,0,0,0],, ...] # list of labels
 
     df = pd.DataFrame({'text':texts, 'labels': labels})
+    return df
+
+
 
+def evaluate_batch(text, hf_repo, backend="local", hf_token=None):
+    if backend == "local":
+        return _evaluate_local(text, hf_repo)
+    elif backend == "hf":
+        return _evaluate_hf_api(text, hf_token)
+    else:
+        raise ValueError(f"Unknown backend: {backend}")
 
-def
+def _evaluate_local(test_data_path, hf_repo):
+    global local_model, local_tokenizer
+
+    # Lazy-loading to avoid slow startup
+    if local_model is None:
+        from .model import QwenClassifier
+        from transformers import AutoTokenizer
+
+        local_model = QwenClassifier.from_pretrained(hf_repo).eval()
+        local_tokenizer = AutoTokenizer.from_pretrained(hf_repo)
     df = load_data(test_data_path)
     df = preprocessing(df)
-
+
+    hf_dataset = Dataset.from_pandas(df)
+
+    # Then apply tokenization
+    def tokenize_function(examples):
+        return local_tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
+
+    dataset = hf_dataset.map(tokenize_function, batched=True)
+
+    dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
+
+    dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
+
+
+    local_model.eval()
+    all_preds = []
+    all_labels = []
+
+    with torch.no_grad():
+        for batch in dataloader:
+            batch = {k: v.to(DEVICE) for k, v in batch.items()}
+            labels = batch["labels"].type(torch.float32)
+
+            logits = local_model(batch["input_ids"], batch["attention_mask"])
+
+            preds = torch.sigmoid(logits).cpu().numpy() > 0.5
+            labels = labels.cpu().numpy()
+
+            all_preds.extend(preds)
+            all_labels.extend(labels)
+
+    val_acc = accuracy_score(all_labels, all_preds)
+    val_prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
+    val_rec = recall_score(all_labels, all_preds, average='macro')
+    val_f1 = f1_score(all_labels, all_preds, average='macro')
+    val_prec_per_class = precision_score(all_labels, all_preds, average=None, zero_division=0)
+    val_rec_per_class = recall_score(all_labels, all_preds, average=None)
+    val_f1_per_class = f1_score(all_labels, all_preds, average=None)
+
+    metrics = {
+        val_acc,
+        val_prec,
+        val_rec,
+        val_f1,
+        val_prec_per_class,
+        val_rec_per_class,
+        val_f1_per_class
+    }
+    report = classification_report(all_labels, all_preds, target_names=TAG_NAMES, zero_division=0)
+
+    return metrics, report
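For reference, a minimal usage sketch of the new evaluation entry point. The repo id and data path are placeholders, and the keyed metrics dict is an assumption about intent: as committed, the metrics literal in _evaluate_local is a Python set (the per-class numpy arrays in it are unhashable), so named keys along these lines are presumably what is meant.

    # Sketch only. "user/qwen-classifier" and "test_data.zip" are placeholders,
    # and the metric key names are assumptions, not part of this commit.
    from qwen_classifier.evaluate import evaluate_batch

    metrics, report = evaluate_batch(
        "test_data.zip",                     # zip archive consumed by load_data()
        hf_repo="user/qwen-classifier",      # placeholder model repo
        backend="local",                     # "hf" would call the hosted Space instead
    )
    print(report)

    # Keyed shape the set literal presumably intends:
    # metrics = {
    #     "accuracy": val_acc,
    #     "precision_macro": val_prec,
    #     "recall_macro": val_rec,
    #     "f1_macro": val_f1,
    #     "precision_per_class": val_prec_per_class.tolist(),
    #     "recall_per_class": val_rec_per_class.tolist(),
    #     "f1_per_class": val_f1_per_class.tolist(),
    # }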
qwen_classifier/predict.py
CHANGED
@@ -31,8 +31,6 @@ def _predict_local(text, hf_repo):
     return _process_output(logits)
 
 def _predict_hf_api(text, hf_token=None):
-
-
     try:
         response = requests.post(
             f"{SPACE_URL}/predict",
setup.py
CHANGED
@@ -12,7 +12,9 @@ setup(
         'huggingface_hub',
         'requests',
         'pandas',
-        'pydantic'
+        'pydantic',
+        'datasets',
+        'scikit-learn'
     ],
     entry_points={
         'console_scripts': [
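The two added requirements line up with the new imports in evaluate.py (datasets.Dataset and the sklearn metric functions). A tiny sanity check of that pairing, assuming the package's environment is installed, might look like:

    # Quick import/behaviour check for the newly declared dependencies.
    import pandas as pd
    from datasets import Dataset              # provided by 'datasets'
    from sklearn.metrics import f1_score      # provided by 'scikit-learn'

    df = pd.DataFrame({"text": ["problem a", "problem b"], "labels": [[1, 0], [0, 1]]})
    ds = Dataset.from_pandas(df)              # same pandas -> datasets bridge as evaluate.py
    print(ds.column_names)                    # ['text', 'labels']
    print(f1_score([[1, 0], [0, 1]], [[1, 0], [0, 1]], average="macro"))  # 1.0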