evaluate
- qwen_classifier/evaluate.py +77 -4
- qwen_classifier/predict.py +0 -2
- setup.py +3 -1
qwen_classifier/evaluate.py
CHANGED
@@ -1,9 +1,14 @@
 import numpy as np
 from sklearn.metrics import classification_report
+from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 import zipfile
 import json
 import pandas as pd
-
+import torch
+from datasets import Dataset
+from torch.utils.data import DataLoader
+from .config import TAG_NAMES, DEVICE
+from .predict import predict_batch
 
 def load_data(test_data_path):
     # zip file handler
@@ -31,7 +36,6 @@ def load_data(test_data_path):
     return df
 
 def preprocessing(df):
-    # Example dataset
     texts = df["prob_desc_description"].values.tolist()
     labels = df[TAG_NAMES].values.tolist()
 
@@ -40,9 +44,78 @@ def preprocessing(df):
     # labels = [[0,1,0,0,1,0,1,1,0], [0,1,1,0,0,0,0,0,0],, ...] # list of labels
 
     df = pd.DataFrame({'text':texts, 'labels': labels})
+    return df
+
+
 
+def evaluate_batch(text, hf_repo, backend="local", hf_token=None):
+    if backend == "local":
+        return _evaluate_local(text, hf_repo)
+    elif backend == "hf":
+        return _evaluate_hf_api(text, hf_token)
+    else:
+        raise ValueError(f"Unknown backend: {backend}")
 
-def
+def _evaluate_local(test_data_path, hf_repo):
+    global local_model, local_tokenizer
+
+    # Lazy-loading to avoid slow startup
+    if local_model is None:
+        from .model import QwenClassifier
+        from transformers import AutoTokenizer
+
+        local_model = QwenClassifier.from_pretrained(hf_repo).eval()
+        local_tokenizer = AutoTokenizer.from_pretrained(hf_repo)
     df = load_data(test_data_path)
     df = preprocessing(df)
-
+
+    hf_dataset = Dataset.from_pandas(df)
+
+    # Then apply tokenization
+    def tokenize_function(examples):
+        return local_tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
+
+    dataset = hf_dataset.map(tokenize_function, batched=True)
+
+    dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
+
+    dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
+
+
+    local_model.eval()
+    all_preds = []
+    all_labels = []
+
+    with torch.no_grad():
+        for batch in dataloader:
+            batch = {k: v.to(DEVICE) for k, v in batch.items()}
+            labels = batch["labels"].type(torch.float32)
+
+            logits = local_model(batch["input_ids"], batch["attention_mask"])
+
+            preds = torch.sigmoid(logits).cpu().numpy() > 0.5
+            labels = labels.cpu().numpy()
+
+            all_preds.extend(preds)
+            all_labels.extend(labels)
+
+    val_acc = accuracy_score(all_labels, all_preds)
+    val_prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
+    val_rec = recall_score(all_labels, all_preds, average='macro')
+    val_f1 = f1_score(all_labels, all_preds, average='macro')
+    val_prec_per_class = precision_score(all_labels, all_preds, average=None, zero_division=0)
+    val_rec_per_class = recall_score(all_labels, all_preds, average=None)
+    val_f1_per_class = f1_score(all_labels, all_preds, average=None)
+
+    metrics = {
+        val_acc,
+        val_prec,
+        val_rec,
+        val_f1,
+        val_prec_per_class,
+        val_rec_per_class,
+        val_f1_per_class
+    }
+    report = classification_report(all_labels, all_preds, target_names=TAG_NAMES, zero_division=0)
+
+    return metrics, report
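For reference, a minimal usage sketch of the new evaluation entry point. The repo id and data path are placeholders, and the keyed metrics dict is an assumption about intent: as committed, the metrics literal in _evaluate_local is a Python set (the per-class numpy arrays in it are unhashable), so named keys along these lines are presumably what is meant.

    # Sketch only. "user/qwen-classifier" and "test_data.zip" are placeholders,
    # and the metric key names are assumptions, not part of this commit.
    from qwen_classifier.evaluate import evaluate_batch

    metrics, report = evaluate_batch(
        "test_data.zip",                     # zip archive consumed by load_data()
        hf_repo="user/qwen-classifier",      # placeholder model repo
        backend="local",                     # "hf" would call the hosted Space instead
    )
    print(report)

    # Keyed shape the set literal presumably intends:
    # metrics = {
    #     "accuracy": val_acc,
    #     "precision_macro": val_prec,
    #     "recall_macro": val_rec,
    #     "f1_macro": val_f1,
    #     "precision_per_class": val_prec_per_class.tolist(),
    #     "recall_per_class": val_rec_per_class.tolist(),
    #     "f1_per_class": val_f1_per_class.tolist(),
    # }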
qwen_classifier/predict.py
CHANGED
@@ -31,8 +31,6 @@ def _predict_local(text, hf_repo):
     return _process_output(logits)
 
 def _predict_hf_api(text, hf_token=None):
-
-
     try:
         response = requests.post(
             f"{SPACE_URL}/predict",
setup.py
CHANGED
@@ -12,7 +12,9 @@ setup(
         'huggingface_hub',
         'requests',
         'pandas',
-        'pydantic'
+        'pydantic',
+        'datasets',
+        'scikit-learn'
     ],
     entry_points={
         'console_scripts': [
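The two added requirements line up with the new imports in evaluate.py (datasets.Dataset and the sklearn metric functions). A tiny sanity check of that pairing, assuming the package's environment is installed, might look like:

    # Quick import/behaviour check for the newly declared dependencies.
    import pandas as pd
    from datasets import Dataset              # provided by 'datasets'
    from sklearn.metrics import f1_score      # provided by 'scikit-learn'

    df = pd.DataFrame({"text": ["problem a", "problem b"], "labels": [[1, 0], [0, 1]]})
    ds = Dataset.from_pandas(df)              # same pandas -> datasets bridge as evaluate.py
    print(ds.column_names)                    # ['text', 'labels']
    print(f1_score([[1, 0], [0, 1]], [[1, 0], [0, 1]], average="macro"))  # 1.0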