KeivanR committed on
Commit
8524cf7
·
1 Parent(s): b820b0a
qwen_classifier/evaluate.py CHANGED
@@ -1,9 +1,14 @@
1
  import numpy as np
2
  from sklearn.metrics import classification_report
 
3
  import zipfile
4
  import json
5
  import pandas as pd
6
- from .config import TAG_NAMES
 
 
 
 
7
 
8
  def load_data(test_data_path):
9
  # zip file handler
@@ -31,7 +36,6 @@ def load_data(test_data_path):
31
  return df
32
 
33
  def preprocessing(df):
34
- # Example dataset
35
  texts = df["prob_desc_description"].values.tolist()
36
  labels = df[TAG_NAMES].values.tolist()
37
 
@@ -40,9 +44,78 @@ def preprocessing(df):
40
  # labels = [[0,1,0,0,1,0,1,1,0], [0,1,1,0,0,0,0,0,0], ...] # list of labels
41
 
42
  df = pd.DataFrame({'text':texts, 'labels': labels})
 
 
 
43
 
 
 
 
 
 
 
 
44
 
45
- def evaluate_model(test_data_path):
 
 
 
 
 
 
 
 
 
46
  df = load_data(test_data_path)
47
  df = preprocessing(df)
48
- return metrics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
  from sklearn.metrics import classification_report
3
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
4
  import zipfile
5
  import json
6
  import pandas as pd
7
+ import torch
8
+ from datasets import Dataset
9
+ from torch.utils.data import DataLoader
10
+ from .config import TAG_NAMES, DEVICE
11
+ from .predict import predict_batch
12
 
13
  def load_data(test_data_path):
14
  # zip file handler
 
36
  return df
37
 
38
  def preprocessing(df):
 
39
  texts = df["prob_desc_description"].values.tolist()
40
  labels = df[TAG_NAMES].values.tolist()
41
 
 
44
  # labels = [[0,1,0,0,1,0,1,1,0], [0,1,1,0,0,0,0,0,0], ...] # list of labels
45
 
46
  df = pd.DataFrame({'text':texts, 'labels': labels})
47
+ return df
48
+
49
+
50
 
51
def evaluate_batch(text, hf_repo, backend="local", hf_token=None):
    """Run an evaluation with the selected backend.

    Args:
        text: Despite its name, this value is forwarded unchanged to
            `_evaluate_local` as its `test_data_path` argument.
        hf_repo: Repository identifier forwarded to `_evaluate_local`.
        backend: `"local"` to evaluate with the locally loaded model.
            `"hf"` is recognised but not implemented.
        hf_token: Accepted for the `"hf"` backend; currently unused because
            that backend has no implementation.

    Returns:
        Whatever `_evaluate_local` returns for the `"local"` backend.

    Raises:
        NotImplementedError: If `backend` is `"hf"`.
        ValueError: If `backend` is any other unknown value.
    """
    if backend == "local":
        return _evaluate_local(text, hf_repo)
    if backend == "hf":
        # No `_evaluate_hf_api` helper is defined or imported in this module,
        # so fail with an explicit message instead of a NameError.
        raise NotImplementedError(
            "Evaluation through the 'hf' backend is not implemented"
        )
    raise ValueError(f"Unknown backend: {backend}")
58
 
59
# Lazily populated cache so the model and tokenizer are loaded at most once per
# process. These names must exist at module level before `_evaluate_local`
# reads them through its `global` declaration; otherwise the first
# `is None` check raises NameError.
local_model = None
local_tokenizer = None


def _evaluate_local(test_data_path, hf_repo):
    """Evaluate the locally loaded classifier on a labelled test set.

    The data is read with `load_data`, reshaped with `preprocessing` into
    `text` / `labels` columns, tokenized, and fed through the model in batches
    of 8. A label is predicted when its sigmoid probability exceeds 0.5.

    Args:
        test_data_path: Path handed to `load_data`.
        hf_repo: Identifier passed to `QwenClassifier.from_pretrained` and
            `AutoTokenizer.from_pretrained` on first use.

    Returns:
        A `(metrics, report)` tuple. `metrics` is a dict with the keys
        `accuracy`, `precision_macro`, `recall_macro`, `f1_macro`,
        `precision_per_class`, `recall_per_class` and `f1_per_class`;
        `report` is the text produced by `classification_report` using
        `TAG_NAMES` as target names.
    """
    global local_model, local_tokenizer

    # Lazy-loading to avoid slow startup
    if local_model is None or local_tokenizer is None:
        from .model import QwenClassifier
        from transformers import AutoTokenizer

        # Batches are moved to DEVICE below, so the model has to live there
        # too. NOTE(review): assumes QwenClassifier is a torch.nn.Module whose
        # `.to()` returns the module - confirm against `.model`.
        local_model = QwenClassifier.from_pretrained(hf_repo).to(DEVICE).eval()
        local_tokenizer = AutoTokenizer.from_pretrained(hf_repo)

    df = load_data(test_data_path)
    df = preprocessing(df)

    hf_dataset = Dataset.from_pandas(df)

    def tokenize_function(examples):
        return local_tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=512,
        )

    dataset = hf_dataset.map(tokenize_function, batched=True)
    dataset.set_format(
        type='torch',
        columns=['input_ids', 'attention_mask', 'labels'],
    )

    # Order does not affect the metrics, so keep evaluation deterministic.
    dataloader = DataLoader(dataset, batch_size=8, shuffle=False)

    local_model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(DEVICE) for k, v in batch.items()}

            logits = local_model(batch["input_ids"], batch["attention_mask"])

            # Store both sides as 0/1 integers so scikit-learn sees the same
            # multilabel indicator format for predictions and targets.
            preds = (torch.sigmoid(logits).cpu().numpy() > 0.5).astype(int)
            labels = batch["labels"].cpu().numpy().astype(int)

            all_preds.extend(preds)
            all_labels.extend(labels)

    # A dict rather than a set literal: the per-class entries are numpy
    # arrays, which are unhashable, and a set would also lose which value is
    # which metric.
    metrics = {
        # NOTE(review): for multilabel targets scikit-learn's accuracy_score
        # is exact-match (subset) accuracy.
        "accuracy": accuracy_score(all_labels, all_preds),
        "precision_macro": precision_score(
            all_labels, all_preds, average='macro', zero_division=0
        ),
        "recall_macro": recall_score(
            all_labels, all_preds, average='macro', zero_division=0
        ),
        "f1_macro": f1_score(
            all_labels, all_preds, average='macro', zero_division=0
        ),
        "precision_per_class": precision_score(
            all_labels, all_preds, average=None, zero_division=0
        ),
        "recall_per_class": recall_score(
            all_labels, all_preds, average=None, zero_division=0
        ),
        "f1_per_class": f1_score(
            all_labels, all_preds, average=None, zero_division=0
        ),
    }
    report = classification_report(
        all_labels, all_preds, target_names=TAG_NAMES, zero_division=0
    )

    return metrics, report
qwen_classifier/predict.py CHANGED
@@ -31,8 +31,6 @@ def _predict_local(text, hf_repo):
31
  return _process_output(logits)
32
 
33
  def _predict_hf_api(text, hf_token=None):
34
-
35
-
36
  try:
37
  response = requests.post(
38
  f"{SPACE_URL}/predict",
 
31
  return _process_output(logits)
32
 
33
  def _predict_hf_api(text, hf_token=None):
 
 
34
  try:
35
  response = requests.post(
36
  f"{SPACE_URL}/predict",
setup.py CHANGED
@@ -12,7 +12,9 @@ setup(
12
  'huggingface_hub',
13
  'requests',
14
  'pandas',
15
- 'pydantic'
 
 
16
  ],
17
  entry_points={
18
  'console_scripts': [
 
12
  'huggingface_hub',
13
  'requests',
14
  'pandas',
15
+ 'pydantic',
16
+ 'datasets',
17
+ 'scikit-learn'
18
  ],
19
  entry_points={
20
  'console_scripts': [