KeivanR committed on
Commit
fe52d58
·
1 Parent(s): f25cbe8

eval fix class issue

Browse files
.gitignore CHANGED
@@ -84,4 +84,6 @@ $RECYCLE.BIN/
84
 
85
  # Project-specific (adjust as needed)
86
  qwen_classifier/__pycache__/
87
- qwen_classifier.egg-info/
 
 
 
84
 
85
  # Project-specific (adjust as needed)
86
  qwen_classifier/__pycache__/
87
+ qwen_classifier.egg-info/
88
+
89
+ qwen_classifier/test_jsons.zip
qwen_classifier/config.py CHANGED
@@ -19,4 +19,4 @@ TAG_NAMES = [
19
  'trees'
20
  ]
21
 
22
- EVAL_LIMIT = 2
 
19
  'trees'
20
  ]
21
 
22
+ EVAL_LIMIT = 20
qwen_classifier/evaluate.py CHANGED
@@ -70,7 +70,7 @@ def _load_data(test_data_path):
70
  return pd.DataFrame(data, columns=cols)
71
 
72
  def _preprocessing(df):
73
- mlb = MultiLabelBinarizer()
74
  tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees', 'geometry', 'games', 'probabilities']
75
 
76
  # Filter tags and one-hot encode
@@ -83,7 +83,7 @@ def _preprocessing(df):
83
 
84
  # Concatenate the encoded tags with the original DataFrame
85
  df = pd.concat([df, encoded_df], axis=1)
86
-
87
  texts = df["prob_desc_description"].values.tolist()
88
  labels = df[TAG_NAMES].values.tolist()
89
 
@@ -127,7 +127,7 @@ def _evaluate_local(test_data_path, hf_repo):
127
 
128
  dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
129
 
130
- dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
131
 
132
 
133
  global_model.eval()
@@ -156,17 +156,17 @@ def _evaluate_local(test_data_path, hf_repo):
156
  val_f1_per_class = f1_score(all_labels, all_preds, average=None)
157
 
158
  metrics = {
159
- val_acc,
160
- val_prec,
161
- val_rec,
162
- val_f1,
163
- val_prec_per_class,
164
- val_rec_per_class,
165
- val_f1_per_class
166
  }
167
- report = classification_report(all_labels, all_preds, target_names=TAG_NAMES, zero_division=0)
168
 
169
- return metrics, report
170
 
171
 
172
  def _evaluate_hf_api(file_path, hf_token=None):
@@ -178,7 +178,7 @@ def _evaluate_hf_api(file_path, hf_token=None):
178
  "Authorization": f"Bearer {hf_token}",
179
  "Content-Type": "application/json"
180
  } if hf_token else {"Content-Type": "application/json"},
181
- timeout=10
182
  )
183
  response.raise_for_status() # Raise HTTP errors
184
  return response.json()
 
70
  return pd.DataFrame(data, columns=cols)
71
 
72
  def _preprocessing(df):
73
+ mlb = MultiLabelBinarizer(classes = TAG_NAMES)
74
  tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees', 'geometry', 'games', 'probabilities']
75
 
76
  # Filter tags and one-hot encode
 
83
 
84
  # Concatenate the encoded tags with the original DataFrame
85
  df = pd.concat([df, encoded_df], axis=1)
86
+ print(df.columns)
87
  texts = df["prob_desc_description"].values.tolist()
88
  labels = df[TAG_NAMES].values.tolist()
89
 
 
127
 
128
  dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
129
 
130
+ dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
131
 
132
 
133
  global_model.eval()
 
156
  val_f1_per_class = f1_score(all_labels, all_preds, average=None)
157
 
158
  metrics = {
159
+ 'Accuracy':(100*val_acc).astype(int),
160
+ 'Precision':(100*val_prec).astype(int),
161
+ 'Recall':(100*val_rec).astype(int),
162
+ 'F1':(100*val_f1).astype(int),
163
+ 'Precision_per_class':(100*val_prec_per_class).astype(int),
164
+ 'Recall_per_class':(100*val_rec_per_class).astype(int),
165
+ 'F1_per_class':(100*val_f1_per_class).astype(int),
166
  }
167
+ # report = classification_report(all_labels, all_preds, target_names=TAG_NAMES, zero_division=0)
168
 
169
+ return metrics
170
 
171
 
172
  def _evaluate_hf_api(file_path, hf_token=None):
 
178
  "Authorization": f"Bearer {hf_token}",
179
  "Content-Type": "application/json"
180
  } if hf_token else {"Content-Type": "application/json"},
181
+ timeout=30
182
  )
183
  response.raise_for_status() # Raise HTTP errors
184
  return response.json()