KeivanR committed on
Commit
fe52d58
·
1 Parent(s): f25cbe8

eval fix class issue

Browse files
.gitignore CHANGED
@@ -84,4 +84,6 @@ $RECYCLE.BIN/
84
 
85
  # Project-specific (adjust as needed)
86
  qwen_classifier/__pycache__/
87
- qwen_classifier.egg-info/
 
 
 
84
 
85
  # Project-specific (adjust as needed)
86
  qwen_classifier/__pycache__/
87
+ qwen_classifier.egg-info/
88
+
89
+ qwen_classifier/test_jsons.zip
qwen_classifier/config.py CHANGED
@@ -19,4 +19,4 @@ TAG_NAMES = [
19
  'trees'
20
  ]
21
 
22
- EVAL_LIMIT = 2
 
19
  'trees'
20
  ]
21
 
22
+ EVAL_LIMIT = 20
qwen_classifier/evaluate.py CHANGED
@@ -70,7 +70,7 @@ def _load_data(test_data_path):
70
  return pd.DataFrame(data, columns=cols)
71
 
72
  def _preprocessing(df):
73
- mlb = MultiLabelBinarizer()
74
  tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees', 'geometry', 'games', 'probabilities']
75
 
76
  # Filter tags and one-hot encode
@@ -83,7 +83,7 @@ def _preprocessing(df):
83
 
84
  # Concatenate the encoded tags with the original DataFrame
85
  df = pd.concat([df, encoded_df], axis=1)
86
-
87
  texts = df["prob_desc_description"].values.tolist()
88
  labels = df[TAG_NAMES].values.tolist()
89
 
@@ -127,7 +127,7 @@ def _evaluate_local(test_data_path, hf_repo):
127
 
128
  dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
129
 
130
- dataloader = DataLoader(dataset, batch_size=8, shuffle=True)
131
 
132
 
133
  global_model.eval()
@@ -156,17 +156,17 @@ def _evaluate_local(test_data_path, hf_repo):
156
  val_f1_per_class = f1_score(all_labels, all_preds, average=None)
157
 
158
  metrics = {
159
- val_acc,
160
- val_prec,
161
- val_rec,
162
- val_f1,
163
- val_prec_per_class,
164
- val_rec_per_class,
165
- val_f1_per_class
166
  }
167
- report = classification_report(all_labels, all_preds, target_names=TAG_NAMES, zero_division=0)
168
 
169
- return metrics, report
170
 
171
 
172
  def _evaluate_hf_api(file_path, hf_token=None):
@@ -178,7 +178,7 @@ def _evaluate_hf_api(file_path, hf_token=None):
178
  "Authorization": f"Bearer {hf_token}",
179
  "Content-Type": "application/json"
180
  } if hf_token else {"Content-Type": "application/json"},
181
- timeout=10
182
  )
183
  response.raise_for_status() # Raise HTTP errors
184
  return response.json()
 
70
  return pd.DataFrame(data, columns=cols)
71
 
72
  def _preprocessing(df):
73
+ mlb = MultiLabelBinarizer(classes = TAG_NAMES)
74
  tags_to_encode = ['math', 'graphs', 'strings', 'number theory', 'trees', 'geometry', 'games', 'probabilities']
75
 
76
  # Filter tags and one-hot encode
 
83
 
84
  # Concatenate the encoded tags with the original DataFrame
85
  df = pd.concat([df, encoded_df], axis=1)
86
+ print(df.columns)
87
  texts = df["prob_desc_description"].values.tolist()
88
  labels = df[TAG_NAMES].values.tolist()
89
 
 
127
 
128
  dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
129
 
130
+ dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
131
 
132
 
133
  global_model.eval()
 
156
  val_f1_per_class = f1_score(all_labels, all_preds, average=None)
157
 
158
  metrics = {
159
+ 'Accuracy':(100*val_acc).astype(int),
160
+ 'Precision':(100*val_prec).astype(int),
161
+ 'Recall':(100*val_rec).astype(int),
162
+ 'F1':(100*val_f1).astype(int),
163
+ 'Precision_per_class':(100*val_prec_per_class).astype(int),
164
+ 'Recall_per_class':(100*val_rec_per_class).astype(int),
165
+ 'F1_per_class':(100*val_f1_per_class).astype(int),
166
  }
167
+ # report = classification_report(all_labels, all_preds, target_names=TAG_NAMES, zero_division=0)
168
 
169
+ return metrics
170
 
171
 
172
  def _evaluate_hf_api(file_path, hf_token=None):
 
178
  "Authorization": f"Bearer {hf_token}",
179
  "Content-Type": "application/json"
180
  } if hf_token else {"Content-Type": "application/json"},
181
+ timeout=30
182
  )
183
  response.raise_for_status() # Raise HTTP errors
184
  return response.json()