import zipfile
import json

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from torch.utils.data import DataLoader
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
)

from .config import TAG_NAMES, DEVICE
from .predict import predict_batch

# Module-level cache for the lazily loaded model and tokenizer,
# checked and populated by _evaluate_local().
local_model = None
local_tokenizer = None
def load_data(test_data_path):
    # Open the zip archive containing the test dataset
    zip_file = zipfile.ZipFile(test_data_path)
    # List the files available in the archive
    names = zip_file.namelist()

    data = []
    features = ["prob_desc_description", "prob_desc_input_spec", "prob_desc_output_spec"]
    cols = features + ["tags"]

    # Read each JSON record from the archive, skipping the first entry
    # (assumed to be the archive's root directory entry)
    for name in names[1:]:
        f = zip_file.open(name)
        content = f.read()
        d = json.loads(content)
        row = [d[c] for c in cols]
        data.append(row)

    df = pd.DataFrame(data, columns=cols)
    return df
def preprocessing(df):
    # texts:  ["text1", "text2", ...]                               list of problem descriptions
    # labels: [[0,1,0,0,1,0,1,1,0], [0,1,1,0,0,0,0,0,0], ...]       multi-hot tag vectors
    texts = df["prob_desc_description"].values.tolist()
    labels = df[TAG_NAMES].values.tolist()
    df = pd.DataFrame({"text": texts, "labels": labels})
    return df
def evaluate_batch(test_data_path, hf_repo, backend="local", hf_token=None):
    if backend == "local":
        return _evaluate_local(test_data_path, hf_repo)
    elif backend == "hf":
        return _evaluate_hf_api(test_data_path, hf_token)
    else:
        raise ValueError(f"Unknown backend: {backend}")
def _evaluate_local(test_data_path, hf_repo):
    global local_model, local_tokenizer

    # Lazy-load the model and tokenizer to avoid a slow module import
    if local_model is None:
        from .model import QwenClassifier
        from transformers import AutoTokenizer
        local_model = QwenClassifier.from_pretrained(hf_repo).eval()
        local_tokenizer = AutoTokenizer.from_pretrained(hf_repo)

    df = load_data(test_data_path)
    df = preprocessing(df)
    hf_dataset = Dataset.from_pandas(df)

    # Tokenize the texts and keep only the tensors the model needs
    def tokenize_function(examples):
        return local_tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

    dataset = hf_dataset.map(tokenize_function, batched=True)
    dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    dataloader = DataLoader(dataset, batch_size=8, shuffle=False)  # no shuffling needed for evaluation
    local_model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(DEVICE) for k, v in batch.items()}
            labels = batch["labels"].type(torch.float32)
            logits = local_model(batch["input_ids"], batch["attention_mask"])
            # Multi-label prediction: sigmoid per tag, thresholded at 0.5
            preds = torch.sigmoid(logits).cpu().numpy() > 0.5
            labels = labels.cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels)

    val_acc = accuracy_score(all_labels, all_preds)
    val_prec = precision_score(all_labels, all_preds, average="macro", zero_division=0)
    val_rec = recall_score(all_labels, all_preds, average="macro", zero_division=0)
    val_f1 = f1_score(all_labels, all_preds, average="macro", zero_division=0)
    val_prec_per_class = precision_score(all_labels, all_preds, average=None, zero_division=0)
    val_rec_per_class = recall_score(all_labels, all_preds, average=None, zero_division=0)
    val_f1_per_class = f1_score(all_labels, all_preds, average=None, zero_division=0)

    # Collect the metrics into a dict keyed by name
    metrics = {
        "accuracy": val_acc,
        "precision_macro": val_prec,
        "recall_macro": val_rec,
        "f1_macro": val_f1,
        "precision_per_class": val_prec_per_class,
        "recall_per_class": val_rec_per_class,
        "f1_per_class": val_f1_per_class,
    }

    report = classification_report(all_labels, all_preds, target_names=TAG_NAMES, zero_division=0)
    return metrics, report
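

# --- Usage sketch (illustrative only) ----------------------------------------
# Minimal example of calling the local evaluation path. The archive name matches
# the dataset referenced above; the repo id "user/qwen-tag-classifier" is a
# placeholder, not a real checkpoint. Because this module uses relative imports,
# run it as a module, e.g. `python -m <package>.evaluate` (package name assumed).
if __name__ == "__main__":
    metrics, report = evaluate_batch(
        "code_classification_dataset.zip",  # path to the zipped test set
        "user/qwen-tag-classifier",         # placeholder HF repo id for the classifier
        backend="local",
    )
    print(report)
    print("macro F1:", metrics["f1_macro"])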