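# NOTE: the text "Spaces: Sleeping" appears to be page-status residue captured
# along with this file; it is not part of the Python module.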
import json
import zipfile

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report

from .config import TAG_NAMES
def load_data(test_data_path):
    """Load problem statements and their tags from a zip archive of JSON files.

    Every archive member except the first is read, parsed as a JSON object,
    and reduced to the fields listed in ``cols`` below.

    Args:
        test_data_path: Path of the zip archive to read. When it is ``None``
            or empty, the previously hard-coded archive
            ``'code_classification_dataset.zip'`` is used instead.

    Returns:
        A ``pandas.DataFrame`` with the columns ``prob_desc_description``,
        ``prob_desc_input_spec``, ``prob_desc_output_spec`` and ``tags``,
        one row per JSON member, in archive order.

    Raises:
        KeyError: If a JSON member lacks one of the required fields.
    """
    # The argument used to be ignored in favour of a hard-coded file name.
    # Honour it, but keep the old name as a fallback so existing callers
    # that pass None/"" keep working.
    archive_path = test_data_path or 'code_classification_dataset.zip'

    features = ["prob_desc_description", "prob_desc_input_spec", "prob_desc_output_spec"]
    cols = features + ["tags"]
    data = []

    # Context managers make sure the archive and each member handle are
    # closed even if a member fails to parse.
    with zipfile.ZipFile(archive_path) as zip_file:
        # The first entry is skipped, as before -- presumably it is the
        # archive's top-level directory entry. TODO confirm against the dataset.
        for name in zip_file.namelist()[1:]:
            with zip_file.open(name) as f:
                d = json.loads(f.read())
            data.append([d[c] for c in cols])

    return pd.DataFrame(data, columns=cols)
def preprocessing(df):
    """Reshape the loaded data into a two-column text/labels frame.

    Args:
        df: DataFrame with a ``prob_desc_description`` column and one column
            per entry of ``TAG_NAMES``.

    Returns:
        A new ``pandas.DataFrame`` with a ``text`` column (the problem
        descriptions) and a ``labels`` column holding, for each row, the list
        of that row's values across the ``TAG_NAMES`` columns -- presumably
        0/1 indicators such as ``[0, 1, 0, 0, 1, 0, 1, 1, 0]``.

    Raises:
        KeyError: If ``df`` lacks any of the columns read here.
    """
    texts = df["prob_desc_description"].values.tolist()
    # NOTE(review): load_data() in this file emits a single "tags" column, so
    # the per-tag TAG_NAMES columns must be created somewhere before this
    # function is called -- confirm.
    labels = df[TAG_NAMES].values.tolist()
    # The frame used to be built and then dropped, so callers received None.
    return pd.DataFrame({'text': texts, 'labels': labels})
def evaluate_model(test_data_path):
    """Load and preprocess the test data, then return the evaluation metrics.

    Args:
        test_data_path: Passed through unchanged to ``load_data``.

    Returns:
        Whatever is bound to the name ``metrics``.
    """
    df = load_data(test_data_path)
    df = preprocessing(df)
    # NOTE(review): ``metrics`` is never assigned in the visible code, so this
    # line raises NameError as written. The prediction/scoring step appears to
    # be missing (the file imports classification_report but never calls it).
    # TODO: restore that step.
    return metrics