Spaces:

KeivanR
/

qwen-classifier-demo

Sleeping

text input as json

b0cd906 about 1 month ago

1.34 kB

	import numpy as np
	from sklearn.metrics import classification_report
	import zipfile
	import json
	import pandas as pd
	from .config import TAG_NAMES

	def load_data(test_data_path):
	    """Load the problem records from the dataset zip archive into a DataFrame.

	    Every archive member except the first is read, parsed as a JSON object,
	    and reduced to the description, input-spec, output-spec and tags fields,
	    giving one row per member.

	    Args:
	        test_data_path: Currently unused.
	            NOTE(review): the archive name is hard-coded below; this
	            parameter was presumably meant to supply it -- confirm with the
	            callers before switching over.

	    Returns:
	        A pandas DataFrame with the columns ``prob_desc_description``,
	        ``prob_desc_input_spec``, ``prob_desc_output_spec`` and ``tags``,
	        in archive order.

	    Raises:
	        FileNotFoundError: If ``code_classification_dataset.zip`` is not in
	            the current working directory.
	        KeyError: If a member's JSON object lacks one of the four fields.
	    """
	    features = ["prob_desc_description", "prob_desc_input_spec", "prob_desc_output_spec"]
	    cols = features + ["tags"]
	    data = []

	    # Context managers guarantee the archive and every member handle are
	    # closed, even when a member fails to parse.
	    with zipfile.ZipFile('code_classification_dataset.zip') as zip_file:
	        # The first entry is skipped -- presumably it is the archive's
	        # top-level directory entry; TODO confirm against the dataset.
	        for name in zip_file.namelist()[1:]:
	            with zip_file.open(name) as f:
	                d = json.loads(f.read())
	            data.append([d[c] for c in cols])

	    return pd.DataFrame(data, columns=cols)

	def preprocessing(df):
	    """Reshape a problem DataFrame into a two-column text/labels DataFrame.

	    Args:
	        df: DataFrame with a ``prob_desc_description`` column and one column
	            for every name in ``TAG_NAMES``.
	            NOTE(review): each tag column presumably holds a 0/1 indicator
	            (the original inline example shows rows such as
	            ``[0,1,0,0,1,0,1,1,0]``) -- confirm against the data.

	    Returns:
	        A new DataFrame with a ``text`` column (the problem descriptions) and
	        a ``labels`` column (one list per row holding that row's values for
	        the ``TAG_NAMES`` columns, in ``TAG_NAMES`` order).

	    Raises:
	        KeyError: If ``df`` lacks ``prob_desc_description`` or any of the
	            ``TAG_NAMES`` columns.
	    """
	    texts = df["prob_desc_description"].values.tolist()
	    labels = df[TAG_NAMES].values.tolist()

	    # The result used to be built and then dropped, so callers received
	    # None; return it so `df = preprocessing(df)` works as written.
	    return pd.DataFrame({'text': texts, 'labels': labels})


	def evaluate_model(test_data_path):
	    """Load and preprocess the test data, then return ``metrics``.

	    Args:
	        test_data_path: Forwarded unchanged to ``load_data``.

	    Returns:
	        Whatever the name ``metrics`` resolves to.
	        NOTE(review): ``metrics`` is not assigned anywhere in this function;
	        unless it is defined at module level elsewhere, the return statement
	        raises NameError. The evaluation step that should produce it appears
	        to be missing -- confirm the intended computation.
	    """
	    raw_frame = load_data(test_data_path)
	    prepared_frame = preprocessing(raw_frame)
	    return metrics