# TabArena-WIP / src/utils.py
from pathlib import Path
import pandas as pd
import os
import re
from scipy import stats
from src.constants import ProblemTypes, MetricNames
METRIC_CHOICES = [
MetricNames.normalized_error,
MetricNames.fit_time_per_1K_rows,
MetricNames.inference_time_per_1K_rows,
]
# Format a value for display: scientific notation for large magnitudes, three decimals otherwise.
def format_number(num):
# Check if the value is numeric
if isinstance(num, (int, float)):
if abs(num) >= 10**2:
return f"{num:.1e}"
else:
return f"{num:.3f}"
# Return non-numeric values as-is
return num
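# Illustrative examples of the thresholds above (values follow directly from the code):
#   format_number(123456.0) -> '1.2e+05'   (|x| >= 100 uses scientific notation)
#   format_number(0.12345)  -> '0.123'     (smaller values keep three decimals)
#   format_number('N/A')    -> 'N/A'       (non-numeric values pass through)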
def norm_sNavie(df):
df_normalized = df.copy()
seasonal_naive_row = df[df['model'] == 'seasonal_naive'].iloc[0]
print('df: ',df)
for column in df.columns:
if column != 'model': # We skip normalizing the 'model' column
df_normalized[column] = df[column] / seasonal_naive_row[column]
return df_normalized
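# Illustrative example (made-up values): if the 'seasonal_naive' row has MASE 2.0 and another
# model has MASE 1.0, every metric column is divided by the seasonal_naive row, so that model
# ends up with a normalized MASE of 1.0 / 2.0 = 0.5.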
def pivot_df(file_name, tab_name):
df = pd.read_csv(file_name)
if tab_name == 'univariate':
df['univariate'] = df['univariate'].replace({True: 'univariate', False: 'multivariate'})
df.rename(columns={'univariate': 'variate_type'}, inplace=True)
tab_name = 'variate_type'
df_melted = pd.melt(df, id_vars=[tab_name, 'model'], var_name='metric', value_name='value')
df_melted['metric'] = df_melted['metric'].replace({
'eval_metrics/MAPE[0.5]': 'MAPE',
'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS'
})
df_pivot = df_melted.pivot_table(index='model', columns=[tab_name, 'metric'], values='value')
df_pivot.columns = [f'{tab_name} ({metric})' for tab_name, metric in df_pivot.columns]
# df_pivot.to_csv('pivoted_df.csv')
# print(df_pivot)
df_pivot = df_pivot.reset_index()
df_pivot = df_pivot.round(3)
return df_pivot
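# Illustrative effect ('domain' and 'Energy' are made-up example values, not fixed names):
# a long CSV with columns ['domain', 'model', 'eval_metrics/MAPE[0.5]', ...] is melted and
# re-pivoted so each model becomes a single row with columns such as 'Energy (MAPE)'.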
def rename_metrics(df):
df = df.rename(columns={
f'eval_metrics/{MetricNames.normalized_error}': MetricNames.normalized_error,
f'eval_metrics/{MetricNames.inference_time_per_1K_rows}': "Inference time / 1K rows (s)",
f'eval_metrics/{MetricNames.fit_time_per_1K_rows}': "Fit time / 1K rows (s)",
})
return df
def format_df(df):
df = df.applymap(format_number)
# make sure the data type is float
df.iloc[:, 1:] = df.iloc[:, 1:].astype(float)
return df
def unify_freq(df):
# Remove all numeric characters from the 'frequency' column
df['frequency'] = df['frequency'].str.replace(r'\d+', '', regex=True)
# Remove everything after '-' if present
df['frequency'] = df['frequency'].str.split('-').str[0]
# Define the frequency conversion dictionary
freq_conversion = {
'T': 'Minutely',
'H': 'Hourly',
'D': 'Daily',
'W': 'Weekly',
'M': 'Monthly',
'Q': 'Quarterly',
'Y': 'Yearly',
'A': 'Yearly',
'S': 'Secondly'
}
# Map the cleaned 'frequency' values using the dictionary
df['frequency'] = df['frequency'].replace(freq_conversion)
return df
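# Illustrative examples (derived from the mapping above):
#   '15T'   -> 'Minutely'    (digits stripped first)
#   'W-SUN' -> 'Weekly'      (suffix after '-' dropped)
#   'Q-DEC' -> 'Quarterly'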
def pivot_existed_df(df, tab_name):
df = df.reset_index()
if tab_name == 'univariate':
df['univariate'] = df['univariate'].replace({True: 'univariate', False: 'multivariate'})
df.rename(columns={'univariate': 'variate_type'}, inplace=True)
tab_name = 'variate_type'
print('tab_name:', tab_name, 'df: ',df)
print('columns', df.columns)
df_melted = pd.melt(df, id_vars=[tab_name, 'model'], var_name='metric', value_name='value')
df_melted['metric'] = df_melted['metric'].replace({
'eval_metrics/normalized-error': 'normalized-error',
'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS',
'rank': 'Rank',
})
df_pivot = df_melted.pivot_table(index='model', columns=[tab_name, 'metric'], values='value')
df_pivot.columns = [f'{tab_name} ({metric})' for tab_name, metric in df_pivot.columns]
df_pivot = df_pivot.reset_index()
# df_pivot = df_pivot.round(3)
df_pivot = format_df(df_pivot)
# df_pivot = df_pivot.applymap(format_number)
# # make sure the data type is float
# df_pivot.iloc[:, 1:] = df_pivot.iloc[:, 1:].astype(float)
return df_pivot
def get_grouped_dfs(root_dir=None):
    if root_dir is None:
        root_dir = Path(__file__).parent.parent / "results"
    # accept str or Path so that `root_dir / 'dataset_properties.csv'` below works either way
    root_dir = Path(root_dir)
df_list = []
# Walk through all folders and subfolders in the root directory
for csv_path in Path(root_dir).rglob("*csv"):
if 'all_results.csv' in str(csv_path):
df_list.append(pd.read_csv(csv_path))
# Concatenate all dataframes into one
all_results_df = pd.concat(df_list, ignore_index=True)
all_results_df = all_results_df.sort_values(by=['model', 'dataset']).reset_index(drop=True)
ds_properties = root_dir / 'dataset_properties.csv'
dataset_properties = pd.read_csv(ds_properties)
    # # Reformat the first element of each row after the header following these rules:
# # 1. make all characters lowercase
# dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.lower())
# # 2. replace all spaces with underscores
# dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.replace(' ', '_'))
# # 3. Replace all dashes with underscores
# dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.replace('-', '_'))
    # # 4. Replace consecutive underscores with a single underscore. There may be more than 2 consecutive underscores
# dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: re.sub('_+', '_', x))
# # 5. Remove all leading and trailing underscores
# dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.strip('_'))
df = all_results_df
    # Convert to a nested dict: {dataset name: {property column: value}}.
dataset_properties_dict = dataset_properties.set_index('dataset').T.to_dict('dict')
    # Match each dataset in dataset_properties_dict against df and add one column per property.
for dataset in dataset_properties_dict.keys():
for key in dataset_properties_dict[dataset].keys():
df.loc[df['dataset'] == dataset, key] = dataset_properties_dict[dataset][key]
# unify the frequency
# df = unify_freq(df)
# standardize by seasonal naive
df = standardize_df(df)
# TODO compute normalized error
# TODO change to ELO
RANKING_METRIC = "normalized-error"
    # compute metrics that require all methods' results, such as Rank and Elo.
    df['rank'] = df.groupby(['dataset', ProblemTypes.col_name])[RANKING_METRIC].rank(method='first', ascending=True)
    # Placeholder until the ELO TODO above is addressed (see compute_elo_sketch below): currently just the rank scaled by 100.
    df['ELO'] = df.groupby(['dataset', ProblemTypes.col_name])[RANKING_METRIC].rank(method='first', ascending=True) * 100
    # aggregate overall per-model averages across all datasets
grouped_results_overall = df.groupby(['model'])[METRIC_CHOICES].agg("mean")
grouped_results_overall_rank = df.groupby(['model'])[['rank']].mean()
grouped_results_overall_elo = df.groupby(['model'])[['ELO']].mean()
grouped_results_overall = pd.concat([
grouped_results_overall,
grouped_results_overall_rank,
grouped_results_overall_elo],
axis=1
)
# grouped_results_overall = grouped_results_overall.rename(columns={'model':'Model'})
# grouped_results.to_csv(f'artefacts/grouped_results_by_model.csv')
grouped_dfs = {}
# for col_name in ["domain", 'term_length', 'frequency', 'univariate']:
for col_name in [ProblemTypes.col_name]:
grouped_dfs[col_name] = group_by(df, col_name)
# print(f"Grouping by {col_name}:\n {grouped_dfs.head(20)}")
grouped_dfs['overall'] = grouped_results_overall
return grouped_dfs
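# Hedged sketch (not wired into get_grouped_dfs): the 'ELO' column above is a rank-based
# placeholder. One way to compute an actual Elo-style score, assuming a lower value of the
# chosen metric counts as a win, is a sequential Elo update over all pairwise model
# comparisons within each dataset. The function name, K-factor and initial rating below are
# illustrative assumptions, not TabArena conventions.
def compute_elo_sketch(df, metric=MetricNames.normalized_error, k=32.0, initial=1000.0):
    ratings = {model: initial for model in df['model'].unique()}
    for _, group in df.groupby('dataset'):
        rows = list(group[['model', metric]].dropna().itertuples(index=False))
        for i in range(len(rows)):
            for j in range(i + 1, len(rows)):
                model_a, err_a = rows[i]
                model_b, err_b = rows[j]
                # score of 1 if model_a beats model_b (lower error), 0.5 for a tie
                score_a = 0.5 if err_a == err_b else float(err_a < err_b)
                expected_a = 1.0 / (1.0 + 10 ** ((ratings[model_b] - ratings[model_a]) / 400.0))
                ratings[model_a] += k * (score_a - expected_a)
                ratings[model_b] += k * ((1.0 - score_a) - (1.0 - expected_a))
    return pd.Series(ratings, name='ELO')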
def standardize_df(df):
raw_metric = MetricNames.raw_error
    # Perform min-max normalization. We may want something more outlier-robust, like TabRepo's
    # (x - x.min()) / (x.median() - x.min()); see the standardize_df_robust sketch below.
df[MetricNames.normalized_error] = df.groupby('dataset')[raw_metric].transform(
lambda x: (x - x.min()) / (x.max() - x.min())
)
return df
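# Hedged sketch of the more outlier-robust normalization mentioned in standardize_df
# (TabRepo-style): divide by (median - min) instead of (max - min). The clip to [0, 1]
# and the function name are illustrative assumptions, not an existing TabArena API.
def standardize_df_robust(df):
    raw_metric = MetricNames.raw_error

    def _robust(x):
        denom = x.median() - x.min()
        if denom == 0:
            # median equals min (e.g. many tied best scores); fall back to zeros
            return x * 0.0
        return ((x - x.min()) / denom).clip(upper=1.0)

    df[MetricNames.normalized_error] = df.groupby('dataset')[raw_metric].transform(_robust)
    return df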
def group_by(df, col_name):
#grouped_results = df.groupby([col_name, 'model'])[METRIC_CHOICES].agg(stats.gmean)
grouped_results = df.groupby([col_name, 'model'])[METRIC_CHOICES].mean()
grouped_results_rank = df.groupby([col_name, 'model'])[['rank']].mean()
grouped_results = pd.concat([grouped_results, grouped_results_rank], axis=1)
    # Optionally write the per-group results to a csv file:
    # grouped_results.to_csv(f'grouped_results_by_{col_name}.csv')
return grouped_results
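# Hedged usage sketch: assumes a local "results" directory laid out as get_grouped_dfs
# expects (per-run all_results.csv files plus dataset_properties.csv at its root).
if __name__ == "__main__":
    grouped = get_grouped_dfs()
    print(grouped['overall'].sort_values(MetricNames.normalized_error).head(10))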