from pathlib import Path
import os
import re

import pandas as pd
from scipy import stats

from src.constants import ProblemTypes, MetricNames

# Metrics aggregated in every grouped summary table.
METRIC_CHOICES = [
    MetricNames.normalized_error,
    MetricNames.fit_time_per_1K_rows,
    MetricNames.inference_time_per_1K_rows,
]


def format_number(num):
    """Format a single cell value for display.

    Numeric values with magnitude >= 100 are rendered in scientific notation
    with one decimal place; smaller values with three decimals. Non-numeric
    values (e.g. model names) are returned unchanged.
    """
    if isinstance(num, (int, float)):
        # Scientific notation keeps wide-ranging columns (e.g. timings) compact.
        if abs(num) >= 10**2:
            return f"{num:.1e}"
        return f"{num:.3f}"
    return num


def norm_sNavie(df):
    """Normalize every metric column by the 'seasonal_naive' baseline row.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'model' column including a 'seasonal_naive' entry;
        all other columns are assumed numeric.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with each non-'model' column divided element-wise by
        the seasonal-naive value of that column.

    Raises
    ------
    ValueError
        If no 'seasonal_naive' row is present (previously surfaced as an
        opaque IndexError from ``.iloc[0]``).
    """
    baseline = df[df['model'] == 'seasonal_naive']
    if baseline.empty:
        raise ValueError("norm_sNavie: no 'seasonal_naive' row found in df")
    baseline_row = baseline.iloc[0]
    df_normalized = df.copy()
    for column in df.columns:
        if column != 'model':  # skip the identifier column
            df_normalized[column] = df[column] / baseline_row[column]
    return df_normalized


def pivot_df(file_name, tab_name):
    """Read a results CSV and pivot it to one row per model.

    Parameters
    ----------
    file_name : str or Path
        CSV with at least 'model', ``tab_name`` and metric columns.
    tab_name : str
        Column to spread across the pivoted header. The special value
        'univariate' is converted from a boolean flag into a readable
        'variate_type' category first.

    Returns
    -------
    pd.DataFrame
        One row per model, columns named '<category> (<metric>)', values
        rounded to 3 decimals.
    """
    df = pd.read_csv(file_name)
    if tab_name == 'univariate':
        # Present the boolean flag as a human-readable category.
        df['univariate'] = df['univariate'].replace(
            {True: 'univariate', False: 'multivariate'})
        df.rename(columns={'univariate': 'variate_type'}, inplace=True)
        tab_name = 'variate_type'
    df_melted = pd.melt(df, id_vars=[tab_name, 'model'],
                        var_name='metric', value_name='value')
    df_melted['metric'] = df_melted['metric'].replace({
        'eval_metrics/MAPE[0.5]': 'MAPE',
        'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS'
    })
    df_pivot = df_melted.pivot_table(index='model',
                                     columns=[tab_name, 'metric'],
                                     values='value')
    # Flatten the (category, metric) MultiIndex into '<category> (<metric>)'.
    df_pivot.columns = [f'{cat} ({metric})' for cat, metric in df_pivot.columns]
    df_pivot = df_pivot.reset_index()
    return df_pivot.round(3)


def rename_metrics(df):
    """Strip the 'eval_metrics/' prefix and give timing columns display names."""
    df = df.rename(columns={
        f'eval_metrics/{MetricNames.normalized_error}': MetricNames.normalized_error,
        f'eval_metrics/{MetricNames.inference_time_per_1K_rows}': "Inference time / 1K rows (s)",
        f'eval_metrics/{MetricNames.fit_time_per_1K_rows}': "Fit time / 1K rows (s)",
    })
    return df


def format_df(df):
    """Round-trip-format numeric cells, then restore float dtype.

    ``format_number`` renders each cell to a fixed-precision string, and the
    subsequent ``astype(float)`` parses the strings back, effectively rounding
    the numeric columns. Column 0 is assumed to be the 'model' identifier and
    is left untouched.
    """
    # NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in
    # favour of DataFrame.map; kept for compatibility with older pandas.
    df = df.applymap(format_number)
    # Make sure the data type is float.
    df.iloc[:, 1:] = df.iloc[:, 1:].astype(float)
    return df


def unify_freq(df):
    """Map pandas-style frequency codes to human-readable period names.

    E.g. '15T' -> 'Minutely', 'W-SUN' -> 'Weekly'. Digits and any
    '-<anchor>' suffix are stripped before mapping; unrecognized codes are
    left as-is by ``replace``.
    """
    # Remove all numeric characters from the 'frequency' column.
    df['frequency'] = df['frequency'].str.replace(r'\d+', '', regex=True)
    # Remove everything after '-' if present (anchored offsets like 'W-SUN').
    df['frequency'] = df['frequency'].str.split('-').str[0]
    freq_conversion = {
        'T': 'Minutely',
        'H': 'Hourly',
        'D': 'Daily',
        'W': 'Weekly',
        'M': 'Monthly',
        'Q': 'Quarterly',
        'Y': 'Yearly',
        'A': 'Yearly',
        'S': 'Secondly'
    }
    df['frequency'] = df['frequency'].replace(freq_conversion)
    return df


def pivot_existed_df(df, tab_name):
    """Pivot an already-grouped dataframe to one row per model.

    Like :func:`pivot_df` but takes an in-memory (possibly multi-indexed)
    dataframe instead of a CSV path, and formats the result with
    :func:`format_df` rather than plain rounding.
    """
    df = df.reset_index()
    if tab_name == 'univariate':
        # Present the boolean flag as a human-readable category.
        df['univariate'] = df['univariate'].replace(
            {True: 'univariate', False: 'multivariate'})
        df.rename(columns={'univariate': 'variate_type'}, inplace=True)
        tab_name = 'variate_type'
    df_melted = pd.melt(df, id_vars=[tab_name, 'model'],
                        var_name='metric', value_name='value')
    df_melted['metric'] = df_melted['metric'].replace({
        'eval_metrics/normalized-error': 'normalized-error',
        'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS',
        'rank': 'Rank',
    })
    df_pivot = df_melted.pivot_table(index='model',
                                     columns=[tab_name, 'metric'],
                                     values='value')
    # Flatten the (category, metric) MultiIndex into '<category> (<metric>)'.
    df_pivot.columns = [f'{cat} ({metric})' for cat, metric in df_pivot.columns]
    df_pivot = df_pivot.reset_index()
    return format_df(df_pivot)


def get_grouped_dfs(root_dir=None):
    """Aggregate every ``all_results.csv`` under ``root_dir`` into summaries.

    Parameters
    ----------
    root_dir : str or Path, optional
        Directory searched recursively for 'all_results.csv' files; must
        also contain 'dataset_properties.csv'. Defaults to the sibling
        'results' directory of this package.

    Returns
    -------
    dict[str, pd.DataFrame]
        One grouped summary per grouping column (currently the problem-type
        column) plus an 'overall' per-model table with mean metrics, rank
        and ELO.

    Raises
    ------
    FileNotFoundError
        If no 'all_results.csv' file is found under ``root_dir``.
    """
    if root_dir is None:
        root_dir = Path(__file__).parent.parent / "results"
    root_dir = Path(root_dir)  # accept str or Path from callers

    # Match the exact file name; a loose '*csv' glob would also pick up
    # unrelated files whose names merely end in 'csv'.
    df_list = [pd.read_csv(csv_path)
               for csv_path in root_dir.rglob("all_results.csv")]
    if not df_list:
        raise FileNotFoundError(
            f"no 'all_results.csv' files found under {root_dir}")
    df = (
        pd.concat(df_list, ignore_index=True)
        .sort_values(by=['model', 'dataset'])
        .reset_index(drop=True)
    )

    dataset_properties = pd.read_csv(root_dir / 'dataset_properties.csv')
    # {dataset: {property_name: value}} for annotating the results rows.
    dataset_properties_dict = dataset_properties.set_index('dataset').T.to_dict('dict')
    # Add one column per dataset property, matched on the dataset name.
    for dataset, properties in dataset_properties_dict.items():
        for key, value in properties.items():
            df.loc[df['dataset'] == dataset, key] = value

    # Standardize by per-dataset min-max before ranking.
    df = standardize_df(df)

    # TODO change to ELO
    RANKING_METRIC = "normalized-error"
    # Rank models within each (dataset, problem type) cell; 'first' breaks
    # ties by order of appearance.
    df['rank'] = df.groupby(['dataset', ProblemTypes.col_name])[RANKING_METRIC] \
        .rank(method='first', ascending=True)
    # Placeholder ELO: previously recomputed the identical groupby-rank;
    # derive it from the rank column instead.
    df['ELO'] = df['rank'] * 100

    grouped_results_overall = pd.concat([
        df.groupby(['model'])[METRIC_CHOICES].agg("mean"),
        df.groupby(['model'])[['rank']].mean(),
        df.groupby(['model'])[['ELO']].mean(),
    ], axis=1)

    grouped_dfs = {}
    # Other candidates: "domain", 'term_length', 'frequency', 'univariate'.
    for col_name in [ProblemTypes.col_name]:
        grouped_dfs[col_name] = group_by(df, col_name)
    grouped_dfs['overall'] = grouped_results_overall
    return grouped_dfs


def standardize_df(df):
    """Add a per-dataset min-max normalized error column.

    We may want to do something more outlier-robust like what is done in
    TabRepo, i.e. (x - x.min()) / (x.median() - x.min()).

    NOTE(review): a dataset where every model scores identically yields
    0 / 0 -> NaN for that group; confirm downstream handling.
    """
    raw_metric = MetricNames.raw_error
    df[MetricNames.normalized_error] = df.groupby('dataset')[raw_metric].transform(
        lambda x: (x - x.min()) / (x.max() - x.min())
    )
    return df


def group_by(df, col_name):
    """Mean metrics and mean rank per (``col_name``, model) group.

    NOTE: ``stats.gmean`` was considered as the aggregator instead of the
    arithmetic mean.
    """
    grouped_results = df.groupby([col_name, 'model'])[METRIC_CHOICES].mean()
    grouped_results_rank = df.groupby([col_name, 'model'])[['rank']].mean()
    return pd.concat([grouped_results, grouped_results_rank], axis=1)