# ArmBench-LLM / data_handler.py
# Bagratuni's picture
# commit
# 29617b1 verified
import gradio as gr
import pandas as pd
import plotly.express as px
from model_handler import ModelHandler
def unified_exam_result_table(unified_exam_df):
    """Return the ranked leaderboard table for the unified exam results.

    The rows are ordered by descending ``Average`` and numbered from 1 in a
    new leading ``Rank`` column. ``Average`` is moved to the third position,
    the ``Armenian language and literature`` header is replaced by a variant
    containing a line break, and numeric values are rounded to 4 decimals.
    The input frame is left untouched; a new frame is returned.
    """
    ranked = (
        unified_exam_df.copy()
        .sort_values(by='Average', ascending=False)
        .reset_index(drop=True)
    )
    ranked.insert(0, 'Rank', range(1, len(ranked) + 1))

    # Pull 'Average' out of wherever it sits and place it at index 2.
    ordered = list(ranked.columns)
    ordered.remove('Average')
    ordered.insert(2, 'Average')

    # Break the long header across two lines.
    header_map = {'Armenian language and literature': 'Armenian language\nand literature'}
    return ranked[ordered].rename(columns=header_map).round(4)
def mmlu_result_table(mmlu_df):
    """Return the ranked leaderboard table for the MMLU results.

    The rows are ordered by descending ``Average`` and numbered from 1 in a
    new leading ``Rank`` column. ``Average`` is moved to the third position
    and ``Other`` to the very end; numeric values are rounded to 4 decimals.
    The input frame is left untouched; a new frame is returned.
    """
    table = mmlu_df.copy()
    table = table.sort_values(by='Average', ascending=False)
    table = table.reset_index(drop=True)
    table.insert(0, 'Rank', range(1, len(table) + 1))

    layout = table.columns.tolist()
    # 'Average' goes to index 2.
    layout.remove('Average')
    layout.insert(2, 'Average')
    # The catch-all 'Other' category is shown last.
    layout.remove('Other')
    layout.append('Other')

    return table[layout].round(4)
def unified_exam_chart(unified_exam_df, plot_column):
    """Build a horizontal bar chart of unified exam scores, one bar per model.

    Args:
        unified_exam_df: DataFrame with a ``Model`` column and the score
            column named by ``plot_column``. The input is not modified.
        plot_column: Name of the score column to plot. It is also used as the
            chart title.

    Returns:
        A Plotly figure. Bars are sorted by descending score (ties broken by
        model name), coloured by a "Test Result" label, drawn on an x axis
        fixed to the range 0-20, and laid out 1000 px wide.

    Raises:
        KeyError: If ``plot_column`` or ``Model`` is not a column of the frame.
    """
    raw_name = 'Armenian language and literature'
    wrapped_name = 'Armenian language\nand literature'
    # unified_exam_result_table() renames this column to the line-broken
    # variant. Remap only when that variant is actually present, so a frame
    # that still carries the raw column name works instead of raising KeyError.
    if plot_column == raw_name and wrapped_name in unified_exam_df.columns:
        plot_column = wrapped_name

    df = unified_exam_df.copy()
    df = df.sort_values(
        by=[plot_column, 'Model'], ascending=[False, True]
    ).reset_index(drop=True)

    def get_label(score):
        # Thresholds: below 8 fails, 8 through 18 passes, above 18 is a distinction.
        if score < 8:
            return "Fail"
        if score <= 18:
            return "Pass"
        # NOTE: NaN compares False against both thresholds, so a missing
        # score also ends up here.
        return "Distinction"

    df['Test Result'] = df[plot_column].apply(get_label)

    color_discrete_map = {
        "Fail": "#ff5f56",
        "Pass": "#ffbd2e",
        "Distinction": "#27c93f",
    }
    title = str(plot_column)
    x_range_max = 20

    fig = px.bar(
        df,
        x=plot_column,
        y='Model',
        color='Test Result',
        color_discrete_map=color_discrete_map,
        labels={plot_column: 'Score', 'Model': 'Model'},
        title=title,
        orientation='h',
    )
    fig.update_layout(
        xaxis=dict(range=[0, x_range_max]),
        title=dict(text=title, font=dict(size=16)),
        xaxis_title=dict(font=dict(size=12)),
        yaxis_title=dict(font=dict(size=12)),
        # Reverse the y axis so the first (highest-scoring) row is on top.
        yaxis=dict(autorange="reversed"),
        width=1000,
    )
    return fig
def mmlu_chart(mmlu_df, plot_column):
    """Build a horizontal bar chart of MMLU accuracy, one bar per model.

    ``Average`` is (re)computed as the row-wise mean of the fourteen subject
    columns listed below, replacing any ``Average`` column already present in
    the copy. Rows are then sorted by descending ``plot_column``. Bars are
    coloured by the plotted value on a Viridis scale pinned to [0, 1], the
    x axis is fixed to the range 0-1.0, and the figure is 1000 px wide.
    The input frame is not modified.
    """
    subjects = [
        'Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics',
        'Engineering', 'Health', 'History', 'Law', 'Math', 'Other',
        'Philosophy', 'Physics', 'Psychology',
    ]

    data = mmlu_df.copy()
    data['Average'] = data[subjects].mean(axis=1)
    data = data.sort_values(by=plot_column, ascending=False)
    data = data.reset_index(drop=True)

    heading = f'{plot_column}'
    axis_font = {'font': {'size': 12}}

    figure = px.bar(
        data,
        x=plot_column,
        y='Model',
        color=plot_column,
        color_continuous_scale='Viridis',
        labels={plot_column: 'Accuracy', 'Model': 'Model'},
        title=heading,
        orientation='h',
        range_color=[0, 1],
    )
    figure.update_layout(
        xaxis={'range': [0, 1.0]},
        title={'text': heading, 'font': {'size': 16}},
        xaxis_title=dict(axis_font),
        yaxis_title=dict(axis_font),
        # Reverse the y axis so the first (best) row is drawn at the top.
        yaxis={'autorange': "reversed"},
        width=1000,
    )
    return figure