Bagratuni committed on
Commit
2f6fff2
·
1 Parent(s): c495ec8
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -16,7 +16,7 @@ def refresh_data():
16
  global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
17
 
18
  global_output_armenian = unified_exam_result_table(global_unified_exam_df)
19
- # global_output_mmlu = mmlu_result_table(global_mmlu_df)
20
 
21
  return global_output_armenian, unified_exam_chart(global_output_armenian, 'Average')
22
 
@@ -26,7 +26,7 @@ def main():
26
  global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
27
 
28
  global_output_armenian = unified_exam_result_table(global_unified_exam_df)
29
- # global_output_mmlu = mmlu_result_table(global_mmlu_df)
30
 
31
  with gr.Blocks() as app:
32
  with gr.Tabs():
@@ -40,17 +40,17 @@ def main():
40
  table_output_armenian = gr.DataFrame(value=global_output_armenian)
41
  plot_column_dropdown_unified_exam = gr.Dropdown(choices=['Average', 'Armenian language and literature', 'Armenian history', 'Mathematics'], value='Average', label='Select Column to Plot')
42
  plot_output_armenian = gr.Plot(lambda column: unified_exam_chart(global_output_armenian, column), inputs=plot_column_dropdown_unified_exam)
43
- # with gr.TabItem("MMLU-Pro-Hy"):
44
- # gr.Markdown("# MMLU-Pro Translated to Armenian (MMLU-Pro-Hy)")
45
- # gr.Markdown(
46
- # """
47
- # This benchmark contains results of various Language Models on the MMLU-Pro benchmark, translated into Armenian. MMLU-Pro is a massive multi-task test in MCQA format. The scores represent accuracy.
48
- # """
49
- # )
50
- # table_output_mmlu = gr.DataFrame(value=global_output_mmlu)
51
- # subject_cols = ['Average','Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics', 'Engineering', 'Health', 'History', 'Law', 'Math', 'Philosophy', 'Physics', 'Psychology','Other']
52
- # plot_column_dropdown_mmlu = gr.Dropdown(choices=subject_cols, value='Average', label='Select Column to Plot')
53
- # plot_output_mmlu = gr.Plot(lambda column: mmlu_chart(global_output_mmlu, column), inputs=plot_column_dropdown_mmlu)
54
  with gr.TabItem("About"):
55
  gr.Markdown("# About the Benchmark")
56
  gr.Markdown(
@@ -112,9 +112,9 @@ def main():
112
  refresh_button.click(
113
  fn=refresh_data,
114
  outputs=[table_output_armenian,
115
- # table_output_mmlu,
116
  plot_output_armenian,
117
- # plot_output_mmlu
118
  ],
119
  )
120
  app.launch(share=True, debug=True)
 
16
  global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
17
 
18
  global_output_armenian = unified_exam_result_table(global_unified_exam_df)
19
+ global_output_mmlu = mmlu_result_table(global_mmlu_df)
20
 
21
  return global_output_armenian, unified_exam_chart(global_output_armenian, 'Average')
22
 
 
26
  global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
27
 
28
  global_output_armenian = unified_exam_result_table(global_unified_exam_df)
29
+ global_output_mmlu = mmlu_result_table(global_mmlu_df)
30
 
31
  with gr.Blocks() as app:
32
  with gr.Tabs():
 
40
  table_output_armenian = gr.DataFrame(value=global_output_armenian)
41
  plot_column_dropdown_unified_exam = gr.Dropdown(choices=['Average', 'Armenian language and literature', 'Armenian history', 'Mathematics'], value='Average', label='Select Column to Plot')
42
  plot_output_armenian = gr.Plot(lambda column: unified_exam_chart(global_output_armenian, column), inputs=plot_column_dropdown_unified_exam)
43
+ with gr.TabItem("MMLU-Pro-Hy"):
44
+ gr.Markdown("# MMLU-Pro Translated to Armenian (MMLU-Pro-Hy)")
45
+ gr.Markdown(
46
+ """
47
+ This benchmark contains results of various Language Models on the MMLU-Pro benchmark, translated into Armenian. MMLU-Pro is a massive multi-task test in MCQA format. The scores represent accuracy.
48
+ """
49
+ )
50
+ table_output_mmlu = gr.DataFrame(value=global_output_mmlu)
51
+ subject_cols = ['Average','Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics', 'Engineering', 'Health', 'History', 'Law', 'Math', 'Philosophy', 'Physics', 'Psychology','Other']
52
+ plot_column_dropdown_mmlu = gr.Dropdown(choices=subject_cols, value='Average', label='Select Column to Plot')
53
+ plot_output_mmlu = gr.Plot(lambda column: mmlu_chart(global_output_mmlu, column), inputs=plot_column_dropdown_mmlu)
54
  with gr.TabItem("About"):
55
  gr.Markdown("# About the Benchmark")
56
  gr.Markdown(
 
112
  refresh_button.click(
113
  fn=refresh_data,
114
  outputs=[table_output_armenian,
115
+ table_output_mmlu,
116
  plot_output_armenian,
117
+ plot_output_mmlu
118
  ],
119
  )
120
  app.launch(share=True, debug=True)