Spaces:
Running
Running
commit
Browse files
app.py
CHANGED
@@ -16,30 +16,30 @@ def refresh_data():
|
|
16 |
global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
|
17 |
|
18 |
global_output_armenian = unified_exam_result_table(global_unified_exam_df)
|
19 |
-
global_output_mmlu = mmlu_result_table(global_mmlu_df)
|
20 |
|
21 |
-
return global_output_armenian,
|
22 |
|
23 |
def main():
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
|
28 |
-
|
29 |
# global_output_mmlu = mmlu_result_table(global_mmlu_df)
|
30 |
|
31 |
with gr.Blocks() as app:
|
32 |
with gr.Tabs():
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
# with gr.TabItem("MMLU-Pro-Hy"):
|
44 |
# gr.Markdown("# MMLU-Pro Translated to Armenian (MMLU-Pro-Hy)")
|
45 |
# gr.Markdown(
|
@@ -108,14 +108,14 @@ def main():
|
|
108 |
"""
|
109 |
)
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
app.launch(share=True, debug=True)
|
120 |
|
121 |
if __name__ == "__main__":
|
|
|
16 |
global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
|
17 |
|
18 |
global_output_armenian = unified_exam_result_table(global_unified_exam_df)
|
19 |
+
# global_output_mmlu = mmlu_result_table(global_mmlu_df)
|
20 |
|
21 |
+
return global_output_armenian, unified_exam_chart(global_output_armenian, 'Average')
|
22 |
|
23 |
def main():
|
24 |
+
global global_mmlu_df, global_unified_exam_df, global_output_armenian, global_output_mmlu
|
25 |
+
model_handler = ModelHandler()
|
26 |
+
global_mmlu_df, global_unified_exam_df = model_handler.get_arm_bench_data()
|
27 |
|
28 |
+
global_output_armenian = unified_exam_result_table(global_unified_exam_df)
|
29 |
# global_output_mmlu = mmlu_result_table(global_mmlu_df)
|
30 |
|
31 |
with gr.Blocks() as app:
|
32 |
with gr.Tabs():
|
33 |
+
with gr.TabItem("Armenian Unified Exams"):
|
34 |
+
gr.Markdown("# Armenian Unified Test Exams")
|
35 |
+
gr.Markdown(
|
36 |
+
"""
|
37 |
+
This benchmark contains results of various Language Models on Armenian Unified Test Exams for Armenian language and literature, Armenian history and mathematics. The scoring system is a 20-point scale, where 0-8 is a Fail, 8-18 is a Pass, and 18-20 is a Distinction.
|
38 |
+
"""
|
39 |
+
)
|
40 |
+
table_output_armenian = gr.DataFrame(value=global_output_armenian)
|
41 |
+
plot_column_dropdown_unified_exam = gr.Dropdown(choices=['Average', 'Armenian language and literature', 'Armenian history', 'Mathematics'], value='Average', label='Select Column to Plot')
|
42 |
+
plot_output_armenian = gr.Plot(lambda column: unified_exam_chart(global_output_armenian, column), inputs=plot_column_dropdown_unified_exam)
|
43 |
# with gr.TabItem("MMLU-Pro-Hy"):
|
44 |
# gr.Markdown("# MMLU-Pro Translated to Armenian (MMLU-Pro-Hy)")
|
45 |
# gr.Markdown(
|
|
|
108 |
"""
|
109 |
)
|
110 |
|
111 |
+
refresh_button = gr.Button("Refresh Data")
|
112 |
+
# Wire the refresh button to refresh_data. refresh_data returns two values
# (the results table and the chart), which map positionally onto the two
# active output components below.
# NOTE: the closing bracket of `outputs` must live outside the comment;
# previously it was written as `# plot_output_mmlu],`, which left the list
# unclosed and made the module fail to parse.
refresh_button.click(
    fn=refresh_data,
    outputs=[
        table_output_armenian,
        # table_output_mmlu,
        plot_output_armenian,
        # plot_output_mmlu,
    ],
)
|
119 |
app.launch(share=True, debug=True)
|
120 |
|
121 |
if __name__ == "__main__":
|