Xueqing committed on
Commit
16c533f
·
verified ·
1 Parent(s): f9ec874

Update model_performance.csv

Browse files
Files changed (1) hide show
  1. model_performance.csv +27 -22
model_performance.csv CHANGED
@@ -1,22 +1,27 @@
1
- Models,Average,FinQA,DM-Simplong,XBRL-Math,Type
2
- GPT-4o,68.24,72.49,60.0,72.22,Instruction-tuned
3
- GPT-4.5,67.46,68.94,59.0,74.44,Instruction-tuned
4
- GPT-o1,59.84,49.07,56.0,74.44,Reasoning-enhanced
5
- GPT-o3-mini,65.51,60.87,59.0,76.67,Reasoning-enhanced
6
- DeepSeek-V3,67.62,73.2,53.0,76.67,Instruction-tuned
7
- DeepSeek-R1,68.93,65.13,53.0,86.67,Reasoning-enhanced
8
- Qwen2.5-72B-Instruct,66.72,73.38,59.0,67.78,Instruction-tuned
9
- Qwen2.5-72B-Instruct-Math,65.69,69.74,42.0,83.33,Reasoning-enhanced
10
- Qwen2.5-32B-Instruct,64.89,73.11,56.0,65.56,Instruction-tuned
11
- DeepSeek-R1-Distill-Llama-70B,68.8,66.73,53.0,86.67,Reasoning-enhanced
12
- Llama3-70B-Instruct,52.2,58.92,41.0,56.67,Instruction-tuned
13
- Llama3.1-70B-Instruct,58.17,63.18,48.0,63.33,Instruction-tuned
14
- Llama3.3-70B-Instruct,64.05,68.15,54.0,70.0,Instruction-tuned
15
- DeepSeek-R1-Distill-Qwen-32B,68.97,65.48,55.0,84.44,Reasoning-enhanced
16
- DeepSeek-R1-Distill-Qwen-14B,63.9,63.27,44.0,84.44,Reasoning-enhanced
17
- DeepSeek-R1-Distill-Llama-8B,53.36,45.96,33.0,81.11,Reasoning-enhanced
18
- Llama3-8B-Instruct,39.95,41.97,29.0,48.89,Instruction-tuned
19
- Llama3.1-8B-Instruct,50.12,54.13,34.0,62.22,Instruction-tuned
20
- LIMO,56.52,63.44,45.0,61.11,Reasoning-enhanced
21
- s1-32B,68.08,66.81,53.0,84.44,Reasoning-enhanced
22
- Fino1-8B,61.03,60.87,40.0,82.22,Reasoning-enhanced
 
 
 
 
 
 
1
+ Models,Average,FinQA,DM-Simplong,XBRL-Math,DM-Complong,Type
2
+ GPT-4o,61.01,72.49,60,72.22,39.33,Instruction-tuned
3
+ GPT-o1,54.045,49.07,56,74.44,36.67,Reasoning-enhanced
4
+ GPT-o3-mini,57.885,60.87,59,76.67,35,Reasoning-enhanced
5
+ DeepSeek-V3,61.3,73.2,53,76.67,42.33,Instruction-tuned
6
+ DeepSeek-R1,60.8675,65.13,53,86.67,38.67,Reasoning-enhanced
7
+ GPT-4.5,60.4275,68.94,59,74.44,39.33,Instruction-tuned
8
+ DeepSeek-R1-Distill-Llama-70B,59.2675,66.73,53,86.67,30.67,Reasoning-enhanced
9
+ Llama-3-70B-Instruct,42.565,58.92,41,56.67,13.67,Instruction-tuned
10
+ Llama-3.1-70B-Instruct,52.21,63.18,48,63.33,34.33,Instruction-tuned
11
+ Llama-3.3-70B-Instruct,56.0375,68.15,54,70,32,Instruction-tuned
12
+ DeepSeek-R1-Distill-Qwen-32B,57.3975,65.48,55,84.44,24.67,Reasoning-enhanced
13
+ DeepSeek-R1-Distill-Qwen-14B,53.1775,63.27,44,84.44,21,Reasoning-enhanced
14
+ DeepSeek-R1-Distill-Llama-8B,43.935,45.96,33,81.11,15.67,Reasoning-enhanced
15
+ Llama-3-8B-Instruct,31.465,41.97,29,48.89,6,Instruction-tuned
16
+ Llama-3.1-8B-Instruct,41.1625,54.13,34,62.22,14.3,Instruction-tuned
17
+ Qwen2.5-7B-Instruct,39.065,55.37,41,42.22,17.67,Instruction-tuned
18
+ Qwen2.5-14B-Instruct,52.7225,67.44,59,57.78,26.67,Instruction-tuned
19
+ Qwen2.5-32B-Instruct,56.1675,73.11,56,65.56,30,Instruction-tuned
20
+ Qwen2.5-72B-Instruct,53.7075,73.38,59,67.78,14.67,Instruction-tuned
21
+ Qwen2.5-Math-72B-Instruct,50.0175,69.74,42,83.33,5,Reasoning-enhanced
22
+ LIMO,46.22,63.44,45,61.11,15.33,Reasoning-enhanced
23
+ S1,57.0625,66.81,53,84.44,24,Reasoning-enhanced
24
+ QwQ-32B,52.915,61.22,46,84.44,20,Reasoning-enhanced
25
+ FInR1-7B,34.8525,58.74,37,30,13.67,Reasoning-enhanced
26
+ Fino1-8B,50.7725,60.87,40,82.22,20,Reasoning-enhanced
27
+ Fino1-14B,60.2525,70.01,60,86.67,24.33,Reasoning-enhanced