{ "gpt-4o": { "display_name": "gpt 4o", "provider": "openai", "open": false, "size": "?B", "thinking": false, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } } } }, "claude-3-7-sonnet-20250219": { "display_name": "Claude 3.7 Sonnet", "provider": "anthropic", "open": false, "size": "?B", "thinking": false, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "default": "2025-02-25T12-43-49.294245", "thinking": "2025-03-05T15-37-37.180318" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "default": "2025-02-25T12-37-52.771787", "thinking": "2025-03-05T12-39-13.627801" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "default": "2025-02-25T12-37-52.771787", "thinking": "2025-03-05T12-39-13.627801" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "default": "2025-02-25T12-24-45.750753", "thinking": "2025-03-05T15-37-37.180318" } } } }, "o3-mini-2025-01-31": { "display_name": "o3-mini", "provider": "openai", "open": false, "size": "?B", "thinking": true, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } }, "aime_25": { 
"subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } } } }, "moonshotai/Moonlight-16B-A3B-Instruct": { "display_name": "Moonlight", "provider": "moonshotai", "open": true, "size": "16B", "thinking": false, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } } } }, "meta-llama/Llama-3.3-70B-Instruct": { "display_name": "Llama 3.3 70B", "provider": "meta", "open": true, "size": "70B", "thinking": false, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-26T17-13-13.448521" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-26T17-13-13.448521" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-26T17-13-13.448521" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-02-26T17-13-13.448521" } } } }, "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "display_name": "DeepSeek Llama 70B", "provider": "deepseek", "open": true, "size": "70B", "thinking": true, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T11-09-04.037858" } 
}, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T11-09-04.037858" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T11-09-04.037858" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-02-27T14-02-02.414381" } } } }, "qihoo360/TinyR1-32B-Preview": { "display_name": "TinyR1 32B", "provider": "qihoo360", "open": true, "size": "32B", "thinking": false, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T13-32-41.564652" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T13-32-41.564652" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T13-32-41.564652" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-02-27T13-32-41.564652" } } } }, "openai/gpt-4.5-preview-2025-02-27": { "display_name": "gpt 4.5", "provider": "openai", "open": false, "size": "?B", "thinking": false, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T11-35-34.241611" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T11-15-32.836958" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T11-15-32.836958" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-03T11-17-20.767980" } } } }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { "display_name": "DeepSeek Qwen 32B", "provider": "deepseek", "open": true, "size": "32B", "thinking": true, "benchmarks": { "gpqa_diamond": { 
"subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T14-51-09.849491" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T14-51-09.849491" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T14-51-09.849491" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-03T15-06-10.838105" } } } }, "openai/deepseek-ai/DeepSeek-R1": { "display_name": "DeepSeek R1", "provider": "deepseek", "open": true, "size": "671B", "thinking": true, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-04T17-06-33.124766" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-04T14-52-35.594174" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-04T14-25-05.009799" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-04T15-24-42.488745" } } } }, "Qwen/QwQ-32B": { "display_name": "QwQ 32B", "provider": "Qwen", "open": true, "size": "32B", "thinking": true, "benchmarks": { "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-10T11-47-46.303371" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-10T10-36-07.886033" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-10T10-36-07.886033" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-10T12-21-36.862202" } } } }, "google/gemma-3-1b-it": { "display_name": "Gemma 3 1B", "provider": "google", "open": true, 
"size": "1B", "thinking": false, "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T14-25-56.178612" } } } }, "google/gemma-3-12b-it": { "display_name": "Gemma 3 12B", "provider": "google", "open": true, "size": "12B", "thinking": false, "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T14-36-23.368081" } } } }, "google/gemma-3-27b-it": { "display_name": "Gemma 3 27B", "provider": "google", "open": true, "size": "27B", "thinking": false, "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T14-41-33.181467" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T15-11-34.174477" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-18T15-20-14.979833" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T15-20-14.979833" } } } }, "openai/deepseek-ai/DeepSeek-V3-0324": { "display_name": "DeepSeek V3 0324", "provider": "deepseek", "open": true, "size": "671B", "thinking": false, "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-25T15-00-18.969082" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-25T15-00-18.969082" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-25T15-34-22.165555" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-25T14-22-37.175021" } } } }, "openai/deepseek-ai/DeepSeek-V3": { "display_name": "DeepSeek V3", "provider": "deepseek", "open": true, "size": "671B", 
"thinking": false, "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-25T19-39-33.880476" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-25T19-39-33.880476" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-25T19-39-33.880476" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-25T19-39-33.880476" } } } }, "meta-llama/Llama-4-Scout-17B-16E-Instruct": { "display_name": "Llama 4 Scout 17B", "provider": "meta", "open": true, "size": "17B (109B params)", "thinking": false, "benchmarks": { "aime_25": { "subset": "custom|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-04-07T12-01-58.793350" } }, "aime_24": { "subset": "custom|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-04-07T12-01-58.793350" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-04-07T12-01-58.793350" } }, "gpqa_diamond": { "subset": "custom|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-04-07T12-01-58.793350" } } } }, "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { "display_name": "Llama 4 Maverick 17B FP8", "provider": "meta", "open": true, "size": "17B (400B params)", "thinking": false, "benchmarks": { "aime_25": { "subset": "custom|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-04-07T13-08-22.017751" } }, "aime_24": { "subset": "custom|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-04-07T13-08-22.017751" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-04-07T13-08-22.017751" } }, "gpqa_diamond": { "subset": "custom|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { 
"latest": "2025-04-07T13-08-22.017751" } } } } }