File size: 6,544 Bytes
b3d493a 3364f2f 0070ea2 02764ad 393a15e 3364f2f b3d493a 4bccbd5 19fbdd9 9704dfc b3d493a 9704dfc acea508 393a15e 9dfb068 acea508 c937b81 b3d493a a0f1951 3364f2f acea508 b3d493a fad338b b35cd1f 0c4c053 b3d493a 0c4c053 acea508 8ed3678 acea508 b3d493a acea508 b3d493a acea508 b3d493a 80d1c51 0c4c053 acea508 0c4c053 acea508 dd2da1a acea508 0c4c053 80d1c51 3364f2f b35cd1f 3364f2f b35cd1f e17557b 02764ad b3d493a a0f1951 02764ad 9964967 af3ccf3 55541c0 6a9e408 b3d493a ee4468d acea508 0c4c053 acea508 0c4c053 b35cd1f 0c4c053 02764ad 99a545d fad338b fe01638 d35450e d94b1a3 02764ad b3d493a 02764ad 6d9b652 d78a80f 6d9b652 02764ad acea508 6d9b652 acea508 d78a80f acea508 3364f2f fe01638 874b675 acea508 874b675 acea508 3364f2f 02764ad 6d9b652 02764ad b3d493a 0c4c053 0e0b281 0c4c053 a0f1951 3364f2f a0f1951 1f689a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
import React, { useState, useEffect } from 'react';
import { chain } from 'lodash';
import './App.css';
const ScoreBar = ({ score, isVanilla = false }) => {
if (score === undefined || score === null) return null;
const percentage = score <= 1 ? score * 100 : score;
const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
const backgroundColor = `hsl(${hue}, 80%, 50%)`;
const className = isVanilla ? "vanilla-bar" : "score-bar";
return (
<div className={className}>
<div
className="score-fill"
style={{
width: `${percentage}%`,
backgroundColor,
height: `100%`,
}}
/>
{!isVanilla && (
<span className="score-text">
{percentage.toFixed(1)}%
</span>
)}
</div>
);
};
const App = () => {
const [allData, setAllData] = useState([]);
const [loading, setLoading] = useState(true);
const [error, setError] = useState(null);
const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
const [searchQuery, setSearchQuery] = useState('');
const [showVanilla, setShowVanilla] = useState(true);
const [showToolCalling, setShowToolCalling] = useState(false);
useEffect(() => {
const fetchData = async () => {
try {
setLoading(true);
// Fetch all data from API
const response = await fetch('https://smolagents-smolagents-leaderboard.hf.space/api/results');
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const jsonData = await response.json();
setAllData(jsonData);
} catch (err) {
console.error('Error fetching data:', err);
setError(err.message);
} finally {
setLoading(false);
}
};
fetchData();
}, []);
const handleSort = (key) => {
const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc';
setSortConfig({ key, direction });
};
const getFilteredData = () => {
const validActionTypes = ['tool-calling', 'tool_calling', 'code'];
return allData.filter(item => validActionTypes.includes(item.agent_action_type));
};
// Get vanilla score for a model
const getVanillaScore = (modelId, metric) => {
const vanillaEntry = allData.find(item =>
item.model_id === modelId && item.agent_action_type === 'vanilla'
);
return vanillaEntry?.scores[metric];
};
const filteredAndSortedData = chain(getFilteredData())
.filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
.orderBy(
[item => {
if (sortConfig.key === 'model') {
return item.model_id;
}
return item.scores[sortConfig.key] || 0;
}],
[sortConfig.direction]
)
.value();
if (loading) return <div className="container">Loading benchmark results...</div>;
if (error) return <div className="container" style={{color: 'red'}}>Error: {error}</div>;
return (
<div className="container">
<div className="header">
<h1 className="title">Smolagents LLM Leaderboard</h1>
<p className="subtitle">How do different LLMs compare for powering agents?</p>
<p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co./datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
<p>Models marked with "JSON" are ran on a ToolCallingAgent (for proprietary models, this uses their provider's built-in tool calling modes) - others are using CodeAgent.</p>
</div>
<div className="search-container">
<div className="search-with-options">
<input
type="text"
className="search-input"
placeholder="Search models..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
/>
<div className="options-container">
<label className="option-label">
<input
type="checkbox"
checked={showVanilla}
onChange={() => setShowVanilla(!showVanilla)}
/>
Show Vanilla Scores
</label>
</div>
</div>
</div>
<div className="table-container">
<table>
<thead>
<tr>
<th onClick={() => handleSort('model')}>
Model {sortConfig.key === 'model' && (
sortConfig.direction === 'desc' ? 'β' : 'β'
)}
</th>
{["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => (
<th key={benchmark} onClick={() => handleSort(benchmark)}>
{benchmark === "Average" ? benchmark : benchmark + ` subset`} {sortConfig.key === benchmark && (
sortConfig.direction === 'desc' ? 'β' : 'β'
)}
</th>
))}
</tr>
</thead>
<tbody>
{filteredAndSortedData.map((item, index) => {
const displayModelId = item.agent_action_type === "tool-calling" || item.agent_action_type === "tool_calling"
? `${item.model_id} - JSON`
: item.model_id;
return (
<tr key={index}>
<td className="model-cell">
<div className="model-name">{displayModelId}</div>
{showVanilla && (
getVanillaScore(item.model_id, "Average") !== undefined && (
<div className="vanilla-text"><i>Vanilla score below</i></div>
)
)}
</td>
{["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
<td key={metric}>
<ScoreBar score={item.scores[metric]} isVanilla={false}/>
{showVanilla && getVanillaScore(item.model_id, metric) !== undefined && (
<ScoreBar score={getVanillaScore(item.model_id, metric)} isVanilla={true}/>
)}
</td>
))}
</tr>
);
})}
</tbody>
</table>
</div>
<div className="legend">
<p><strong>Hugging Face</strong> smolagents 2025</p>
</div>
</div>
);
};
// Expose the leaderboard component as this module's default export.
export default App;