Spaces:
Running
Running
Commit
·
73cf0ca
1
Parent(s):
c94926c
Added About Us Section
Browse files
- app.py +53 -16
- css_html_js.py +6 -0
- parse.py +3 -1
- results.json +21 -21
- utils.py +5 -4
app.py
CHANGED
@@ -32,8 +32,8 @@ def generate_scatter_plot(benchmark, metric):
|
|
32 |
if benchmark == "RTL-Repo":
|
33 |
subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
|
34 |
detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
|
35 |
-
detailed_scores.rename(columns={'Score': 'EM'}, inplace=True)
|
36 |
-
detailed_scores['Average ⬆️'] = detailed_scores['EM']
|
37 |
else:
|
38 |
detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
|
39 |
detailed_scores['Average ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1)
|
@@ -44,7 +44,6 @@ def generate_scatter_plot(benchmark, metric):
|
|
44 |
scatter_data['x'] = scatter_data['Params']
|
45 |
scatter_data['y'] = scatter_data[metric]
|
46 |
scatter_data['size'] = (scatter_data['x'] ** 0.3) * 40
|
47 |
-
|
48 |
|
49 |
type_colors = {"General": "green", "Coding": "yellow", "RTL-Specific": "blue"}
|
50 |
scatter_data['color'] = scatter_data['Model Type'].map(type_colors).fillna('gray')
|
@@ -57,9 +56,10 @@ def generate_scatter_plot(benchmark, metric):
|
|
57 |
y_range = y_axis_limits.get(metric, [0, 80])
|
58 |
|
59 |
fig = px.scatter(
|
60 |
-
scatter_data, x='x', y='y', log_x=True, size='size', color='
|
61 |
hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
|
62 |
labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
|
|
|
63 |
height=600, width=1200
|
64 |
)
|
65 |
|
@@ -99,9 +99,9 @@ with gr.Blocks(css=custom_css, js=js_func) as app:
|
|
99 |
</p>
|
100 |
""")
|
101 |
gr.Markdown("""
|
102 |
-
Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models
|
103 |
[GitHub Repository](https://github.com/HPAI-BSC) | [arXiv Preprint](https://arxiv.org/) | [How to submit](https://github.com/HPAI-BSC)<br/>
|
104 |
-
|
105 |
""")
|
106 |
|
107 |
with gr.Tabs():
|
@@ -128,23 +128,60 @@ with gr.Blocks(css=custom_css, js=js_func) as app:
|
|
128 |
interactive=False,
|
129 |
column_widths=["4%", "5%", "28%", "10%", "14%"],)
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
with gr.Tab("Interactive Bubble Plot"):
|
132 |
with gr.Row():
|
133 |
bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
|
134 |
bubble_metric = gr.Radio(choices=metrics, label="Select Metric", value=default_metric)
|
135 |
-
gr.Markdown("We show in 🟢 General Models, in
|
136 |
scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
)
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
# event handlers, ugly way but it works
|
149 |
benchmark_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
150 |
model_type_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
|
|
32 |
if benchmark == "RTL-Repo":
|
33 |
subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
|
34 |
detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
|
35 |
+
detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
|
36 |
+
detailed_scores['Average ⬆️'] = detailed_scores['Exact Matching (EM)']
|
37 |
else:
|
38 |
detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
|
39 |
detailed_scores['Average ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1)
|
|
|
44 |
scatter_data['x'] = scatter_data['Params']
|
45 |
scatter_data['y'] = scatter_data[metric]
|
46 |
scatter_data['size'] = (scatter_data['x'] ** 0.3) * 40
|
|
|
47 |
|
48 |
type_colors = {"General": "green", "Coding": "yellow", "RTL-Specific": "blue"}
|
49 |
scatter_data['color'] = scatter_data['Model Type'].map(type_colors).fillna('gray')
|
|
|
56 |
y_range = y_axis_limits.get(metric, [0, 80])
|
57 |
|
58 |
fig = px.scatter(
|
59 |
+
scatter_data, x='x', y='y', log_x=True, size='size', color='Model Type', text='Model',
|
60 |
hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
|
61 |
labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
|
62 |
+
# color_discrete_map={"General": "#A8D5BA", "Coding": "#F7DC6F", "RTL-Specific": "#87CEFA"},
|
63 |
height=600, width=1200
|
64 |
)
|
65 |
|
|
|
99 |
</p>
|
100 |
""")
|
101 |
gr.Markdown("""
|
102 |
+
Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models.<br/>
|
103 |
[GitHub Repository](https://github.com/HPAI-BSC) | [arXiv Preprint](https://arxiv.org/) | [How to submit](https://github.com/HPAI-BSC)<br/>
|
104 |
+
If you have any inquiries or wish to collaborate: [email protected]
|
105 |
""")
|
106 |
|
107 |
with gr.Tabs():
|
|
|
128 |
interactive=False,
|
129 |
column_widths=["4%", "5%", "28%", "10%", "14%"],)
|
130 |
|
131 |
+
with gr.Row():
|
132 |
+
with gr.Accordion("π Citation", open=False):
|
133 |
+
citation_button = gr.Textbox(
|
134 |
+
value=CITATION_BUTTON_TEXT,
|
135 |
+
label=CITATION_BUTTON_LABEL,
|
136 |
+
lines=20,
|
137 |
+
elem_id="citation-button",
|
138 |
+
show_copy_button=True,
|
139 |
+
)
|
140 |
with gr.Tab("Interactive Bubble Plot"):
|
141 |
with gr.Row():
|
142 |
bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
|
143 |
bubble_metric = gr.Radio(choices=metrics, label="Select Metric", value=default_metric)
|
144 |
+
gr.Markdown("We show in 🟢 General Models, in 🔵 Coding Models and in 🔴 RTL-Specific Models. Detailed information is shown when hovering over each model in the plot.")
|
145 |
scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
|
146 |
+
|
147 |
+
with gr.Tab("About Us"):
|
148 |
+
gr.HTML(
|
149 |
+
"""
|
150 |
+
<div style="max-width: 800px; margin: auto; padding: 20px; border: 1px solid #ccc; border-radius: 10px;">
|
151 |
+
<h1 style="text-align: center; font-size: 28px; margin-top: -7px;">HPAI-BSC</h1>
|
152 |
+
|
153 |
+
<p style="font-size: 18px; text-align: justify;">
|
154 |
+
The <b>High-Performance Artificial Intelligence (HPAI)</b> group is part of the
|
155 |
+
<a href="https://bsc.es/" target="_blank">Barcelona Supercomputing Center (BSC)</a>.
|
156 |
+
This leaderboard is maintained by HPAI as part of our commitment to <b>open science</b>.
|
157 |
+
</p>
|
158 |
+
|
159 |
+
<ul style="font-size: 18px; margin-bottom: 20px; margin-top: 20px;">
|
160 |
+
<li><a href="https://hpai.bsc.es/" target="_blank">Official Website</a></li>
|
161 |
+
<li><a href="https://github.com/HPAI-BSC/" target="_blank">GitHub Organization Page</a></li>
|
162 |
+
<li><a href="https://huggingface.co/HPAI-BSC/" target="_blank">Hugging Face Organization Page</a></li>
|
163 |
+
<li><a href="https://hpai.bsc.es/publications" target="_blank">Publications</a></li>
|
164 |
+
</ul>
|
165 |
+
|
166 |
+
<p style="font-size: 18px; margin-top: 15px;">
|
167 |
+
Feel free to contact us:
|
168 |
+
</p>
|
169 |
+
|
170 |
+
<p style="font-size: 18px;">Email: <a href="mailto:[email protected]"><b>[email protected]</b></a></p>
|
171 |
+
</div>
|
172 |
+
"""
|
173 |
)
|
174 |
|
175 |
+
with gr.Row():
|
176 |
+
with gr.Accordion("π Citation", open=False):
|
177 |
+
citation_button = gr.Textbox(
|
178 |
+
value=CITATION_BUTTON_TEXT,
|
179 |
+
label=CITATION_BUTTON_LABEL,
|
180 |
+
lines=20,
|
181 |
+
elem_id="citation-button",
|
182 |
+
show_copy_button=True,
|
183 |
+
)
|
184 |
+
|
185 |
# event handlers, ugly way but it works
|
186 |
benchmark_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
187 |
model_type_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
css_html_js.py
CHANGED
@@ -1,7 +1,13 @@
|
|
1 |
custom_css = """
|
|
|
|
|
|
|
2 |
#component-1 {
|
3 |
text-align: center;
|
4 |
}
|
|
|
|
|
|
|
5 |
#component-0 {
|
6 |
width: 75vw;
|
7 |
margin: 0 auto;
|
|
|
1 |
custom_css = """
|
2 |
+
.tab-wrapper button {
|
3 |
+
font-size: 16px;
|
4 |
+
}
|
5 |
#component-1 {
|
6 |
text-align: center;
|
7 |
}
|
8 |
+
#component-3 p {
|
9 |
+
text-align: center;
|
10 |
+
}
|
11 |
#component-0 {
|
12 |
width: 75vw;
|
13 |
margin: 0 auto;
|
parse.py
CHANGED
@@ -65,6 +65,8 @@ def parse_results(csv_path: str) -> list[dict]:
|
|
65 |
row = row[1:]
|
66 |
ctr = 0
|
67 |
for metric, bench in zip(metrics, benchs):
|
|
|
|
|
68 |
record = {}
|
69 |
record["Model"] = model
|
70 |
record["Model Type"] = type
|
@@ -92,7 +94,7 @@ def read_json():
|
|
92 |
def read_data() -> Union[pd.DataFrame, list, list, str]:
|
93 |
data = read_json()
|
94 |
df = pd.DataFrame(data)
|
95 |
-
df.rename(columns={'Model': 'Model', 'Benchmark': 'Benchmark', 'Task': 'Metric', 'Result': 'Score'}, inplace=True)
|
96 |
df['Params'] = pd.to_numeric(df['Params'], errors='coerce')
|
97 |
benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
|
98 |
metrics = df['Metric'].unique().tolist()
|
|
|
65 |
row = row[1:]
|
66 |
ctr = 0
|
67 |
for metric, bench in zip(metrics, benchs):
|
68 |
+
if metric == "EM":
|
69 |
+
metric = "Exact Matching (EM)"
|
70 |
record = {}
|
71 |
record["Model"] = model
|
72 |
record["Model Type"] = type
|
|
|
94 |
def read_data() -> Union[pd.DataFrame, list, list, str]:
|
95 |
data = read_json()
|
96 |
df = pd.DataFrame(data)
|
97 |
+
df.rename(columns={'Model': 'Model', 'Benchmark': 'Benchmark', 'Task': 'Metric', 'Result': 'Score', 'EM': 'Exact Matching (EM)'}, inplace=True)
|
98 |
df['Params'] = pd.to_numeric(df['Params'], errors='coerce')
|
99 |
benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
|
100 |
metrics = df['Metric'].unique().tolist()
|
results.json
CHANGED
@@ -111,7 +111,7 @@
|
|
111 |
"Model": "DeepSeek R1",
|
112 |
"Model Type": "General",
|
113 |
"Benchmark": "RTL-Repo",
|
114 |
-
"Task": "EM",
|
115 |
"Result": 33.02,
|
116 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
117 |
"Params": 685
|
@@ -336,7 +336,7 @@
|
|
336 |
"Model": "Llama 3.1 405B",
|
337 |
"Model Type": "General",
|
338 |
"Benchmark": "RTL-Repo",
|
339 |
-
"Task": "EM",
|
340 |
"Result": 33.29,
|
341 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.1-405B",
|
342 |
"Params": 406
|
@@ -561,7 +561,7 @@
|
|
561 |
"Model": "Llama 3.(1-3) 70B",
|
562 |
"Model Type": "General",
|
563 |
"Benchmark": "RTL-Repo",
|
564 |
-
"Task": "EM",
|
565 |
"Result": 28.62,
|
566 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
567 |
"Params": 70.6
|
@@ -786,7 +786,7 @@
|
|
786 |
"Model": "Qwen2.5 72B",
|
787 |
"Model Type": "General",
|
788 |
"Benchmark": "RTL-Repo",
|
789 |
-
"Task": "EM",
|
790 |
"Result": 37.19,
|
791 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
792 |
"Params": 72.7
|
@@ -1011,7 +1011,7 @@
|
|
1011 |
"Model": "Qwen2.5 32B",
|
1012 |
"Model Type": "General",
|
1013 |
"Benchmark": "RTL-Repo",
|
1014 |
-
"Task": "EM",
|
1015 |
"Result": 28.67,
|
1016 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-32B",
|
1017 |
"Params": 32.5
|
@@ -1236,7 +1236,7 @@
|
|
1236 |
"Model": "StarChat2 15B v0.1",
|
1237 |
"Model Type": "General",
|
1238 |
"Benchmark": "RTL-Repo",
|
1239 |
-
"Task": "EM",
|
1240 |
"Result": 13.24,
|
1241 |
"Model URL": "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
|
1242 |
"Params": 16
|
@@ -1461,7 +1461,7 @@
|
|
1461 |
"Model": "DeepSeek R1 Distill Qwen 14B",
|
1462 |
"Model Type": "General",
|
1463 |
"Benchmark": "RTL-Repo",
|
1464 |
-
"Task": "EM",
|
1465 |
"Result": 20.65,
|
1466 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
1467 |
"Params": 14.8
|
@@ -1686,7 +1686,7 @@
|
|
1686 |
"Model": "CodeLlama 70B",
|
1687 |
"Model Type": "Coding",
|
1688 |
"Benchmark": "RTL-Repo",
|
1689 |
-
"Task": "EM",
|
1690 |
"Result": 24.58,
|
1691 |
"Model URL": "https://huggingface.co/codellama/CodeLlama-70b-hf",
|
1692 |
"Params": 69
|
@@ -1911,7 +1911,7 @@
|
|
1911 |
"Model": "QwenCoder 2.5 32B",
|
1912 |
"Model Type": "Coding",
|
1913 |
"Benchmark": "RTL-Repo",
|
1914 |
-
"Task": "EM",
|
1915 |
"Result": 30.44,
|
1916 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
|
1917 |
"Params": 32.5
|
@@ -2136,7 +2136,7 @@
|
|
2136 |
"Model": "DeepSeek Coder 33B",
|
2137 |
"Model Type": "Coding",
|
2138 |
"Benchmark": "RTL-Repo",
|
2139 |
-
"Task": "EM",
|
2140 |
"Result": 30.58,
|
2141 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
|
2142 |
"Params": 33.3
|
@@ -2361,7 +2361,7 @@
|
|
2361 |
"Model": "QwenCoder 2.5 14B",
|
2362 |
"Model Type": "Coding",
|
2363 |
"Benchmark": "RTL-Repo",
|
2364 |
-
"Task": "EM",
|
2365 |
"Result": 37.16,
|
2366 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
|
2367 |
"Params": 14.7
|
@@ -2586,7 +2586,7 @@
|
|
2586 |
"Model": "OpenCoder 8B",
|
2587 |
"Model Type": "Coding",
|
2588 |
"Benchmark": "RTL-Repo",
|
2589 |
-
"Task": "EM",
|
2590 |
"Result": 16.63,
|
2591 |
"Model URL": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
|
2592 |
"Params": 7.77
|
@@ -2811,7 +2811,7 @@
|
|
2811 |
"Model": "QwenCoder 2.5 7B",
|
2812 |
"Model Type": "Coding",
|
2813 |
"Benchmark": "RTL-Repo",
|
2814 |
-
"Task": "EM",
|
2815 |
"Result": 28.45,
|
2816 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
|
2817 |
"Params": 7.61
|
@@ -3036,7 +3036,7 @@
|
|
3036 |
"Model": "DeepSeek Coder 6,7B",
|
3037 |
"Model Type": "Coding",
|
3038 |
"Benchmark": "RTL-Repo",
|
3039 |
-
"Task": "EM",
|
3040 |
"Result": 24.57,
|
3041 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
|
3042 |
"Params": 6.74
|
@@ -3261,7 +3261,7 @@
|
|
3261 |
"Model": "RTLCoder Mistral",
|
3262 |
"Model Type": "RTL-Specific",
|
3263 |
"Benchmark": "RTL-Repo",
|
3264 |
-
"Task": "EM",
|
3265 |
"Result": 14.97,
|
3266 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-v1.1",
|
3267 |
"Params": 7.24
|
@@ -3486,7 +3486,7 @@
|
|
3486 |
"Model": "RTLCoder DeepSeek",
|
3487 |
"Model Type": "RTL-Specific",
|
3488 |
"Benchmark": "RTL-Repo",
|
3489 |
-
"Task": "EM",
|
3490 |
"Result": 19.76,
|
3491 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
|
3492 |
"Params": 6.74
|
@@ -3711,7 +3711,7 @@
|
|
3711 |
"Model": "OriGen",
|
3712 |
"Model Type": "RTL-Specific",
|
3713 |
"Benchmark": "RTL-Repo",
|
3714 |
-
"Task": "EM",
|
3715 |
"Result": 19.45,
|
3716 |
"Model URL": "https://huggingface.co/henryen/OriGen_Fix",
|
3717 |
"Params": 6.74
|
@@ -3936,7 +3936,7 @@
|
|
3936 |
"Model": "HaVen-CodeQwen",
|
3937 |
"Model Type": "RTL-Specific",
|
3938 |
"Benchmark": "RTL-Repo",
|
3939 |
-
"Task": "EM",
|
3940 |
"Result": 25.38,
|
3941 |
"Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
|
3942 |
"Params": 7.25
|
@@ -4161,7 +4161,7 @@
|
|
4161 |
"Model": "CodeV-CL-7B",
|
4162 |
"Model Type": "RTL-Specific",
|
4163 |
"Benchmark": "RTL-Repo",
|
4164 |
-
"Task": "EM",
|
4165 |
"Result": 12.39,
|
4166 |
"Model URL": "https://huggingface.co/yang-z/CodeV-CL-7B",
|
4167 |
"Params": 6.74
|
@@ -4386,7 +4386,7 @@
|
|
4386 |
"Model": "CodeV-QW-7B",
|
4387 |
"Model Type": "RTL-Specific",
|
4388 |
"Benchmark": "RTL-Repo",
|
4389 |
-
"Task": "EM",
|
4390 |
"Result": 20.56,
|
4391 |
"Model URL": "https://huggingface.co/yang-z/CodeV-QW-7B",
|
4392 |
"Params": 7.25
|
@@ -4611,7 +4611,7 @@
|
|
4611 |
"Model": "CodeV-DS-6.7B",
|
4612 |
"Model Type": "RTL-Specific",
|
4613 |
"Benchmark": "RTL-Repo",
|
4614 |
-
"Task": "EM",
|
4615 |
"Result": 21.06,
|
4616 |
"Model URL": "https://huggingface.co/yang-z/CodeV-DS-6.7B",
|
4617 |
"Params": 6.74
|
|
|
111 |
"Model": "DeepSeek R1",
|
112 |
"Model Type": "General",
|
113 |
"Benchmark": "RTL-Repo",
|
114 |
+
"Task": "Exact Matching (EM)",
|
115 |
"Result": 33.02,
|
116 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
117 |
"Params": 685
|
|
|
336 |
"Model": "Llama 3.1 405B",
|
337 |
"Model Type": "General",
|
338 |
"Benchmark": "RTL-Repo",
|
339 |
+
"Task": "Exact Matching (EM)",
|
340 |
"Result": 33.29,
|
341 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.1-405B",
|
342 |
"Params": 406
|
|
|
561 |
"Model": "Llama 3.(1-3) 70B",
|
562 |
"Model Type": "General",
|
563 |
"Benchmark": "RTL-Repo",
|
564 |
+
"Task": "Exact Matching (EM)",
|
565 |
"Result": 28.62,
|
566 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
567 |
"Params": 70.6
|
|
|
786 |
"Model": "Qwen2.5 72B",
|
787 |
"Model Type": "General",
|
788 |
"Benchmark": "RTL-Repo",
|
789 |
+
"Task": "Exact Matching (EM)",
|
790 |
"Result": 37.19,
|
791 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
792 |
"Params": 72.7
|
|
|
1011 |
"Model": "Qwen2.5 32B",
|
1012 |
"Model Type": "General",
|
1013 |
"Benchmark": "RTL-Repo",
|
1014 |
+
"Task": "Exact Matching (EM)",
|
1015 |
"Result": 28.67,
|
1016 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-32B",
|
1017 |
"Params": 32.5
|
|
|
1236 |
"Model": "StarChat2 15B v0.1",
|
1237 |
"Model Type": "General",
|
1238 |
"Benchmark": "RTL-Repo",
|
1239 |
+
"Task": "Exact Matching (EM)",
|
1240 |
"Result": 13.24,
|
1241 |
"Model URL": "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
|
1242 |
"Params": 16
|
|
|
1461 |
"Model": "DeepSeek R1 Distill Qwen 14B",
|
1462 |
"Model Type": "General",
|
1463 |
"Benchmark": "RTL-Repo",
|
1464 |
+
"Task": "Exact Matching (EM)",
|
1465 |
"Result": 20.65,
|
1466 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
1467 |
"Params": 14.8
|
|
|
1686 |
"Model": "CodeLlama 70B",
|
1687 |
"Model Type": "Coding",
|
1688 |
"Benchmark": "RTL-Repo",
|
1689 |
+
"Task": "Exact Matching (EM)",
|
1690 |
"Result": 24.58,
|
1691 |
"Model URL": "https://huggingface.co/codellama/CodeLlama-70b-hf",
|
1692 |
"Params": 69
|
|
|
1911 |
"Model": "QwenCoder 2.5 32B",
|
1912 |
"Model Type": "Coding",
|
1913 |
"Benchmark": "RTL-Repo",
|
1914 |
+
"Task": "Exact Matching (EM)",
|
1915 |
"Result": 30.44,
|
1916 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
|
1917 |
"Params": 32.5
|
|
|
2136 |
"Model": "DeepSeek Coder 33B",
|
2137 |
"Model Type": "Coding",
|
2138 |
"Benchmark": "RTL-Repo",
|
2139 |
+
"Task": "Exact Matching (EM)",
|
2140 |
"Result": 30.58,
|
2141 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
|
2142 |
"Params": 33.3
|
|
|
2361 |
"Model": "QwenCoder 2.5 14B",
|
2362 |
"Model Type": "Coding",
|
2363 |
"Benchmark": "RTL-Repo",
|
2364 |
+
"Task": "Exact Matching (EM)",
|
2365 |
"Result": 37.16,
|
2366 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
|
2367 |
"Params": 14.7
|
|
|
2586 |
"Model": "OpenCoder 8B",
|
2587 |
"Model Type": "Coding",
|
2588 |
"Benchmark": "RTL-Repo",
|
2589 |
+
"Task": "Exact Matching (EM)",
|
2590 |
"Result": 16.63,
|
2591 |
"Model URL": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
|
2592 |
"Params": 7.77
|
|
|
2811 |
"Model": "QwenCoder 2.5 7B",
|
2812 |
"Model Type": "Coding",
|
2813 |
"Benchmark": "RTL-Repo",
|
2814 |
+
"Task": "Exact Matching (EM)",
|
2815 |
"Result": 28.45,
|
2816 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
|
2817 |
"Params": 7.61
|
|
|
3036 |
"Model": "DeepSeek Coder 6,7B",
|
3037 |
"Model Type": "Coding",
|
3038 |
"Benchmark": "RTL-Repo",
|
3039 |
+
"Task": "Exact Matching (EM)",
|
3040 |
"Result": 24.57,
|
3041 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
|
3042 |
"Params": 6.74
|
|
|
3261 |
"Model": "RTLCoder Mistral",
|
3262 |
"Model Type": "RTL-Specific",
|
3263 |
"Benchmark": "RTL-Repo",
|
3264 |
+
"Task": "Exact Matching (EM)",
|
3265 |
"Result": 14.97,
|
3266 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-v1.1",
|
3267 |
"Params": 7.24
|
|
|
3486 |
"Model": "RTLCoder DeepSeek",
|
3487 |
"Model Type": "RTL-Specific",
|
3488 |
"Benchmark": "RTL-Repo",
|
3489 |
+
"Task": "Exact Matching (EM)",
|
3490 |
"Result": 19.76,
|
3491 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
|
3492 |
"Params": 6.74
|
|
|
3711 |
"Model": "OriGen",
|
3712 |
"Model Type": "RTL-Specific",
|
3713 |
"Benchmark": "RTL-Repo",
|
3714 |
+
"Task": "Exact Matching (EM)",
|
3715 |
"Result": 19.45,
|
3716 |
"Model URL": "https://huggingface.co/henryen/OriGen_Fix",
|
3717 |
"Params": 6.74
|
|
|
3936 |
"Model": "HaVen-CodeQwen",
|
3937 |
"Model Type": "RTL-Specific",
|
3938 |
"Benchmark": "RTL-Repo",
|
3939 |
+
"Task": "Exact Matching (EM)",
|
3940 |
"Result": 25.38,
|
3941 |
"Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
|
3942 |
"Params": 7.25
|
|
|
4161 |
"Model": "CodeV-CL-7B",
|
4162 |
"Model Type": "RTL-Specific",
|
4163 |
"Benchmark": "RTL-Repo",
|
4164 |
+
"Task": "Exact Matching (EM)",
|
4165 |
"Result": 12.39,
|
4166 |
"Model URL": "https://huggingface.co/yang-z/CodeV-CL-7B",
|
4167 |
"Params": 6.74
|
|
|
4386 |
"Model": "CodeV-QW-7B",
|
4387 |
"Model Type": "RTL-Specific",
|
4388 |
"Benchmark": "RTL-Repo",
|
4389 |
+
"Task": "Exact Matching (EM)",
|
4390 |
"Result": 20.56,
|
4391 |
"Model URL": "https://huggingface.co/yang-z/CodeV-QW-7B",
|
4392 |
"Params": 7.25
|
|
|
4611 |
"Model": "CodeV-DS-6.7B",
|
4612 |
"Model Type": "RTL-Specific",
|
4613 |
"Benchmark": "RTL-Repo",
|
4614 |
+
"Task": "Exact Matching (EM)",
|
4615 |
"Result": 21.06,
|
4616 |
"Model URL": "https://huggingface.co/yang-z/CodeV-DS-6.7B",
|
4617 |
"Params": 6.74
|
utils.py
CHANGED
@@ -5,19 +5,19 @@ import plotly.express as px
|
|
5 |
import numpy as np
|
6 |
|
7 |
type_emoji = {
|
8 |
-
"RTL-Specific": "
|
9 |
"General": "🟢",
|
10 |
-
"Coding": "
|
11 |
}
|
12 |
|
13 |
def model_hyperlink(link, model_name):
|
14 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
15 |
|
16 |
def handle_special_cases(benchmark, metric):
|
17 |
-
if metric == 'EM':
|
18 |
benchmark = 'RTL-Repo'
|
19 |
elif benchmark == 'RTL-Repo':
|
20 |
-
metric = 'EM'
|
21 |
return benchmark, metric
|
22 |
|
23 |
def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
@@ -27,6 +27,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
|
27 |
filtered_df['Model'] = filtered_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
|
28 |
filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
|
29 |
filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
|
|
|
30 |
filtered_df.insert(0, '', range(1, len(filtered_df) + 1))
|
31 |
return filtered_df
|
32 |
|
|
|
5 |
import numpy as np
|
6 |
|
7 |
type_emoji = {
|
8 |
+
"RTL-Specific": "🔴",
|
9 |
"General": "🟢",
|
10 |
+
"Coding": "🔵"
|
11 |
}
|
12 |
|
13 |
def model_hyperlink(link, model_name):
|
14 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
15 |
|
16 |
def handle_special_cases(benchmark, metric):
|
17 |
+
if metric == 'Exact Matching (EM)':
|
18 |
benchmark = 'RTL-Repo'
|
19 |
elif benchmark == 'RTL-Repo':
|
20 |
+
metric = 'Exact Matching (EM)'
|
21 |
return benchmark, metric
|
22 |
|
23 |
def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
|
|
27 |
filtered_df['Model'] = filtered_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
|
28 |
filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
|
29 |
filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
|
30 |
+
filtered_df = filtered_df.sort_values(by='Exact Matching (EM)', ascending=False).reset_index(drop=True)
|
31 |
filtered_df.insert(0, '', range(1, len(filtered_df) + 1))
|
32 |
return filtered_df
|
33 |
|