ggcristian committed on
Commit
73cf0ca
·
1 Parent(s): c94926c

Added About Us Section

Browse files
Files changed (5) hide show
  1. app.py +53 -16
  2. css_html_js.py +6 -0
  3. parse.py +3 -1
  4. results.json +21 -21
  5. utils.py +5 -4
app.py CHANGED
@@ -32,8 +32,8 @@ def generate_scatter_plot(benchmark, metric):
32
  if benchmark == "RTL-Repo":
33
  subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
34
  detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
35
- detailed_scores.rename(columns={'Score': 'EM'}, inplace=True)
36
- detailed_scores['Average ⬆️'] = detailed_scores['EM']
37
  else:
38
  detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
39
  detailed_scores['Average ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1)
@@ -44,7 +44,6 @@ def generate_scatter_plot(benchmark, metric):
44
  scatter_data['x'] = scatter_data['Params']
45
  scatter_data['y'] = scatter_data[metric]
46
  scatter_data['size'] = (scatter_data['x'] ** 0.3) * 40
47
-
48
 
49
  type_colors = {"General": "green", "Coding": "yellow", "RTL-Specific": "blue"}
50
  scatter_data['color'] = scatter_data['Model Type'].map(type_colors).fillna('gray')
@@ -57,9 +56,10 @@ def generate_scatter_plot(benchmark, metric):
57
  y_range = y_axis_limits.get(metric, [0, 80])
58
 
59
  fig = px.scatter(
60
- scatter_data, x='x', y='y', log_x=True, size='size', color='color', text='Model',
61
  hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
62
  labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
 
63
  height=600, width=1200
64
  )
65
 
@@ -99,9 +99,9 @@ with gr.Blocks(css=custom_css, js=js_func) as app:
99
  </p>
100
  """)
101
  gr.Markdown("""
102
- Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models.
103
  [GitHub Repository](https://github.com/https://github.com/HPAI-BSC) | [arXiv Preprint](https://arxiv.org/) | [How to submit](https://github.com/https://github.com/HPAI-BSC)<br/>
104
- Contact us: [email protected]
105
  """)
106
 
107
  with gr.Tabs():
@@ -128,23 +128,60 @@ with gr.Blocks(css=custom_css, js=js_func) as app:
128
  interactive=False,
129
  column_widths=["4%", "5%", "28%", "10%", "14%"],)
130
 
 
 
 
 
 
 
 
 
 
131
  with gr.Tab("Interactive Bubble Plot"):
132
  with gr.Row():
133
  bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
134
  bubble_metric = gr.Radio(choices=metrics, label="Select Metric", value=default_metric)
135
- gr.Markdown("We show in 🟢 General Models, in 🟡 Coding Models and in 🔵 RTL-Specific Models. Detailed information is shown when hovering over each model in the plot.")
136
  scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
137
-
138
- with gr.Row():
139
- with gr.Accordion("📙 Citation", open=False):
140
- citation_button = gr.Textbox(
141
- value=CITATION_BUTTON_TEXT,
142
- label=CITATION_BUTTON_LABEL,
143
- lines=20,
144
- elem_id="citation-button",
145
- show_copy_button=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  )
147
 
 
 
 
 
 
 
 
 
 
 
148
  # event handlers, ugly way but it works
149
  benchmark_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
150
  model_type_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
 
32
  if benchmark == "RTL-Repo":
33
  subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
34
  detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
35
+ detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
36
+ detailed_scores['Average ⬆️'] = detailed_scores['Exact Matching (EM)']
37
  else:
38
  detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
39
  detailed_scores['Average ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1)
 
44
  scatter_data['x'] = scatter_data['Params']
45
  scatter_data['y'] = scatter_data[metric]
46
  scatter_data['size'] = (scatter_data['x'] ** 0.3) * 40
 
47
 
48
  type_colors = {"General": "green", "Coding": "yellow", "RTL-Specific": "blue"}
49
  scatter_data['color'] = scatter_data['Model Type'].map(type_colors).fillna('gray')
 
56
  y_range = y_axis_limits.get(metric, [0, 80])
57
 
58
  fig = px.scatter(
59
+ scatter_data, x='x', y='y', log_x=True, size='size', color='Model Type', text='Model',
60
  hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
61
  labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
62
+ # color_discrete_map={"General": "#A8D5BA", "Coding": "#F7DC6F", "RTL-Specific": "#87CEFA"},
63
  height=600, width=1200
64
  )
65
 
 
99
  </p>
100
  """)
101
  gr.Markdown("""
102
+ Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models.<br/>
103
  [GitHub Repository](https://github.com/HPAI-BSC) | [arXiv Preprint](https://arxiv.org/) | [How to submit](https://github.com/HPAI-BSC)<br/>
104
+ If you have any inquiries or wish to collaborate: [email protected]
105
  """)
106
 
107
  with gr.Tabs():
 
128
  interactive=False,
129
  column_widths=["4%", "5%", "28%", "10%", "14%"],)
130
 
131
+ with gr.Row():
132
+ with gr.Accordion("📙 Citation", open=False):
133
+ citation_button = gr.Textbox(
134
+ value=CITATION_BUTTON_TEXT,
135
+ label=CITATION_BUTTON_LABEL,
136
+ lines=20,
137
+ elem_id="citation-button",
138
+ show_copy_button=True,
139
+ )
140
  with gr.Tab("Interactive Bubble Plot"):
141
  with gr.Row():
142
  bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
143
  bubble_metric = gr.Radio(choices=metrics, label="Select Metric", value=default_metric)
144
+ gr.Markdown("We show in 🟢 General Models, in 🔵 Coding Models and in 🔴 RTL-Specific Models. Detailed information is shown when hovering over each model in the plot.")
145
  scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
146
+
147
+ with gr.Tab("About Us"):
148
+ gr.HTML(
149
+ """
150
+ <div style="max-width: 800px; margin: auto; padding: 20px; border: 1px solid #ccc; border-radius: 10px;">
151
+ <h1 style="text-align: center; font-size: 28px; margin-top: -7px;">HPAI-BSC</h1>
152
+
153
+ <p style="font-size: 18px; text-align: justify;">
154
+ The <b>High-Performance Artificial Intelligence (HPAI)</b> group is part of the
155
+ <a href="https://bsc.es/" target="_blank">Barcelona Supercomputing Center (BSC)</a>.
156
+ This leaderboard is maintained by HPAI as part of our commitment to <b>open science</b>.
157
+ </p>
158
+
159
+ <ul style="font-size: 18px; margin-bottom: 20px; margin-top: 20px;">
160
+ <li><a href="https://hpai.bsc.es/" target="_blank">Official Website</a></li>
161
+ <li><a href="https://github.com/HPAI-BSC/" target="_blank">GitHub Organization Page</a></li>
162
+ <li><a href="https://huggingface.co/HPAI-BSC/" target="_blank">Hugging Face Organization Page</a></li>
163
+ <li><a href="https://hpai.bsc.es/publications" target="_blank">Publications</a></li>
164
+ </ul>
165
+
166
+ <p style="font-size: 18px; margin-top: 15px;">
167
+ Feel free to contact us:
168
+ </p>
169
+
170
+ <p style="font-size: 18px;">Email: <a href="mailto:[email protected]"><b>[email protected]</b></a></p>
171
+ </div>
172
+ """
173
  )
174
 
175
+ with gr.Row():
176
+ with gr.Accordion("📙 Citation", open=False):
177
+ citation_button = gr.Textbox(
178
+ value=CITATION_BUTTON_TEXT,
179
+ label=CITATION_BUTTON_LABEL,
180
+ lines=20,
181
+ elem_id="citation-button",
182
+ show_copy_button=True,
183
+ )
184
+
185
  # event handlers, ugly way but it works
186
  benchmark_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
187
  model_type_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
css_html_js.py CHANGED
@@ -1,7 +1,13 @@
1
  custom_css = """
 
 
 
2
  #component-1 {
3
  text-align: center;
4
  }
 
 
 
5
  #component-0 {
6
  width: 75vw;
7
  margin: 0 auto;
 
1
  custom_css = """
2
+ .tab-wrapper button {
3
+ font-size: 16px;
4
+ }
5
  #component-1 {
6
  text-align: center;
7
  }
8
+ #component-3 p {
9
+ text-align: center;
10
+ }
11
  #component-0 {
12
  width: 75vw;
13
  margin: 0 auto;
parse.py CHANGED
@@ -65,6 +65,8 @@ def parse_results(csv_path: str) -> list[dict]:
65
  row = row[1:]
66
  ctr = 0
67
  for metric, bench in zip(metrics, benchs):
 
 
68
  record = {}
69
  record["Model"] = model
70
  record["Model Type"] = type
@@ -92,7 +94,7 @@ def read_json():
92
  def read_data() -> Union[pd.DataFrame, list, list, str]:
93
  data = read_json()
94
  df = pd.DataFrame(data)
95
- df.rename(columns={'Model': 'Model', 'Benchmark': 'Benchmark', 'Task': 'Metric', 'Result': 'Score'}, inplace=True)
96
  df['Params'] = pd.to_numeric(df['Params'], errors='coerce')
97
  benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
98
  metrics = df['Metric'].unique().tolist()
 
65
  row = row[1:]
66
  ctr = 0
67
  for metric, bench in zip(metrics, benchs):
68
+ if metric == "EM":
69
+ metric = "Exact Matching (EM)"
70
  record = {}
71
  record["Model"] = model
72
  record["Model Type"] = type
 
94
  def read_data() -> Union[pd.DataFrame, list, list, str]:
95
  data = read_json()
96
  df = pd.DataFrame(data)
97
+ df.rename(columns={'Model': 'Model', 'Benchmark': 'Benchmark', 'Task': 'Metric', 'Result': 'Score', 'EM': 'Exact Matching (EM)'}, inplace=True)
98
  df['Params'] = pd.to_numeric(df['Params'], errors='coerce')
99
  benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
100
  metrics = df['Metric'].unique().tolist()
results.json CHANGED
@@ -111,7 +111,7 @@
111
  "Model": "DeepSeek R1",
112
  "Model Type": "General",
113
  "Benchmark": "RTL-Repo",
114
- "Task": "EM",
115
  "Result": 33.02,
116
  "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
117
  "Params": 685
@@ -336,7 +336,7 @@
336
  "Model": "Llama 3.1 405B",
337
  "Model Type": "General",
338
  "Benchmark": "RTL-Repo",
339
- "Task": "EM",
340
  "Result": 33.29,
341
  "Model URL": "https://huggingface.co/meta-llama/Llama-3.1-405B",
342
  "Params": 406
@@ -561,7 +561,7 @@
561
  "Model": "Llama 3.(1-3) 70B",
562
  "Model Type": "General",
563
  "Benchmark": "RTL-Repo",
564
- "Task": "EM",
565
  "Result": 28.62,
566
  "Model URL": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
567
  "Params": 70.6
@@ -786,7 +786,7 @@
786
  "Model": "Qwen2.5 72B",
787
  "Model Type": "General",
788
  "Benchmark": "RTL-Repo",
789
- "Task": "EM",
790
  "Result": 37.19,
791
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
792
  "Params": 72.7
@@ -1011,7 +1011,7 @@
1011
  "Model": "Qwen2.5 32B",
1012
  "Model Type": "General",
1013
  "Benchmark": "RTL-Repo",
1014
- "Task": "EM",
1015
  "Result": 28.67,
1016
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-32B",
1017
  "Params": 32.5
@@ -1236,7 +1236,7 @@
1236
  "Model": "StarChat2 15B v0.1",
1237
  "Model Type": "General",
1238
  "Benchmark": "RTL-Repo",
1239
- "Task": "EM",
1240
  "Result": 13.24,
1241
  "Model URL": "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
1242
  "Params": 16
@@ -1461,7 +1461,7 @@
1461
  "Model": "DeepSeek R1 Distill Qwen 14B",
1462
  "Model Type": "General",
1463
  "Benchmark": "RTL-Repo",
1464
- "Task": "EM",
1465
  "Result": 20.65,
1466
  "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
1467
  "Params": 14.8
@@ -1686,7 +1686,7 @@
1686
  "Model": "CodeLlama 70B",
1687
  "Model Type": "Coding",
1688
  "Benchmark": "RTL-Repo",
1689
- "Task": "EM",
1690
  "Result": 24.58,
1691
  "Model URL": "https://huggingface.co/codellama/CodeLlama-70b-hf",
1692
  "Params": 69
@@ -1911,7 +1911,7 @@
1911
  "Model": "QwenCoder 2.5 32B",
1912
  "Model Type": "Coding",
1913
  "Benchmark": "RTL-Repo",
1914
- "Task": "EM",
1915
  "Result": 30.44,
1916
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
1917
  "Params": 32.5
@@ -2136,7 +2136,7 @@
2136
  "Model": "DeepSeek Coder 33B",
2137
  "Model Type": "Coding",
2138
  "Benchmark": "RTL-Repo",
2139
- "Task": "EM",
2140
  "Result": 30.58,
2141
  "Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
2142
  "Params": 33.3
@@ -2361,7 +2361,7 @@
2361
  "Model": "QwenCoder 2.5 14B",
2362
  "Model Type": "Coding",
2363
  "Benchmark": "RTL-Repo",
2364
- "Task": "EM",
2365
  "Result": 37.16,
2366
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
2367
  "Params": 14.7
@@ -2586,7 +2586,7 @@
2586
  "Model": "OpenCoder 8B",
2587
  "Model Type": "Coding",
2588
  "Benchmark": "RTL-Repo",
2589
- "Task": "EM",
2590
  "Result": 16.63,
2591
  "Model URL": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
2592
  "Params": 7.77
@@ -2811,7 +2811,7 @@
2811
  "Model": "QwenCoder 2.5 7B",
2812
  "Model Type": "Coding",
2813
  "Benchmark": "RTL-Repo",
2814
- "Task": "EM",
2815
  "Result": 28.45,
2816
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
2817
  "Params": 7.61
@@ -3036,7 +3036,7 @@
3036
  "Model": "DeepSeek Coder 6,7B",
3037
  "Model Type": "Coding",
3038
  "Benchmark": "RTL-Repo",
3039
- "Task": "EM",
3040
  "Result": 24.57,
3041
  "Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
3042
  "Params": 6.74
@@ -3261,7 +3261,7 @@
3261
  "Model": "RTLCoder Mistral",
3262
  "Model Type": "RTL-Specific",
3263
  "Benchmark": "RTL-Repo",
3264
- "Task": "EM",
3265
  "Result": 14.97,
3266
  "Model URL": "https://huggingface.co/ishorn5/RTLCoder-v1.1",
3267
  "Params": 7.24
@@ -3486,7 +3486,7 @@
3486
  "Model": "RTLCoder DeepSeek",
3487
  "Model Type": "RTL-Specific",
3488
  "Benchmark": "RTL-Repo",
3489
- "Task": "EM",
3490
  "Result": 19.76,
3491
  "Model URL": "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
3492
  "Params": 6.74
@@ -3711,7 +3711,7 @@
3711
  "Model": "OriGen",
3712
  "Model Type": "RTL-Specific",
3713
  "Benchmark": "RTL-Repo",
3714
- "Task": "EM",
3715
  "Result": 19.45,
3716
  "Model URL": "https://huggingface.co/henryen/OriGen_Fix",
3717
  "Params": 6.74
@@ -3936,7 +3936,7 @@
3936
  "Model": "HaVen-CodeQwen",
3937
  "Model Type": "RTL-Specific",
3938
  "Benchmark": "RTL-Repo",
3939
- "Task": "EM",
3940
  "Result": 25.38,
3941
  "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
3942
  "Params": 7.25
@@ -4161,7 +4161,7 @@
4161
  "Model": "CodeV-CL-7B",
4162
  "Model Type": "RTL-Specific",
4163
  "Benchmark": "RTL-Repo",
4164
- "Task": "EM",
4165
  "Result": 12.39,
4166
  "Model URL": "https://huggingface.co/yang-z/CodeV-CL-7B",
4167
  "Params": 6.74
@@ -4386,7 +4386,7 @@
4386
  "Model": "CodeV-QW-7B",
4387
  "Model Type": "RTL-Specific",
4388
  "Benchmark": "RTL-Repo",
4389
- "Task": "EM",
4390
  "Result": 20.56,
4391
  "Model URL": "https://huggingface.co/yang-z/CodeV-QW-7B",
4392
  "Params": 7.25
@@ -4611,7 +4611,7 @@
4611
  "Model": "CodeV-DS-6.7B",
4612
  "Model Type": "RTL-Specific",
4613
  "Benchmark": "RTL-Repo",
4614
- "Task": "EM",
4615
  "Result": 21.06,
4616
  "Model URL": "https://huggingface.co/yang-z/CodeV-DS-6.7B",
4617
  "Params": 6.74
 
111
  "Model": "DeepSeek R1",
112
  "Model Type": "General",
113
  "Benchmark": "RTL-Repo",
114
+ "Task": "Exact Matching (EM)",
115
  "Result": 33.02,
116
  "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
117
  "Params": 685
 
336
  "Model": "Llama 3.1 405B",
337
  "Model Type": "General",
338
  "Benchmark": "RTL-Repo",
339
+ "Task": "Exact Matching (EM)",
340
  "Result": 33.29,
341
  "Model URL": "https://huggingface.co/meta-llama/Llama-3.1-405B",
342
  "Params": 406
 
561
  "Model": "Llama 3.(1-3) 70B",
562
  "Model Type": "General",
563
  "Benchmark": "RTL-Repo",
564
+ "Task": "Exact Matching (EM)",
565
  "Result": 28.62,
566
  "Model URL": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
567
  "Params": 70.6
 
786
  "Model": "Qwen2.5 72B",
787
  "Model Type": "General",
788
  "Benchmark": "RTL-Repo",
789
+ "Task": "Exact Matching (EM)",
790
  "Result": 37.19,
791
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
792
  "Params": 72.7
 
1011
  "Model": "Qwen2.5 32B",
1012
  "Model Type": "General",
1013
  "Benchmark": "RTL-Repo",
1014
+ "Task": "Exact Matching (EM)",
1015
  "Result": 28.67,
1016
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-32B",
1017
  "Params": 32.5
 
1236
  "Model": "StarChat2 15B v0.1",
1237
  "Model Type": "General",
1238
  "Benchmark": "RTL-Repo",
1239
+ "Task": "Exact Matching (EM)",
1240
  "Result": 13.24,
1241
  "Model URL": "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
1242
  "Params": 16
 
1461
  "Model": "DeepSeek R1 Distill Qwen 14B",
1462
  "Model Type": "General",
1463
  "Benchmark": "RTL-Repo",
1464
+ "Task": "Exact Matching (EM)",
1465
  "Result": 20.65,
1466
  "Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
1467
  "Params": 14.8
 
1686
  "Model": "CodeLlama 70B",
1687
  "Model Type": "Coding",
1688
  "Benchmark": "RTL-Repo",
1689
+ "Task": "Exact Matching (EM)",
1690
  "Result": 24.58,
1691
  "Model URL": "https://huggingface.co/codellama/CodeLlama-70b-hf",
1692
  "Params": 69
 
1911
  "Model": "QwenCoder 2.5 32B",
1912
  "Model Type": "Coding",
1913
  "Benchmark": "RTL-Repo",
1914
+ "Task": "Exact Matching (EM)",
1915
  "Result": 30.44,
1916
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
1917
  "Params": 32.5
 
2136
  "Model": "DeepSeek Coder 33B",
2137
  "Model Type": "Coding",
2138
  "Benchmark": "RTL-Repo",
2139
+ "Task": "Exact Matching (EM)",
2140
  "Result": 30.58,
2141
  "Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
2142
  "Params": 33.3
 
2361
  "Model": "QwenCoder 2.5 14B",
2362
  "Model Type": "Coding",
2363
  "Benchmark": "RTL-Repo",
2364
+ "Task": "Exact Matching (EM)",
2365
  "Result": 37.16,
2366
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
2367
  "Params": 14.7
 
2586
  "Model": "OpenCoder 8B",
2587
  "Model Type": "Coding",
2588
  "Benchmark": "RTL-Repo",
2589
+ "Task": "Exact Matching (EM)",
2590
  "Result": 16.63,
2591
  "Model URL": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
2592
  "Params": 7.77
 
2811
  "Model": "QwenCoder 2.5 7B",
2812
  "Model Type": "Coding",
2813
  "Benchmark": "RTL-Repo",
2814
+ "Task": "Exact Matching (EM)",
2815
  "Result": 28.45,
2816
  "Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
2817
  "Params": 7.61
 
3036
  "Model": "DeepSeek Coder 6,7B",
3037
  "Model Type": "Coding",
3038
  "Benchmark": "RTL-Repo",
3039
+ "Task": "Exact Matching (EM)",
3040
  "Result": 24.57,
3041
  "Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
3042
  "Params": 6.74
 
3261
  "Model": "RTLCoder Mistral",
3262
  "Model Type": "RTL-Specific",
3263
  "Benchmark": "RTL-Repo",
3264
+ "Task": "Exact Matching (EM)",
3265
  "Result": 14.97,
3266
  "Model URL": "https://huggingface.co/ishorn5/RTLCoder-v1.1",
3267
  "Params": 7.24
 
3486
  "Model": "RTLCoder DeepSeek",
3487
  "Model Type": "RTL-Specific",
3488
  "Benchmark": "RTL-Repo",
3489
+ "Task": "Exact Matching (EM)",
3490
  "Result": 19.76,
3491
  "Model URL": "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
3492
  "Params": 6.74
 
3711
  "Model": "OriGen",
3712
  "Model Type": "RTL-Specific",
3713
  "Benchmark": "RTL-Repo",
3714
+ "Task": "Exact Matching (EM)",
3715
  "Result": 19.45,
3716
  "Model URL": "https://huggingface.co/henryen/OriGen_Fix",
3717
  "Params": 6.74
 
3936
  "Model": "HaVen-CodeQwen",
3937
  "Model Type": "RTL-Specific",
3938
  "Benchmark": "RTL-Repo",
3939
+ "Task": "Exact Matching (EM)",
3940
  "Result": 25.38,
3941
  "Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
3942
  "Params": 7.25
 
4161
  "Model": "CodeV-CL-7B",
4162
  "Model Type": "RTL-Specific",
4163
  "Benchmark": "RTL-Repo",
4164
+ "Task": "Exact Matching (EM)",
4165
  "Result": 12.39,
4166
  "Model URL": "https://huggingface.co/yang-z/CodeV-CL-7B",
4167
  "Params": 6.74
 
4386
  "Model": "CodeV-QW-7B",
4387
  "Model Type": "RTL-Specific",
4388
  "Benchmark": "RTL-Repo",
4389
+ "Task": "Exact Matching (EM)",
4390
  "Result": 20.56,
4391
  "Model URL": "https://huggingface.co/yang-z/CodeV-QW-7B",
4392
  "Params": 7.25
 
4611
  "Model": "CodeV-DS-6.7B",
4612
  "Model Type": "RTL-Specific",
4613
  "Benchmark": "RTL-Repo",
4614
+ "Task": "Exact Matching (EM)",
4615
  "Result": 21.06,
4616
  "Model URL": "https://huggingface.co/yang-z/CodeV-DS-6.7B",
4617
  "Params": 6.74
utils.py CHANGED
@@ -5,19 +5,19 @@ import plotly.express as px
5
  import numpy as np
6
 
7
  type_emoji = {
8
- "RTL-Specific": "🔵",
9
  "General": "🟒",
10
- "Coding": "🟡"
11
  }
12
 
13
  def model_hyperlink(link, model_name):
14
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
15
 
16
  def handle_special_cases(benchmark, metric):
17
- if metric == 'EM':
18
  benchmark = 'RTL-Repo'
19
  elif benchmark == 'RTL-Repo':
20
- metric = 'EM'
21
  return benchmark, metric
22
 
23
  def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
@@ -27,6 +27,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
27
  filtered_df['Model'] = filtered_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
28
  filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
29
  filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
 
30
  filtered_df.insert(0, '', range(1, len(filtered_df) + 1))
31
  return filtered_df
32
 
 
5
  import numpy as np
6
 
7
  type_emoji = {
8
+ "RTL-Specific": "🔴",
9
  "General": "🟒",
10
+ "Coding": "🔵"
11
  }
12
 
13
  def model_hyperlink(link, model_name):
14
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
15
 
16
  def handle_special_cases(benchmark, metric):
17
+ if metric == 'Exact Matching (EM)':
18
  benchmark = 'RTL-Repo'
19
  elif benchmark == 'RTL-Repo':
20
+ metric = 'Exact Matching (EM)'
21
  return benchmark, metric
22
 
23
  def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
 
27
  filtered_df['Model'] = filtered_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
28
  filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
29
  filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
30
+ filtered_df = filtered_df.sort_values(by='Exact Matching (EM)', ascending=False).reset_index(drop=True)
31
  filtered_df.insert(0, '', range(1, len(filtered_df) + 1))
32
  return filtered_df
33