Spaces:
Running
Running
Commit
·
73cf0ca
1
Parent(s):
c94926c
Added About Us Section
Browse files
- app.py +53 -16
- css_html_js.py +6 -0
- parse.py +3 -1
- results.json +21 -21
- utils.py +5 -4
app.py
CHANGED
@@ -32,8 +32,8 @@ def generate_scatter_plot(benchmark, metric):
|
|
32 |
if benchmark == "RTL-Repo":
|
33 |
subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
|
34 |
detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
|
35 |
-
detailed_scores.rename(columns={'Score': 'EM'}, inplace=True)
|
36 |
-
detailed_scores['Average ⬆️'] = detailed_scores['EM']
|
37 |
else:
|
38 |
detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
|
39 |
detailed_scores['Average ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1)
|
@@ -44,7 +44,6 @@ def generate_scatter_plot(benchmark, metric):
|
|
44 |
scatter_data['x'] = scatter_data['Params']
|
45 |
scatter_data['y'] = scatter_data[metric]
|
46 |
scatter_data['size'] = (scatter_data['x'] ** 0.3) * 40
|
47 |
-
|
48 |
|
49 |
type_colors = {"General": "green", "Coding": "yellow", "RTL-Specific": "blue"}
|
50 |
scatter_data['color'] = scatter_data['Model Type'].map(type_colors).fillna('gray')
|
@@ -57,9 +56,10 @@ def generate_scatter_plot(benchmark, metric):
|
|
57 |
y_range = y_axis_limits.get(metric, [0, 80])
|
58 |
|
59 |
fig = px.scatter(
|
60 |
-
scatter_data, x='x', y='y', log_x=True, size='size', color='
|
61 |
hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
|
62 |
labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
|
|
|
63 |
height=600, width=1200
|
64 |
)
|
65 |
|
@@ -99,9 +99,9 @@ with gr.Blocks(css=custom_css, js=js_func) as app:
|
|
99 |
</p>
|
100 |
""")
|
101 |
gr.Markdown("""
|
102 |
-
Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models
|
103 |
[GitHub Repository](https://github.com/HPAI-BSC) | [arXiv Preprint](https://arxiv.org/) | [How to submit](https://github.com/HPAI-BSC)<br/>
|
104 |
-
|
105 |
""")
|
106 |
|
107 |
with gr.Tabs():
|
@@ -128,23 +128,60 @@ with gr.Blocks(css=custom_css, js=js_func) as app:
|
|
128 |
interactive=False,
|
129 |
column_widths=["4%", "5%", "28%", "10%", "14%"],)
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
with gr.Tab("Interactive Bubble Plot"):
|
132 |
with gr.Row():
|
133 |
bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
|
134 |
bubble_metric = gr.Radio(choices=metrics, label="Select Metric", value=default_metric)
|
135 |
-
gr.Markdown("We show in 🟢 General Models, in
|
136 |
scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
)
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
# event handlers, ugly way but it works
|
149 |
benchmark_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
150 |
model_type_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
|
|
32 |
if benchmark == "RTL-Repo":
|
33 |
subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
|
34 |
detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
|
35 |
+
detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
|
36 |
+
detailed_scores['Average ⬆️'] = detailed_scores['Exact Matching (EM)']
|
37 |
else:
|
38 |
detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
|
39 |
detailed_scores['Average ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1)
|
|
|
44 |
scatter_data['x'] = scatter_data['Params']
|
45 |
scatter_data['y'] = scatter_data[metric]
|
46 |
scatter_data['size'] = (scatter_data['x'] ** 0.3) * 40
|
|
|
47 |
|
48 |
type_colors = {"General": "green", "Coding": "yellow", "RTL-Specific": "blue"}
|
49 |
scatter_data['color'] = scatter_data['Model Type'].map(type_colors).fillna('gray')
|
|
|
56 |
y_range = y_axis_limits.get(metric, [0, 80])
|
57 |
|
58 |
fig = px.scatter(
|
59 |
+
scatter_data, x='x', y='y', log_x=True, size='size', color='Model Type', text='Model',
|
60 |
hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
|
61 |
labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
|
62 |
+
# color_discrete_map={"General": "#A8D5BA", "Coding": "#F7DC6F", "RTL-Specific": "#87CEFA"},
|
63 |
height=600, width=1200
|
64 |
)
|
65 |
|
|
|
99 |
</p>
|
100 |
""")
|
101 |
gr.Markdown("""
|
102 |
+
Welcome to the TuRTLe Model Leaderboard! Use the filters below to explore different RTL benchmarks and models.<br/>
|
103 |
[GitHub Repository](https://github.com/HPAI-BSC) | [arXiv Preprint](https://arxiv.org/) | [How to submit](https://github.com/HPAI-BSC)<br/>
|
104 |
+
If you have any inquiries or wish to collaborate: [email protected]
|
105 |
""")
|
106 |
|
107 |
with gr.Tabs():
|
|
|
128 |
interactive=False,
|
129 |
column_widths=["4%", "5%", "28%", "10%", "14%"],)
|
130 |
|
131 |
+
with gr.Row():
|
132 |
+
with gr.Accordion("π Citation", open=False):
|
133 |
+
citation_button = gr.Textbox(
|
134 |
+
value=CITATION_BUTTON_TEXT,
|
135 |
+
label=CITATION_BUTTON_LABEL,
|
136 |
+
lines=20,
|
137 |
+
elem_id="citation-button",
|
138 |
+
show_copy_button=True,
|
139 |
+
)
|
140 |
with gr.Tab("Interactive Bubble Plot"):
|
141 |
with gr.Row():
|
142 |
bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
|
143 |
bubble_metric = gr.Radio(choices=metrics, label="Select Metric", value=default_metric)
|
144 |
+
gr.Markdown("We show in 🟢 General Models, in 🔵 Coding Models and in 🔴 RTL-Specific Models. Detailed information is shown when hovering over each model in the plot.")
|
145 |
scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
|
146 |
+
|
147 |
+
with gr.Tab("About Us"):
|
148 |
+
gr.HTML(
|
149 |
+
"""
|
150 |
+
<div style="max-width: 800px; margin: auto; padding: 20px; border: 1px solid #ccc; border-radius: 10px;">
|
151 |
+
<h1 style="text-align: center; font-size: 28px; margin-top: -7px;">HPAI-BSC</h1>
|
152 |
+
|
153 |
+
<p style="font-size: 18px; text-align: justify;">
|
154 |
+
The <b>High-Performance Artificial Intelligence (HPAI)</b> group is part of the
|
155 |
+
<a href="https://bsc.es/" target="_blank">Barcelona Supercomputing Center (BSC)</a>.
|
156 |
+
This leaderboard is maintained by HPAI as part of our commitment to <b>open science</b>.
|
157 |
+
</p>
|
158 |
+
|
159 |
+
<ul style="font-size: 18px; margin-bottom: 20px; margin-top: 20px;">
|
160 |
+
<li><a href="https://hpai.bsc.es/" target="_blank">Official Website</a></li>
|
161 |
+
<li><a href="https://github.com/HPAI-BSC/" target="_blank">GitHub Organization Page</a></li>
|
162 |
+
<li><a href="https://huggingface.co/HPAI-BSC/" target="_blank">Hugging Face Organization Page</a></li>
|
163 |
+
<li><a href="https://hpai.bsc.es/publications" target="_blank">Publications</a></li>
|
164 |
+
</ul>
|
165 |
+
|
166 |
+
<p style="font-size: 18px; margin-top: 15px;">
|
167 |
+
Feel free to contact us:
|
168 |
+
</p>
|
169 |
+
|
170 |
+
<p style="font-size: 18px;">Email: <a href="mailto:[email protected]"><b>[email protected]</b></a></p>
|
171 |
+
</div>
|
172 |
+
"""
|
173 |
)
|
174 |
|
175 |
+
with gr.Row():
|
176 |
+
with gr.Accordion("π Citation", open=False):
|
177 |
+
citation_button = gr.Textbox(
|
178 |
+
value=CITATION_BUTTON_TEXT,
|
179 |
+
label=CITATION_BUTTON_LABEL,
|
180 |
+
lines=20,
|
181 |
+
elem_id="citation-button",
|
182 |
+
show_copy_button=True,
|
183 |
+
)
|
184 |
+
|
185 |
# event handlers, ugly way but it works
|
186 |
benchmark_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
187 |
model_type_radio.change(fn=filter_leaderboard, inputs=[benchmark_radio, model_type_radio, search_box, params_slider], outputs=leaderboard)
|
css_html_js.py
CHANGED
@@ -1,7 +1,13 @@
|
|
1 |
custom_css = """
|
|
|
|
|
|
|
2 |
#component-1 {
|
3 |
text-align: center;
|
4 |
}
|
|
|
|
|
|
|
5 |
#component-0 {
|
6 |
width: 75vw;
|
7 |
margin: 0 auto;
|
|
|
1 |
custom_css = """
|
2 |
+
.tab-wrapper button {
|
3 |
+
font-size: 16px;
|
4 |
+
}
|
5 |
#component-1 {
|
6 |
text-align: center;
|
7 |
}
|
8 |
+
#component-3 p {
|
9 |
+
text-align: center;
|
10 |
+
}
|
11 |
#component-0 {
|
12 |
width: 75vw;
|
13 |
margin: 0 auto;
|
parse.py
CHANGED
@@ -65,6 +65,8 @@ def parse_results(csv_path: str) -> list[dict]:
|
|
65 |
row = row[1:]
|
66 |
ctr = 0
|
67 |
for metric, bench in zip(metrics, benchs):
|
|
|
|
|
68 |
record = {}
|
69 |
record["Model"] = model
|
70 |
record["Model Type"] = type
|
@@ -92,7 +94,7 @@ def read_json():
|
|
92 |
def read_data() -> Union[pd.DataFrame, list, list, str]:
|
93 |
data = read_json()
|
94 |
df = pd.DataFrame(data)
|
95 |
-
df.rename(columns={'Model': 'Model', 'Benchmark': 'Benchmark', 'Task': 'Metric', 'Result': 'Score'}, inplace=True)
|
96 |
df['Params'] = pd.to_numeric(df['Params'], errors='coerce')
|
97 |
benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
|
98 |
metrics = df['Metric'].unique().tolist()
|
|
|
65 |
row = row[1:]
|
66 |
ctr = 0
|
67 |
for metric, bench in zip(metrics, benchs):
|
68 |
+
if metric == "EM":
|
69 |
+
metric = "Exact Matching (EM)"
|
70 |
record = {}
|
71 |
record["Model"] = model
|
72 |
record["Model Type"] = type
|
|
|
94 |
def read_data() -> Union[pd.DataFrame, list, list, str]:
|
95 |
data = read_json()
|
96 |
df = pd.DataFrame(data)
|
97 |
+
df.rename(columns={'Model': 'Model', 'Benchmark': 'Benchmark', 'Task': 'Metric', 'Result': 'Score', 'EM': 'Exact Matching (EM)'}, inplace=True)
|
98 |
df['Params'] = pd.to_numeric(df['Params'], errors='coerce')
|
99 |
benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
|
100 |
metrics = df['Metric'].unique().tolist()
|
results.json
CHANGED
@@ -111,7 +111,7 @@
|
|
111 |
"Model": "DeepSeek R1",
|
112 |
"Model Type": "General",
|
113 |
"Benchmark": "RTL-Repo",
|
114 |
-
"Task": "EM",
|
115 |
"Result": 33.02,
|
116 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
117 |
"Params": 685
|
@@ -336,7 +336,7 @@
|
|
336 |
"Model": "Llama 3.1 405B",
|
337 |
"Model Type": "General",
|
338 |
"Benchmark": "RTL-Repo",
|
339 |
-
"Task": "EM",
|
340 |
"Result": 33.29,
|
341 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.1-405B",
|
342 |
"Params": 406
|
@@ -561,7 +561,7 @@
|
|
561 |
"Model": "Llama 3.(1-3) 70B",
|
562 |
"Model Type": "General",
|
563 |
"Benchmark": "RTL-Repo",
|
564 |
-
"Task": "EM",
|
565 |
"Result": 28.62,
|
566 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
567 |
"Params": 70.6
|
@@ -786,7 +786,7 @@
|
|
786 |
"Model": "Qwen2.5 72B",
|
787 |
"Model Type": "General",
|
788 |
"Benchmark": "RTL-Repo",
|
789 |
-
"Task": "EM",
|
790 |
"Result": 37.19,
|
791 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
792 |
"Params": 72.7
|
@@ -1011,7 +1011,7 @@
|
|
1011 |
"Model": "Qwen2.5 32B",
|
1012 |
"Model Type": "General",
|
1013 |
"Benchmark": "RTL-Repo",
|
1014 |
-
"Task": "EM",
|
1015 |
"Result": 28.67,
|
1016 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-32B",
|
1017 |
"Params": 32.5
|
@@ -1236,7 +1236,7 @@
|
|
1236 |
"Model": "StarChat2 15B v0.1",
|
1237 |
"Model Type": "General",
|
1238 |
"Benchmark": "RTL-Repo",
|
1239 |
-
"Task": "EM",
|
1240 |
"Result": 13.24,
|
1241 |
"Model URL": "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
|
1242 |
"Params": 16
|
@@ -1461,7 +1461,7 @@
|
|
1461 |
"Model": "DeepSeek R1 Distill Qwen 14B",
|
1462 |
"Model Type": "General",
|
1463 |
"Benchmark": "RTL-Repo",
|
1464 |
-
"Task": "EM",
|
1465 |
"Result": 20.65,
|
1466 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
1467 |
"Params": 14.8
|
@@ -1686,7 +1686,7 @@
|
|
1686 |
"Model": "CodeLlama 70B",
|
1687 |
"Model Type": "Coding",
|
1688 |
"Benchmark": "RTL-Repo",
|
1689 |
-
"Task": "EM",
|
1690 |
"Result": 24.58,
|
1691 |
"Model URL": "https://huggingface.co/codellama/CodeLlama-70b-hf",
|
1692 |
"Params": 69
|
@@ -1911,7 +1911,7 @@
|
|
1911 |
"Model": "QwenCoder 2.5 32B",
|
1912 |
"Model Type": "Coding",
|
1913 |
"Benchmark": "RTL-Repo",
|
1914 |
-
"Task": "EM",
|
1915 |
"Result": 30.44,
|
1916 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
|
1917 |
"Params": 32.5
|
@@ -2136,7 +2136,7 @@
|
|
2136 |
"Model": "DeepSeek Coder 33B",
|
2137 |
"Model Type": "Coding",
|
2138 |
"Benchmark": "RTL-Repo",
|
2139 |
-
"Task": "EM",
|
2140 |
"Result": 30.58,
|
2141 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
|
2142 |
"Params": 33.3
|
@@ -2361,7 +2361,7 @@
|
|
2361 |
"Model": "QwenCoder 2.5 14B",
|
2362 |
"Model Type": "Coding",
|
2363 |
"Benchmark": "RTL-Repo",
|
2364 |
-
"Task": "EM",
|
2365 |
"Result": 37.16,
|
2366 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
|
2367 |
"Params": 14.7
|
@@ -2586,7 +2586,7 @@
|
|
2586 |
"Model": "OpenCoder 8B",
|
2587 |
"Model Type": "Coding",
|
2588 |
"Benchmark": "RTL-Repo",
|
2589 |
-
"Task": "EM",
|
2590 |
"Result": 16.63,
|
2591 |
"Model URL": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
|
2592 |
"Params": 7.77
|
@@ -2811,7 +2811,7 @@
|
|
2811 |
"Model": "QwenCoder 2.5 7B",
|
2812 |
"Model Type": "Coding",
|
2813 |
"Benchmark": "RTL-Repo",
|
2814 |
-
"Task": "EM",
|
2815 |
"Result": 28.45,
|
2816 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
|
2817 |
"Params": 7.61
|
@@ -3036,7 +3036,7 @@
|
|
3036 |
"Model": "DeepSeek Coder 6,7B",
|
3037 |
"Model Type": "Coding",
|
3038 |
"Benchmark": "RTL-Repo",
|
3039 |
-
"Task": "EM",
|
3040 |
"Result": 24.57,
|
3041 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
|
3042 |
"Params": 6.74
|
@@ -3261,7 +3261,7 @@
|
|
3261 |
"Model": "RTLCoder Mistral",
|
3262 |
"Model Type": "RTL-Specific",
|
3263 |
"Benchmark": "RTL-Repo",
|
3264 |
-
"Task": "EM",
|
3265 |
"Result": 14.97,
|
3266 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-v1.1",
|
3267 |
"Params": 7.24
|
@@ -3486,7 +3486,7 @@
|
|
3486 |
"Model": "RTLCoder DeepSeek",
|
3487 |
"Model Type": "RTL-Specific",
|
3488 |
"Benchmark": "RTL-Repo",
|
3489 |
-
"Task": "EM",
|
3490 |
"Result": 19.76,
|
3491 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
|
3492 |
"Params": 6.74
|
@@ -3711,7 +3711,7 @@
|
|
3711 |
"Model": "OriGen",
|
3712 |
"Model Type": "RTL-Specific",
|
3713 |
"Benchmark": "RTL-Repo",
|
3714 |
-
"Task": "EM",
|
3715 |
"Result": 19.45,
|
3716 |
"Model URL": "https://huggingface.co/henryen/OriGen_Fix",
|
3717 |
"Params": 6.74
|
@@ -3936,7 +3936,7 @@
|
|
3936 |
"Model": "HaVen-CodeQwen",
|
3937 |
"Model Type": "RTL-Specific",
|
3938 |
"Benchmark": "RTL-Repo",
|
3939 |
-
"Task": "EM",
|
3940 |
"Result": 25.38,
|
3941 |
"Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
|
3942 |
"Params": 7.25
|
@@ -4161,7 +4161,7 @@
|
|
4161 |
"Model": "CodeV-CL-7B",
|
4162 |
"Model Type": "RTL-Specific",
|
4163 |
"Benchmark": "RTL-Repo",
|
4164 |
-
"Task": "EM",
|
4165 |
"Result": 12.39,
|
4166 |
"Model URL": "https://huggingface.co/yang-z/CodeV-CL-7B",
|
4167 |
"Params": 6.74
|
@@ -4386,7 +4386,7 @@
|
|
4386 |
"Model": "CodeV-QW-7B",
|
4387 |
"Model Type": "RTL-Specific",
|
4388 |
"Benchmark": "RTL-Repo",
|
4389 |
-
"Task": "EM",
|
4390 |
"Result": 20.56,
|
4391 |
"Model URL": "https://huggingface.co/yang-z/CodeV-QW-7B",
|
4392 |
"Params": 7.25
|
@@ -4611,7 +4611,7 @@
|
|
4611 |
"Model": "CodeV-DS-6.7B",
|
4612 |
"Model Type": "RTL-Specific",
|
4613 |
"Benchmark": "RTL-Repo",
|
4614 |
-
"Task": "EM",
|
4615 |
"Result": 21.06,
|
4616 |
"Model URL": "https://huggingface.co/yang-z/CodeV-DS-6.7B",
|
4617 |
"Params": 6.74
|
|
|
111 |
"Model": "DeepSeek R1",
|
112 |
"Model Type": "General",
|
113 |
"Benchmark": "RTL-Repo",
|
114 |
+
"Task": "Exact Matching (EM)",
|
115 |
"Result": 33.02,
|
116 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
117 |
"Params": 685
|
|
|
336 |
"Model": "Llama 3.1 405B",
|
337 |
"Model Type": "General",
|
338 |
"Benchmark": "RTL-Repo",
|
339 |
+
"Task": "Exact Matching (EM)",
|
340 |
"Result": 33.29,
|
341 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.1-405B",
|
342 |
"Params": 406
|
|
|
561 |
"Model": "Llama 3.(1-3) 70B",
|
562 |
"Model Type": "General",
|
563 |
"Benchmark": "RTL-Repo",
|
564 |
+
"Task": "Exact Matching (EM)",
|
565 |
"Result": 28.62,
|
566 |
"Model URL": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
567 |
"Params": 70.6
|
|
|
786 |
"Model": "Qwen2.5 72B",
|
787 |
"Model Type": "General",
|
788 |
"Benchmark": "RTL-Repo",
|
789 |
+
"Task": "Exact Matching (EM)",
|
790 |
"Result": 37.19,
|
791 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
792 |
"Params": 72.7
|
|
|
1011 |
"Model": "Qwen2.5 32B",
|
1012 |
"Model Type": "General",
|
1013 |
"Benchmark": "RTL-Repo",
|
1014 |
+
"Task": "Exact Matching (EM)",
|
1015 |
"Result": 28.67,
|
1016 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-32B",
|
1017 |
"Params": 32.5
|
|
|
1236 |
"Model": "StarChat2 15B v0.1",
|
1237 |
"Model Type": "General",
|
1238 |
"Benchmark": "RTL-Repo",
|
1239 |
+
"Task": "Exact Matching (EM)",
|
1240 |
"Result": 13.24,
|
1241 |
"Model URL": "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
|
1242 |
"Params": 16
|
|
|
1461 |
"Model": "DeepSeek R1 Distill Qwen 14B",
|
1462 |
"Model Type": "General",
|
1463 |
"Benchmark": "RTL-Repo",
|
1464 |
+
"Task": "Exact Matching (EM)",
|
1465 |
"Result": 20.65,
|
1466 |
"Model URL": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
|
1467 |
"Params": 14.8
|
|
|
1686 |
"Model": "CodeLlama 70B",
|
1687 |
"Model Type": "Coding",
|
1688 |
"Benchmark": "RTL-Repo",
|
1689 |
+
"Task": "Exact Matching (EM)",
|
1690 |
"Result": 24.58,
|
1691 |
"Model URL": "https://huggingface.co/codellama/CodeLlama-70b-hf",
|
1692 |
"Params": 69
|
|
|
1911 |
"Model": "QwenCoder 2.5 32B",
|
1912 |
"Model Type": "Coding",
|
1913 |
"Benchmark": "RTL-Repo",
|
1914 |
+
"Task": "Exact Matching (EM)",
|
1915 |
"Result": 30.44,
|
1916 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
|
1917 |
"Params": 32.5
|
|
|
2136 |
"Model": "DeepSeek Coder 33B",
|
2137 |
"Model Type": "Coding",
|
2138 |
"Benchmark": "RTL-Repo",
|
2139 |
+
"Task": "Exact Matching (EM)",
|
2140 |
"Result": 30.58,
|
2141 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
|
2142 |
"Params": 33.3
|
|
|
2361 |
"Model": "QwenCoder 2.5 14B",
|
2362 |
"Model Type": "Coding",
|
2363 |
"Benchmark": "RTL-Repo",
|
2364 |
+
"Task": "Exact Matching (EM)",
|
2365 |
"Result": 37.16,
|
2366 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
|
2367 |
"Params": 14.7
|
|
|
2586 |
"Model": "OpenCoder 8B",
|
2587 |
"Model Type": "Coding",
|
2588 |
"Benchmark": "RTL-Repo",
|
2589 |
+
"Task": "Exact Matching (EM)",
|
2590 |
"Result": 16.63,
|
2591 |
"Model URL": "https://huggingface.co/infly/OpenCoder-8B-Instruct",
|
2592 |
"Params": 7.77
|
|
|
2811 |
"Model": "QwenCoder 2.5 7B",
|
2812 |
"Model Type": "Coding",
|
2813 |
"Benchmark": "RTL-Repo",
|
2814 |
+
"Task": "Exact Matching (EM)",
|
2815 |
"Result": 28.45,
|
2816 |
"Model URL": "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
|
2817 |
"Params": 7.61
|
|
|
3036 |
"Model": "DeepSeek Coder 6,7B",
|
3037 |
"Model Type": "Coding",
|
3038 |
"Benchmark": "RTL-Repo",
|
3039 |
+
"Task": "Exact Matching (EM)",
|
3040 |
"Result": 24.57,
|
3041 |
"Model URL": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
|
3042 |
"Params": 6.74
|
|
|
3261 |
"Model": "RTLCoder Mistral",
|
3262 |
"Model Type": "RTL-Specific",
|
3263 |
"Benchmark": "RTL-Repo",
|
3264 |
+
"Task": "Exact Matching (EM)",
|
3265 |
"Result": 14.97,
|
3266 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-v1.1",
|
3267 |
"Params": 7.24
|
|
|
3486 |
"Model": "RTLCoder DeepSeek",
|
3487 |
"Model Type": "RTL-Specific",
|
3488 |
"Benchmark": "RTL-Repo",
|
3489 |
+
"Task": "Exact Matching (EM)",
|
3490 |
"Result": 19.76,
|
3491 |
"Model URL": "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
|
3492 |
"Params": 6.74
|
|
|
3711 |
"Model": "OriGen",
|
3712 |
"Model Type": "RTL-Specific",
|
3713 |
"Benchmark": "RTL-Repo",
|
3714 |
+
"Task": "Exact Matching (EM)",
|
3715 |
"Result": 19.45,
|
3716 |
"Model URL": "https://huggingface.co/henryen/OriGen_Fix",
|
3717 |
"Params": 6.74
|
|
|
3936 |
"Model": "HaVen-CodeQwen",
|
3937 |
"Model Type": "RTL-Specific",
|
3938 |
"Benchmark": "RTL-Repo",
|
3939 |
+
"Task": "Exact Matching (EM)",
|
3940 |
"Result": 25.38,
|
3941 |
"Model URL": "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
|
3942 |
"Params": 7.25
|
|
|
4161 |
"Model": "CodeV-CL-7B",
|
4162 |
"Model Type": "RTL-Specific",
|
4163 |
"Benchmark": "RTL-Repo",
|
4164 |
+
"Task": "Exact Matching (EM)",
|
4165 |
"Result": 12.39,
|
4166 |
"Model URL": "https://huggingface.co/yang-z/CodeV-CL-7B",
|
4167 |
"Params": 6.74
|
|
|
4386 |
"Model": "CodeV-QW-7B",
|
4387 |
"Model Type": "RTL-Specific",
|
4388 |
"Benchmark": "RTL-Repo",
|
4389 |
+
"Task": "Exact Matching (EM)",
|
4390 |
"Result": 20.56,
|
4391 |
"Model URL": "https://huggingface.co/yang-z/CodeV-QW-7B",
|
4392 |
"Params": 7.25
|
|
|
4611 |
"Model": "CodeV-DS-6.7B",
|
4612 |
"Model Type": "RTL-Specific",
|
4613 |
"Benchmark": "RTL-Repo",
|
4614 |
+
"Task": "Exact Matching (EM)",
|
4615 |
"Result": 21.06,
|
4616 |
"Model URL": "https://huggingface.co/yang-z/CodeV-DS-6.7B",
|
4617 |
"Params": 6.74
|
utils.py
CHANGED
@@ -5,19 +5,19 @@ import plotly.express as px
|
|
5 |
import numpy as np
|
6 |
|
7 |
type_emoji = {
|
8 |
-
"RTL-Specific": "
|
9 |
"General": "🟢",
|
10 |
-
"Coding": "
|
11 |
}
|
12 |
|
13 |
def model_hyperlink(link, model_name):
|
14 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
15 |
|
16 |
def handle_special_cases(benchmark, metric):
|
17 |
-
if metric == 'EM':
|
18 |
benchmark = 'RTL-Repo'
|
19 |
elif benchmark == 'RTL-Repo':
|
20 |
-
metric = 'EM'
|
21 |
return benchmark, metric
|
22 |
|
23 |
def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
@@ -27,6 +27,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
|
27 |
filtered_df['Model'] = filtered_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
|
28 |
filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
|
29 |
filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
|
|
|
30 |
filtered_df.insert(0, '', range(1, len(filtered_df) + 1))
|
31 |
return filtered_df
|
32 |
|
|
|
5 |
import numpy as np
|
6 |
|
7 |
type_emoji = {
|
8 |
+
"RTL-Specific": "🔴",
|
9 |
"General": "🟢",
|
10 |
+
"Coding": "🔵"
|
11 |
}
|
12 |
|
13 |
def model_hyperlink(link, model_name):
|
14 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
15 |
|
16 |
def handle_special_cases(benchmark, metric):
|
17 |
+
if metric == 'Exact Matching (EM)':
|
18 |
benchmark = 'RTL-Repo'
|
19 |
elif benchmark == 'RTL-Repo':
|
20 |
+
metric = 'Exact Matching (EM)'
|
21 |
return benchmark, metric
|
22 |
|
23 |
def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
|
|
|
27 |
filtered_df['Model'] = filtered_df.apply(lambda row: model_hyperlink(row["Model URL"], row["Model"]), axis=1)
|
28 |
filtered_df['Type'] = filtered_df['Model Type'].map(lambda x: type_emoji.get(x, ""))
|
29 |
filtered_df = filtered_df[['Type', 'Model', 'Params', 'Exact Matching (EM)']]
|
30 |
+
filtered_df = filtered_df.sort_values(by='Exact Matching (EM)', ascending=False).reset_index(drop=True)
|
31 |
filtered_df.insert(0, '', range(1, len(filtered_df) + 1))
|
32 |
return filtered_df
|
33 |
|