Commit 218e8a1 by ggcristian
Parent(s): 8e9d8db

use dropdowns for bubble plot and add cursor-pointer as css

Files changed (2):
  1. app.py +67 -22
  2. css_html_js.py +3 -2
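
In short: the bubble plot's two gr.Radio selectors become gr.Dropdown components with new "All" and "Aggregated ⬆️" options, and a CSS rule gives the dropdown inputs a pointer cursor. Below is a minimal, self-contained sketch of the dropdown-driven plot pattern the commit adopts; the toy dataframe, choice lists, and make_plot helper are illustrative stand-ins, not the leaderboard's actual data or code.

```python
# Sketch only: toy data and names, not the leaderboard's real df/handlers.
import gradio as gr
import pandas as pd
import plotly.express as px

toy = pd.DataFrame({
    "Model": ["A", "B", "C"],
    "Params": [7, 34, 70],
    "Score": [42.0, 55.5, 61.2],
    "Model Type": ["General", "Coding", "RTL-Specific"],
})

def make_plot(benchmark, metric):
    # The real app filters df by benchmark/metric; the toy frame stands in here.
    return px.scatter(
        toy, x="Params", y="Score", log_x=True, size="Params",
        color="Model Type", text="Model", template="plotly_white",
        title=f"Params vs. {metric} for {benchmark}",
    )

with gr.Blocks() as demo:
    with gr.Row(equal_height=True):
        benchmark_dd = gr.Dropdown(choices=["All", "Bench A"], value="All", label="Select Benchmark")
        metric_dd = gr.Dropdown(choices=["Aggregated ⬆️", "Score"], value="Aggregated ⬆️", label="Select Metric")
    plot = gr.Plot(value=make_plot("All", "Aggregated ⬆️"), label="Bubble Chart")
    # Re-render the plot whenever either dropdown changes.
    benchmark_dd.change(make_plot, [benchmark_dd, metric_dd], plot)
    metric_dd.change(make_plot, [benchmark_dd, metric_dd], plot)

demo.launch()
```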
app.py CHANGED
@@ -76,20 +76,64 @@ def update_benchmarks_by_task(task):
     return gr.update(choices=["All"] + benchmarks, value="All")
 
 def generate_scatter_plot(benchmark, metric):
-    benchmark, metric = handle_special_cases(benchmark, metric)
-
-    subset = df[df['Benchmark'] == benchmark]
-    if benchmark == "RTL-Repo":
-        subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
-        detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
-        detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
-        detailed_scores['Average ⬆️'] = detailed_scores['Exact Matching (EM)']
+    if benchmark == "All":
+        models_data = []
+
+        for bench in benchmarks:
+            subset = df[df['Benchmark'] == bench]
+            if bench == "RTL-Repo":
+                subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
+            models_in_bench = subset['Model'].unique()
+            models_data.extend([(model, bench) for model in models_in_bench])
+
+        all_models = list(set([m[0] for m in models_data]))
+        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
+
+        if metric == "Aggregated ⬆️":
+            agg_columns = [col for col in df_agg.columns if col.startswith('Agg ')]
+            if agg_columns:
+                agg_data = df_agg.copy()
+                agg_data['Aggregated ⬆️'] = agg_data[agg_columns].mean(axis=1).round(2)
+                scatter_data = pd.merge(details, agg_data[['Model', 'Aggregated ⬆️']], on='Model', how='inner')
+            else:
+                scatter_data = details.copy()
+                scatter_data['Aggregated ⬆️'] = 50  # default
+        else:
+            scatter_data = details.copy()
+            metric_data = df[df['Metric'] == metric].groupby('Model')['Score'].mean().reset_index()
+            metric_data = metric_data.rename(columns={'Score': metric})
+            scatter_data = pd.merge(scatter_data, metric_data, on='Model', how='left')
+            scatter_data = scatter_data.dropna(subset=[metric] if metric in scatter_data.columns else ['Aggregated ⬆️'])
+
     else:
-        detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
-        detailed_scores['Average ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1)
+        # Code we already had for individual benchmark selection
+        benchmark, metric = handle_special_cases(benchmark, metric)
+
+        subset = df[df['Benchmark'] == benchmark]
+        if benchmark == "RTL-Repo":
+            subset = subset[subset['Metric'].str.contains('EM', case=False, na=False)]
+            detailed_scores = subset.groupby('Model', as_index=False)['Score'].mean()
+            detailed_scores.rename(columns={'Score': 'Exact Matching (EM)'}, inplace=True)
+            detailed_scores['Aggregated ⬆️'] = detailed_scores['Exact Matching (EM)']
+        else:
+            agg_column = None
+            detailed_scores = subset.pivot_table(index='Model', columns='Metric', values='Score').reset_index()
+            if benchmark == 'VerilogEval S2R':
+                agg_column = 'Agg VerilogEval S2R'
+            elif benchmark == 'VerilogEval MC':
+                agg_column = 'Agg VerilogEval MC'
+            elif benchmark == 'RTLLM':
+                agg_column = 'Agg RTLLM'
+            elif benchmark == 'VeriGen':
+                agg_column = 'Agg VeriGen'
+            if agg_column and agg_column in df_agg.columns:
+                agg_data = df_agg[['Model', agg_column]].rename(columns={agg_column: 'Aggregated ⬆️'})
+                detailed_scores = pd.merge(detailed_scores, agg_data, on='Model', how='left')
+            else:
+                detailed_scores['Aggregated ⬆️'] = detailed_scores[['Syntax (STX)', 'Functionality (FNC)', 'Synthesis (SYN)', 'Power', 'Performance', 'Area']].mean(axis=1).round(2)
 
-    details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
-    scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])
+        details = df[['Model', 'Params', 'Model Type']].drop_duplicates('Model')
+        scatter_data = pd.merge(detailed_scores, details, on='Model', how='left').dropna(subset=['Params', metric])
 
     scatter_data['x'] = scatter_data['Params']
     scatter_data['y'] = scatter_data[metric]
@@ -101,7 +145,7 @@ def generate_scatter_plot(benchmark, metric):
     y_axis_limits = {
         'Functionality (FNC)': [5, 90], 'Syntax (STX)': [20, 100], 'Synthesis (SYN)': [5, 90],
         'Power': [0, 50], 'Performance': [0, 50], 'Area': [0, 50], 'Exact Matching (EM)': [0, 50],
-        'Average ⬆️': [0, 80]
+        'Aggregated ⬆️': [0, 80]
     }
     y_range = y_axis_limits.get(metric, [0, 80])
 
@@ -109,10 +153,6 @@ def generate_scatter_plot(benchmark, metric):
         scatter_data, x='x', y='y', log_x=True, size='size', color='Model Type', text='Model',
         hover_data={metric: ':.2f'}, title=f'Params vs. {metric} for {benchmark}',
         labels={'x': '# Params (Log Scale)', 'y': metric}, template="plotly_white",
-        # color_discrete_map={"General": "
-        #A8D5BA", "Coding": "
-        #F7DC6F", "RTL-Specific": "
-        #87CEFA"},
         height=600, width=1200
     )
 
@@ -223,9 +263,10 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
 
     with gr.Tab("Interactive Bubble Plot"):
         with gr.Row(equal_height=True):
-            bubble_benchmark = gr.Radio(choices=benchmarks, label="Select Benchmark", value='VerilogEval S2R')
-            bubble_metric = gr.Radio(choices=non_rtl_metrics[:-1], label="Select Metric", value="Syntax (STX)")
-            scatter_plot = gr.Plot(value=generate_scatter_plot('VerilogEval S2R', default_metric), label="Bubble Chart", elem_id="full-width-plot")
+            bubble_benchmark = gr.Dropdown(choices=["All"] + benchmarks, label="Select Benchmark", value='All', elem_classes="gr-dropdown")
+            bubble_metric = gr.Dropdown(choices=["Aggregated ⬆️"] + non_rtl_metrics[:-1], label="Select Metric", value="Aggregated ⬆️")
+        with gr.Row(equal_height=True):
+            scatter_plot = gr.Plot(value=generate_scatter_plot('All', "Aggregated ⬆️"), label="Bubble Chart", elem_id="full-width-plot")
 
     with gr.Tab("About Us"):
         gr.HTML(
@@ -282,8 +323,12 @@ with gr.Blocks(css=custom_css, js=js_func, theme=gr.themes.Default(primary_hue=c
         metric = "Exact Matching (EM)"
         return gr.update(choices=rtl_metrics, value=metric), generate_scatter_plot(benchmark, metric)
     else:
-        metric = non_rtl_metrics[0]  # default to Syntax
-        return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
+        if benchmark == "All":
+            metric = "Aggregated ⬆️"  # default to Aggregated
+            return gr.update(choices=["Aggregated ⬆️"] + non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
+        else:
+            metric = non_rtl_metrics[0]
+            return gr.update(choices=non_rtl_metrics[:-1], value=metric), generate_scatter_plot(benchmark, metric)
 
 def on_metric_change(benchmark, metric):
     benchmark, metric = handle_special_cases(benchmark, metric)
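
The new "All" + "Aggregated ⬆️" branch reduces to a row-wise mean over the per-benchmark Agg columns of df_agg. A toy pandas sketch of that reduction (models and scores made up; only the 'Agg ' column-prefix convention comes from the diff):

```python
import pandas as pd

# Toy stand-in for df_agg; the 'Agg ' prefix matches the convention in the diff.
df_agg = pd.DataFrame({
    "Model": ["A", "B"],
    "Agg VerilogEval S2R": [60.0, 40.0],
    "Agg RTLLM": [50.0, 30.0],
})

agg_columns = [col for col in df_agg.columns if col.startswith("Agg ")]
df_agg["Aggregated ⬆️"] = df_agg[agg_columns].mean(axis=1).round(2)
print(df_agg[["Model", "Aggregated ⬆️"]])  # A -> 55.0, B -> 35.0
```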
css_html_js.py CHANGED
@@ -51,11 +51,9 @@ custom_css = """
     background: none;
     border: none;
 }
-
 #search-bar {
     padding: 0px;
 }
-/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
 #leaderboard-table td:nth-child(2),
 #leaderboard-table th:nth-child(2) {
     max-width: 400px;
@@ -111,6 +109,9 @@ custom_css = """
 .slider_input_container {
     padding-top: 8px;
 }
+input[role="listbox"] {
+    cursor: pointer !important;
+}
 """
 
 get_window_url_params = """
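
The added rule targets the text input that Gradio renders for a dropdown (it carries role="listbox" in the Gradio build this Space appears to use), which otherwise shows a text-editing cursor on hover. A minimal sketch of threading such a rule into an app via gr.Blocks(css=...); the dropdown here is illustrative:

```python
import gradio as gr

# Pointer cursor on dropdown inputs, as in the rule added above.
css = """
input[role="listbox"] {
    cursor: pointer !important;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Dropdown(choices=["All", "RTLLM"], value="All", label="Select Benchmark")

demo.launch()
```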