Spaces:

HPAI-BSC
/

TuRTLe-Leaderboard

Running

File size: 5,059 Bytes

import json
import pandas as pd
import csv
from typing import Dict, Union
import locale

# Static metadata for the models known to this script.
# Each key is a model name; get_model_params_and_url looks names up here, and
# parse_results passes it the first cell of every CSV data row, so keys must
# match those cells exactly.
# Each value is a 3-tuple read positionally: (model URL, params, model type).
# Names that are not listed fall back to "-" placeholders in
# get_model_params_and_url.
# NOTE(review): the params figures look like parameter counts in billions - confirm.
# NOTE(review): every URL uses the host "huggingface.co." (trailing dot);
# verify this is intentional and not an artifact of how the file was copied.
model_details = {
    # Models tagged "General"
    "DeepSeek R1": ("https://huggingface.co./deepseek-ai/DeepSeek-R1", 685, "General"),
    "Llama 3.1 405B": ("https://huggingface.co./meta-llama/Llama-3.1-405B", 406, "General"),
    "Llama 3.(1-3) 70B": ("https://huggingface.co./meta-llama/Llama-3.3-70B-Instruct", 70.6, "General"),
    "Qwen2.5 72B": ("https://huggingface.co./Qwen/Qwen2.5-72B-Instruct", 72.7, "General"),
    "Qwen2.5 32B": ("https://huggingface.co./Qwen/Qwen2.5-32B", 32.5, "General"),
    "StarChat2 15B v0.1": ("https://huggingface.co./HuggingFaceH4/starchat2-15b-v0.1", 16, "General"),
    "DeepSeek R1 Distill Qwen 14B": ("https://huggingface.co./deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", 14.8, "General"),
    
    # Models tagged "Coding"
    "CodeLlama 70B": ("https://huggingface.co./codellama/CodeLlama-70b-hf", 69, "Coding"),
    "QwenCoder 2.5 32B": ("https://huggingface.co./Qwen/Qwen2.5-Coder-32B-Instruct", 32.5, "Coding"),
    "DeepSeek Coder 33B": ("https://huggingface.co./deepseek-ai/deepseek-coder-33b-instruct", 33.3, "Coding"),
    "QwenCoder 2.5 14B": ("https://huggingface.co./Qwen/Qwen2.5-Coder-14B-Instruct", 14.7, "Coding"),
    "OpenCoder 8B": ("https://huggingface.co./infly/OpenCoder-8B-Instruct", 7.77, "Coding"),
    "QwenCoder 2.5 7B": ("https://huggingface.co./Qwen/Qwen2.5-Coder-7B-Instruct", 7.61, "Coding"),
    "DeepSeek Coder 6,7B": ("https://huggingface.co./deepseek-ai/deepseek-coder-6.7b-instruct", 6.74, "Coding"),

    # Models tagged "RTL-Specific"
    "HaVen-CodeQwen": ("https://huggingface.co./yangyiyao/HaVen-CodeQwen", 7.25, "RTL-Specific"),
    "CodeV-CL-7B": ("https://huggingface.co./yang-z/CodeV-CL-7B", 6.74, "RTL-Specific"),
    "CodeV-QW-7B": ("https://huggingface.co./yang-z/CodeV-QW-7B", 7.25, "RTL-Specific"),
    "CodeV-DS-6.7B": ("https://huggingface.co./yang-z/CodeV-DS-6.7B", 6.74, "RTL-Specific"),
    "RTLCoder Mistral": ("https://huggingface.co./ishorn5/RTLCoder-v1.1", 7.24, "RTL-Specific"),
    "RTLCoder DeepSeek": ("https://huggingface.co./ishorn5/RTLCoder-Deepseek-v1.1", 6.74, "RTL-Specific"),
    "OriGen": ("https://huggingface.co./henryen/OriGen_Fix", 6.74, "RTL-Specific")
}

def get_headers(reader, agg=False) -> Union[list, tuple[list, list]]:
    """
    Read the header row(s) from the start of an iterable of CSV rows.

    The first cell of every header row is dropped; only the remaining cells
    are returned.

    Args:
        reader: Iterable of rows (each a list of cells), e.g. a ``csv.reader``.
            When it is an iterator it is advanced past exactly the rows read
            here, so the caller can keep iterating over the data rows.
        agg: When False (default) two header rows are read: metric names
            first, benchmark names second. When True only the single metric
            header row is read.

    Returns:
        ``(metrics, benchs)`` when ``agg`` is False, or just ``metrics`` when
        ``agg`` is True. A header row that is missing because the input ended
        early is reported as an empty list.
    """
    # iter() is a no-op on iterators such as csv.reader and also lets plain
    # lists of rows be passed in.
    rows = iter(reader)

    first = next(rows, None)
    metrics = first[1:] if first is not None else []
    if agg:
        # Single-header layout: stop here so no data row is consumed and the
        # return type does not depend on how many rows the input has.
        return metrics

    second = next(rows, None)
    benchs = second[1:] if second is not None else []
    return metrics, benchs

def get_model_params_and_url(model) -> tuple[str, Union[int, float, str], str]:
    """
    Look up the static metadata registered for a model name.

    Args:
        model: Model name used as a key into ``model_details``.

    Returns:
        ``(url, params, model_type)`` taken from ``model_details``. When the
        name is not registered, all three elements are the placeholder "-".
    """
    # One lookup with a placeholder default instead of a membership test
    # followed by three separate indexings.
    url, params, model_type = model_details.get(model, ("-", "-", "-"))
    return url, params, model_type

def parse_results(csv_path: str) -> list[dict]:
    """
    Flatten the wide results CSV into one record per (model, column) pair.

    Layout of the file as it is read here:
        row 1:   first cell ignored, then one metric name per column
        row 2:   first cell ignored, then one benchmark name per column
        rows 3+: model name, then one result per column

    Results may be written with a decimal comma ("12,5"); the comma is
    replaced by a dot before conversion to float. The metric name "EM" is
    expanded to "Exact Matching (EM)". Blank lines are skipped.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list of dicts with the keys "Model", "Model Type", "Benchmark",
        "Task", "Result", "Model URL" and "Params".

    Raises:
        IndexError: If a data row has fewer result cells than header columns.
        ValueError: If a result cell cannot be converted to float.
    """
    dataset = []
    models = []
    with open(csv_path, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        metrics, benchs = get_headers(reader)
        # The (metric, benchmark) pairs are the same for every data row, so
        # resolve the "EM" display name once instead of once per cell.
        columns = [
            ("Exact Matching (EM)" if metric == "EM" else metric, bench)
            for metric, bench in zip(metrics, benchs)
        ]
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing
                # newline at end of file); there is no model to read.
                continue
            model, values = row[0], row[1:]
            url, params, model_type = get_model_params_and_url(model)
            models.append(model)
            # Index access (rather than zipping the values in) keeps a short
            # row a loud IndexError instead of silently dropping columns.
            for idx, (metric, bench) in enumerate(columns):
                dataset.append({
                    "Model": model,
                    "Model Type": model_type,
                    "Benchmark": bench,
                    "Task": metric,
                    "Result": float(values[idx].replace(',', '.')),
                    "Model URL": url,
                    "Params": params,
                })
    print(models)
    return dataset

def parse_agg(csv_path: str) -> pd.DataFrame:
    """
    Load a CSV file into a DataFrame.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The file contents as returned by ``pandas.read_csv`` with default
        options (first row used as column names).
    """
    # Read the file the caller asked for rather than a fixed file name.
    return pd.read_csv(csv_path)

def writeJson(data: list):
    """
    Serialise ``data`` to ``results.json`` in the current working directory.

    The file is overwritten if it exists and "Done" is printed afterwards.

    Args:
        data: JSON-serialisable object to write (a list of records here).
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding must be pinned; UTF-8 matches what read_json expects.
    with open('results.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print("Done")

def read_json():
    """
    Load and return the parsed contents of ``./results.json``.

    The file is opened as UTF-8 text relative to the current working
    directory.
    """
    with open("./results.json", "r", encoding="utf-8") as handle:
        return json.load(handle)

def read_data() -> tuple[pd.DataFrame, list, list, str]:
    """
    Load the records returned by ``read_json`` into a DataFrame.

    Returns:
        A 4-tuple ``(df, benchmarks, metrics, default_metric)``:
            df: The records, with column "Task" renamed to "Metric" and
                "Result" renamed to "Score". "Params" is converted to
                numeric; values that cannot be converted (such as the "-"
                placeholder) become NaN.
            benchmarks: Unique "Benchmark" values, sorted in descending order.
            metrics: Unique "Metric" values, in order of first appearance.
            default_metric: 'Functionality (FNC)' when it is among the
                metrics, otherwise the first metric.
    """
    df = pd.DataFrame(read_json())
    # Only the columns whose name actually changes are listed; an 'EM' column
    # is renamed too if one is present in the data.
    df = df.rename(columns={
        'Task': 'Metric',
        'Result': 'Score',
        'EM': 'Exact Matching (EM)',
    })
    df['Params'] = pd.to_numeric(df['Params'], errors='coerce')
    benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
    metrics = df['Metric'].unique().tolist()
    preferred = 'Functionality (FNC)'
    default_metric = preferred if preferred in metrics else metrics[0]
    return df, benchmarks, metrics, default_metric


if __name__ == "__main__":
    # Script entry point: flatten ./results.csv into records and write them
    # out through writeJson.
    records = parse_results("./results.csv")
    writeJson(records)