File size: 5,059 Bytes
65e4811
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aaf0c71
65e4811
 
 
 
aaf0c71
65e4811
 
aaf0c71
 
65e4811
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73cf0ca
 
65e4811
 
 
 
 
 
 
 
 
 
 
 
 
aaf0c71
 
 
 
 
 
 
65e4811
 
 
 
 
 
 
 
 
 
 
 
 
 
73cf0ca
65e4811
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import json
import pandas as pd
import csv
from typing import Dict, Union
import locale

# Static metadata for the models known to this script, keyed by model name.
# Each value is a 3-tuple: (model URL, parameter count, model type), where the
# model type is one of "General", "Coding" or "RTL-Specific".
# The parameter count is presumably expressed in billions -- TODO confirm.
# NOTE(review): every URL host is written with a trailing dot
# ("huggingface.co."); confirm this is intentional before changing it.
model_details = {
    # General-purpose models
    "DeepSeek R1": ("https://huggingface.co./deepseek-ai/DeepSeek-R1", 685, "General"),
    "Llama 3.1 405B": ("https://huggingface.co./meta-llama/Llama-3.1-405B", 406, "General"),
    "Llama 3.(1-3) 70B": ("https://huggingface.co./meta-llama/Llama-3.3-70B-Instruct", 70.6, "General"),
    "Qwen2.5 72B": ("https://huggingface.co./Qwen/Qwen2.5-72B-Instruct", 72.7, "General"),
    "Qwen2.5 32B": ("https://huggingface.co./Qwen/Qwen2.5-32B", 32.5, "General"),
    "StarChat2 15B v0.1": ("https://huggingface.co./HuggingFaceH4/starchat2-15b-v0.1", 16, "General"),
    "DeepSeek R1 Distill Qwen 14B": ("https://huggingface.co./deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", 14.8, "General"),
    
    # Coding models
    "CodeLlama 70B": ("https://huggingface.co./codellama/CodeLlama-70b-hf", 69, "Coding"),
    "QwenCoder 2.5 32B": ("https://huggingface.co./Qwen/Qwen2.5-Coder-32B-Instruct", 32.5, "Coding"),
    "DeepSeek Coder 33B": ("https://huggingface.co./deepseek-ai/deepseek-coder-33b-instruct", 33.3, "Coding"),
    "QwenCoder 2.5 14B": ("https://huggingface.co./Qwen/Qwen2.5-Coder-14B-Instruct", 14.7, "Coding"),
    "OpenCoder 8B": ("https://huggingface.co./infly/OpenCoder-8B-Instruct", 7.77, "Coding"),
    "QwenCoder 2.5 7B": ("https://huggingface.co./Qwen/Qwen2.5-Coder-7B-Instruct", 7.61, "Coding"),
    # NOTE(review): this key uses a decimal comma ("6,7B"); presumably it has to
    # match the model name as spelled in the results CSV -- verify before editing.
    "DeepSeek Coder 6,7B": ("https://huggingface.co./deepseek-ai/deepseek-coder-6.7b-instruct", 6.74, "Coding"),

    # RTL-specific models
    "HaVen-CodeQwen": ("https://huggingface.co./yangyiyao/HaVen-CodeQwen", 7.25, "RTL-Specific"),
    "CodeV-CL-7B": ("https://huggingface.co./yang-z/CodeV-CL-7B", 6.74, "RTL-Specific"),
    "CodeV-QW-7B": ("https://huggingface.co./yang-z/CodeV-QW-7B", 7.25, "RTL-Specific"),
    "CodeV-DS-6.7B": ("https://huggingface.co./yang-z/CodeV-DS-6.7B", 6.74, "RTL-Specific"),
    "RTLCoder Mistral": ("https://huggingface.co./ishorn5/RTLCoder-v1.1", 7.24, "RTL-Specific"),
    "RTLCoder DeepSeek": ("https://huggingface.co./ishorn5/RTLCoder-Deepseek-v1.1", 6.74, "RTL-Specific"),
    "OriGen": ("https://huggingface.co./henryen/OriGen_Fix", 6.74, "RTL-Specific")
}

def get_headers(reader, agg=False) -> Union[list, tuple[list, list]]:
    """Read the header row(s) from an iterable of CSV rows.

    The first cell of each header row is dropped; the remaining cells are
    returned. Only the header rows are consumed, so when ``reader`` is an
    iterator (e.g. a ``csv.reader``) the caller can keep iterating it to get
    the data rows.

    Args:
        reader: Iterable of rows, each row being a list of cells.
        agg: When false (default), two header rows are read: metrics first,
            benchmarks second. When true, only the metrics row is read.

    Returns:
        ``(metrics, benchs)`` when ``agg`` is false, or just ``metrics`` when
        ``agg`` is true. A header row that is missing yields an empty list.
    """
    rows = iter(reader)

    metrics_row = next(rows, None)
    metrics = metrics_row[1:] if metrics_row is not None else []
    if agg:
        # Stop here: pulling another row would silently swallow the first
        # data row of a single-header file.
        return metrics

    benchs_row = next(rows, None)
    benchs = benchs_row[1:] if benchs_row is not None else []
    return metrics, benchs

def get_model_params_and_url(model) -> Union[str, str, float]:
    """Look up the URL, parameter count and model type for ``model``.

    Returns a ``(url, params, type)`` tuple taken from ``model_details``.
    Models that are not listed there yield ``("-", "-", "-")``.
    """
    entry = model_details.get(model)
    if entry is None:
        # Unknown model: use "-" placeholders for all three fields.
        return "-", "-", "-"
    return entry[0], entry[1], entry[2]

def parse_results(csv_path: str) -> list[dict]:
    """Flatten a results CSV into one record per (model, result column).

    The file starts with two header rows, read via ``get_headers``: the first
    names the metric of each column, the second its benchmark. Every following
    non-blank row holds a model name followed by one result per column.

    Each produced record has the keys ``Model``, ``Model Type``, ``Benchmark``,
    ``Task``, ``Result``, ``Model URL`` and ``Params``. The metric name ``EM``
    is expanded to ``Exact Matching (EM)``; results are parsed as floats and
    may use a decimal comma.

    Args:
        csv_path: Path of the comma-delimited CSV file to read.

    Returns:
        The list of records, in file order.

    Raises:
        IndexError: If a data row has fewer result cells than header columns.
        ValueError: If a result cell cannot be parsed as a number.
    """
    dataset = []
    models = []
    with open(csv_path, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        metrics, benchs = get_headers(reader)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing empty
                # line at the end of the file); there is nothing to parse.
                continue
            model, values = row[0], row[1:]
            url, params, model_type = get_model_params_and_url(model)
            models.append(model)
            for idx, (metric, bench) in enumerate(zip(metrics, benchs)):
                if metric == "EM":
                    metric = "Exact Matching (EM)"
                dataset.append({
                    "Model": model,
                    "Model Type": model_type,
                    "Benchmark": bench,
                    "Task": metric,
                    # Accept decimal commas ("12,5") as well as decimal points.
                    "Result": float(values[idx].replace(',', '.')),
                    "Model URL": url,
                    "Params": params,
                })
    print(models)
    return dataset

def parse_agg(csv_path: str) -> pd.DataFrame:
    """Load the aggregated-scores CSV at ``csv_path`` into a DataFrame.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The file contents as parsed by ``pandas.read_csv`` with default
        options.
    """
    # Read the file the caller asked for rather than a hard-coded name.
    return pd.read_csv(csv_path)

def writeJson(data: list):
    """Serialize ``data`` to ``results.json`` in the current directory.

    The file is written as UTF-8 with 4-space indentation and non-ASCII
    characters left unescaped, then ``Done`` is printed.

    Args:
        data: JSON-serializable list to write.
    """
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned to UTF-8 to match how read_json() opens the same file.
    with open('results.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
    print("Done")

def read_json():
    """Return the parsed contents of ``./results.json`` (read as UTF-8)."""
    with open("./results.json", "r", encoding="utf-8") as handle:
        return json.load(handle)

def read_data() -> Union[pd.DataFrame, list, list, str]:
    """Load the results JSON and prepare it for display.

    The records returned by ``read_json`` are put in a DataFrame whose
    ``Task`` and ``Result`` columns are renamed to ``Metric`` and ``Score``;
    ``Params`` is converted to numbers, with unparsable values becoming NaN.

    Returns:
        A 4-tuple ``(df, benchmarks, metrics, default_metric)``: the frame,
        its unique benchmarks sorted in descending order, its unique metrics,
        and ``'Functionality (FNC)'`` when that metric is present, otherwise
        the first metric.
    """
    column_names = {'Model': 'Model', 'Benchmark': 'Benchmark', 'Task': 'Metric', 'Result': 'Score', 'EM': 'Exact Matching (EM)'}
    df = pd.DataFrame(read_json()).rename(columns=column_names)
    # Placeholder values that are not numeric are coerced to NaN.
    df['Params'] = pd.to_numeric(df['Params'], errors='coerce')

    benchmarks = sorted(df['Benchmark'].unique().tolist(), reverse=True)
    metrics = df['Metric'].unique().tolist()

    if 'Functionality (FNC)' in metrics:
        default_metric = 'Functionality (FNC)'
    else:
        default_metric = metrics[0]
    return df, benchmarks, metrics, default_metric


if __name__ == "__main__":
    # Script entry point: convert ./results.csv into results.json.
    writeJson(parse_results("./results.csv"))