import os
import shutil

import gradio as gr
import pandas as pd
import requests

# Staging directory for uploaded files (see upload_file below).
TEMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
# Per-metric weights used in the comprehensive score. dynamic_degree is
# weighted piecewise: low-motion videos are penalised heavily, near-threshold
# ones slightly discounted.
metric_scale = {
    'human_face_similarity': 1.5,
    'clip_score': 0.02,
    'env_clip': 1.5,
    'other_subject_clip': 1.5,
    'image_quality': 1.0,
    'dynamic_degree': lambda x: 0.1 if x < 0.885 else (0.95 if x < 0.95 else 1.0),
    'aesthetic_quality': 1.0,
    'motion_smoothness': 1.0,
}
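
# Illustrative example (hypothetical numbers): a model with clip_score=30.0
# contributes 30.0 * 0.02 = 0.6, and dynamic_degree=0.90 falls in the middle
# band, contributing 0.95 * 0.90 = 0.855; the comprehensive score is the
# plain mean of all such weighted values (see ModelResult.to_dict below).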
class ModelResult:
    """One leaderboard entry: model name, optional project link, raw metrics."""

    def __init__(self, data):
        self.name = data['model_name']
        self.project_link = data.get('project_link', None)
        self.result = data['result']

    def to_dict(self):
        # Render the model name as a link when a project page is provided.
        if self.project_link is not None:
            res = {
                'model_name': f'<a href="{self.project_link}" target="_blank">{self.name}</a>',
            }
        else:
            res = {
                'model_name': self.name,
            }
        total_score = []
        for metric in self.result.keys():
            # Displayed value is nudged down by 1e-3 before rounding.
            res[metric] = round(float(self.result[metric]) - 1e-3, 4)
            if metric == 'dynamic_degree':
                score = metric_scale[metric](self.result[metric]) * self.result[metric]
            else:
                score = self.result[metric] * metric_scale[metric]
            total_score.append(score)
        # Comprehensive score: mean of the weighted metric scores.
        total_score = sum(total_score) / len(total_score)
        res['comprehensive score'] = round(total_score, 4)
        return res
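
# Expected ModelResult input shape (inferred from usage; values illustrative):
# {
#     "model_name": "my-model",
#     "project_link": "https://example.com/my-model",  # optional
#     "result": {"clip_score": 30.0, "human_face_similarity": 0.55, ...},
# }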
def eval_request(model_name, org_link, huggingface_data_set_name):
    """Submit an evaluation job to the remote A2-Bench service."""
    params = {
        "model_name": model_name,
        "org_link": org_link,
        "huggingface_data_set_name": huggingface_data_set_name
    }
    response = requests.post(
        "http://47.239.99.255/A2Bench_evaluation/eval",
        params=params,  # sent as URL query parameters, not a JSON body
        headers={"Content-Type": "application/json"}
    )
    return response.json()
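
# Illustrative call (the arguments are hypothetical):
# eval_request("my-model", "https://example.com/my-model", "user/my_results")
# The service is assumed to return a JSON object describing the job status.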
def evaluation(model_name, org_link, huggingface_data_set_name):
    try:
        if org_link == "":
            org_link = None
        eval_request(model_name, org_link, huggingface_data_set_name)
        return "Evaluation completed successfully!"
    except Exception as e:
        raise gr.Error(f"Evaluation failed: {str(e)}")
def load_leaderboard():
    leaderboard_list = []
    file_list = requests.get("http://47.239.99.255/A2Bench_evaluation/load_leaderboard")
    for file in file_list.json():
        leaderboard_list.append(ModelResult(file))
    return leaderboard_list
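
# load_leaderboard assumes the endpoint returns a JSON list of entries in the
# ModelResult input shape, e.g. [{"model_name": ..., "result": {...}}, ...].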
HEADER = ['model_name', 'comprehensive score', 'clip_score', 'human_face_similarity', 'env_clip', 'other_subject_clip', 'image_quality', 'dynamic_degree', 'aesthetic_quality', 'motion_smoothness']

def display_table():
    leaderboard_list = load_leaderboard()
    data = {metric: [] for metric in HEADER}
    for model_result in leaderboard_list:
        result_dict = model_result.to_dict()
        for metric in HEADER:
            data[metric].append(result_dict[metric])
    df = pd.DataFrame(data)
    df = df.sort_values(by='comprehensive score', ascending=False)
    return df
_HEADER_1 = '''
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
    <h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
    <p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2</a> | Code: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co./Skywork/SkyReels-A2' target='_blank'>HuggingFace</a></p>
</div>

❗️❗️❗️**LEADERBOARD INTRODUCTION:** ❗️❗️❗️

This is the A2-Bench leaderboard, used to evaluate the performance of elements-to-video (E2V) generation models.
We provide an evaluation set containing 50 paired groups of elements (character, object, and background). You can check the [evaluation set introduction]() for more details. Each evaluation case includes:
<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
    <li>Human subject (character): includes both male and female subjects, covering Flux-generated people, celebrities, and ordinary people; we additionally provide several generated human images</li>
    <li>Non-human subject: various objects, including different types of animals, guitars, racing cars, balls, etc.</li>
    <li>Background image: diverse environmental settings, including ordinary indoor and outdoor scenes as well as famous backgrounds such as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong)</li>
    <li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
</ul>
'''
img = '''
<div style="text-align: center; margin: 1rem 0;">
    <h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
    <div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
        <img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
    </div>
    <p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
</div>
'''
_HEADER_2 = '''
We provide a set of evaluation metrics for elements-to-video models, along with a leaderboard showing the performance of different models.

Evaluation metrics include:
- Elements consistency: measures character identity consistency with the ArcFace face-recognition model, and object and background consistency with the CLIP model.
- Video quality: measures video quality in terms of image quality, dynamic degree, aesthetic quality, and motion smoothness.
- T2V metrics: measures text-video consistency using CLIP.

You can check the [metric introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.
The leaderboard ranks models by their comprehensive score, the weighted average of all metrics; T2V metrics and object-consistency metrics receive higher weights.
You can click a model name to visit its project page. You can also upload your model's results as a Hugging Face dataset like [this one](https://huggingface.co./datasets/ColinYK/pika_dataset).
'''  # noqa: E501
_CITE_ = r""" | |
If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'> Github Repo</a>. Thanks! | |
--- | |
📧 **Contact** | |
If you have any questions or feedbacks, feel free to open a discussion or contact <b>[email protected]</b>. | |
""" # noqa E501 | |
def upload_file(files):
    """Stage an uploaded file in its own subdirectory under TEMP_DIR."""
    target_dir = os.path.join(TEMP_DIR, f'{files.name}')
    os.makedirs(target_dir, exist_ok=True)
    shutil.move(files.name, target_dir)
    return target_dir
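
# Note: upload_file is not wired to any component in this demo. A minimal
# sketch of how it could be attached (hypothetical component names):
# file_widget = gr.File()
# file_widget.upload(upload_file, inputs=file_widget, outputs=gr.Textbox())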
with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}") as demo:
    gr.Markdown(_HEADER_1)
    gr.HTML(img)
    gr.Markdown(_HEADER_2)
    with gr.Group():
        table = gr.DataFrame(
            value=display_table(),
            datatype=['markdown', 'str'],
            interactive=False,  # read-only leaderboard table
            headers=HEADER,
        )
        refresh_btn = gr.Button("Refresh")
        refresh_btn.click(display_table, outputs=table)
    with gr.Group():
        with gr.Row():
            model_name_input = gr.Textbox(label="Model Name", placeholder="Required: enter your model name")
            org_link_input = gr.Textbox(label="Project Page", placeholder="Optional: enter your project page; it will be linked on the leaderboard", value=None)
            huggingface_data_set_name = gr.Textbox(label="Hugging Face Dataset Name", placeholder="Required: enter the Hugging Face dataset name holding your results")
        evaluation_btn = gr.Button("Evaluate")
        output_message = gr.Textbox(label="Evaluation Status", interactive=False)
        evaluation_btn.click(
            evaluation,
            inputs=[model_name_input, org_link_input, huggingface_data_set_name],
            outputs=output_message,
            api_name="evaluate",
        )
    gr.Markdown(_CITE_)

if __name__ == "__main__":
    demo.launch()