Spaces:

Skywork
/

SkyReels_A2_Bench

Running

App Files Files Community

SkyReels_A2_Bench / app.py

ColinYK

Upload app.py

781842d verified 1 day ago

raw

history blame contribute delete

8.68 kB

	import gradio as gr
	import sys
	import pandas as pd
	import os
	import json
	import shutil
	import zipfile
	import uuid
	import requests

	# Scratch directory named "tmp", located next to this script.
	TEMP_DIR = os.path.join(
	    os.path.dirname(os.path.abspath(__file__)),
	    'tmp',
	)

	# Per-metric weights used for the comprehensive score. Each entry is either
	# a constant multiplier or a callable mapping the raw metric value to its
	# multiplier.
	metric_scale = {
	    'human_face_similarity': 1.5,
	    'clip_score': 0.02,
	    'env_clip': 1.5,
	    'other_subject_clip': 1.5,
	    'image_quality': 1.0,
	    # Piecewise weight: 0.1 below 0.885, 0.95 from 0.885 up to (not including) 0.95, else 1.0.
	    'dynamic_degree': lambda x: 0.1 if x < 0.885 else (0.95 if x < 0.95 and x > 0.88 else 1.0),
	    'aesthetic_quality': 1.0,
	    'motion_smoothness': 1.0,
	}


	class ModelResult:
	    """One leaderboard entry: model name, optional project link, metric values."""

	    def __init__(self, data):
	        """Build an entry from a mapping.

	        Args:
	            data: Mapping with the required keys ``'model_name'`` and
	                ``'result'`` (metric name -> value) and the optional key
	                ``'project_link'``.

	        Raises:
	            KeyError: If ``'model_name'`` or ``'result'`` is missing.
	        """
	        self.name = data['model_name']
	        self.project_link = data.get('project_link', None)
	        self.result = data['result']

	    def to_dict(self):
	        """Return this entry as a flat row for the leaderboard table.

	        The row holds ``'model_name'`` (wrapped in an HTML link when a
	        project link is set), one rounded value per metric, and
	        ``'comprehensive score'``: the mean of each metric value multiplied
	        by its ``metric_scale`` weight, or ``0.0`` when there are no metrics.

	        Raises:
	            KeyError: If a metric has no entry in ``metric_scale``.
	            ValueError: If a metric value cannot be converted to ``float``.
	        """
	        import html  # function-scope import keeps this block self-contained

	        if self.project_link is not None:
	            # Both values are embedded in markup, so escape them to keep
	            # quotes or angle brackets from breaking out of the anchor tag.
	            link = html.escape(str(self.project_link), quote=True)
	            label = html.escape(str(self.name))
	            display_name = f'<a href="{link}" target="_blank">{label}</a>'
	        else:
	            display_name = self.name
	        res = {'model_name': display_name}

	        weighted = []
	        for metric, raw in self.result.items():
	            # Convert once so the displayed value and the score agree even
	            # when the value arrives as a string.
	            value = float(raw)
	            # NOTE(review): the displayed value is lowered by 1e-3 before
	            # rounding; kept as in the original -- confirm this is intended.
	            res[metric] = round(value - 1e-3, 4)
	            scale = metric_scale[metric]
	            weight = scale(value) if callable(scale) else scale
	            weighted.append(value * weight)

	        # Guard the mean against an entry that carries no metrics at all.
	        mean_score = sum(weighted) / len(weighted) if weighted else 0.0
	        res['comprehensive score'] = round(mean_score, 4)
	        return res



	def eval_request(model_name, org_link, huggingface_data_set_name):
	    """Send an evaluation request to the A2-Bench evaluation endpoint.

	    Args:
	        model_name: Name of the model being submitted.
	        org_link: Project page URL, or ``None``.
	        huggingface_data_set_name: Name of the Hugging Face dataset to evaluate.

	    Returns:
	        The decoded JSON body of the response.

	    Raises:
	        requests.RequestException: If the connection fails or the server
	            answers with an HTTP error status.
	        ValueError: If the response body is not valid JSON.
	    """
	    params = {
	        "model_name": model_name,
	        "org_link": org_link,
	        "huggingface_data_set_name": huggingface_data_set_name,
	    }
	    response = requests.post(
	        "http://47.239.99.255/A2Bench_evaluation/eval",
	        # The fields travel as URL query parameters (``params=``), not as a
	        # JSON body; the Content-Type header is kept as originally sent.
	        params=params,
	        headers={"Content-Type": "application/json"},
	        # Bound only the connect phase so an unreachable server fails fast;
	        # the read phase stays unbounded because the caller reports
	        # completion only after this call returns.
	        timeout=(10, None),
	    )
	    # Surface 4xx/5xx answers instead of treating them as a success.
	    response.raise_for_status()
	    return response.json()

	def evaluation(model_name, org_link, huggingface_data_set_name):
	    """Validate the submission form and forward it to ``eval_request``.

	    Args:
	        model_name: Model name from the form (required).
	        org_link: Project page from the form; blank is treated as absent.
	        huggingface_data_set_name: Dataset name from the form (required).

	    Returns:
	        A success message once ``eval_request`` returns.

	    Raises:
	        gr.Error: If a required field is blank or the request fails.
	    """
	    model_name = (model_name or "").strip()
	    huggingface_data_set_name = (huggingface_data_set_name or "").strip()
	    # The form labels these two fields as required; reject blanks locally
	    # instead of sending an empty submission to the service.
	    if not model_name:
	        raise gr.Error("Evaluation failed: model name is required.")
	    if not huggingface_data_set_name:
	        raise gr.Error("Evaluation failed: Huggingface dataset name is required.")
	    # A blank or whitespace-only link means "no project page".
	    org_link = (org_link or "").strip() or None

	    try:
	        eval_request(model_name, org_link, huggingface_data_set_name)
	    except Exception as e:
	        # UI boundary: turn any failure into a visible Gradio error while
	        # keeping the original exception as the cause.
	        raise gr.Error(f"Evaluation failed: {str(e)}") from e
	    return "Evaluation completed successfully!"



	def load_leaderboard():
	    """Fetch all leaderboard entries from the A2-Bench evaluation endpoint.

	    Returns:
	        A list of ``ModelResult`` objects, one per item in the JSON response.

	    Raises:
	        requests.RequestException: If the request fails, times out, or the
	            server answers with an HTTP error status.
	        ValueError: If the response body is not valid JSON.
	    """
	    response = requests.get(
	        "http://47.239.99.255/A2Bench_evaluation/load_leaderboard",
	        # (connect, read) limits so an unresponsive server cannot block the
	        # caller indefinitely.
	        timeout=(10, 60),
	    )
	    response.raise_for_status()
	    return [ModelResult(entry) for entry in response.json()]

	# Column order of the leaderboard table.
	HEADER = [
	    'model_name',
	    'comprehensive score',
	    'clip_score',
	    'human_face_similarity',
	    'env_clip',
	    'other_subject_clip',
	    'image_quality',
	    'dynamic_degree',
	    'aesthetic_quality',
	    'motion_smoothness',
	]


	def display_table():
	    """Build the leaderboard as a DataFrame sorted by comprehensive score.

	    Returns:
	        A ``pandas.DataFrame`` with the columns listed in ``HEADER``, one row
	        per model, ordered from highest to lowest ``'comprehensive score'``.
	        A metric that an entry does not report is left as a missing value
	        rather than aborting the whole table.
	    """
	    rows = [model_result.to_dict() for model_result in load_leaderboard()]
	    # ``columns=HEADER`` fixes the column set and order; keys absent from a
	    # row become NaN, keys outside HEADER are dropped.
	    df = pd.DataFrame(rows, columns=HEADER)
	    return df.sort_values(by='comprehensive score', ascending=False)

	# Page banner: title, paper/code links and an introduction to the evaluation set.
	_HEADER_1 = '''
	<div style="text-align: center; max-width: 650px; margin: 0 auto;">
	<h1 style="font-size: 2.5rem; font-weight: 700; margin-bottom: 1rem; display: contents;">A2-Bench Leaderboard</h1>
	<p style="font-size: 1rem; margin-bottom: 1.5rem;">Paper: <a href='https://arxiv.org/pdf/2504.02436' target='_blank'>SkyReels-A2 </a> | Codes: <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'>GitHub</a> | <a href='https://huggingface.co/Skywork/SkyReels-A2' target='_blank'>HuggingFace</a></p>
	</div>
	❗️❗️❗️LEADERBOARD INTRODUCTION: ❗️❗️❗️
	This is A2-Bench leaderboard which is used to evaluate the performance of elements-to-video (E2V) generation models.
	We provide an evaluation set containing 50 paired multiple elements (character, object, and background). You can check [evaluation set introduction]() for more details. Each evaluation case includes:
	<ul style="font-size: 0.9rem; margin-top: -0.5rem;">
	<li>Human subject (characters): Includes both male and female subjects, covering generated by Flux, celebrities and ordinary people, additionally, we provide several generated human images</li>
	<li>Non-human subject: Various objects including different types of animals, guitars, racing cars, balls, etc.</li>
	<li>Background image: Diverse environmental settings including ordinary indoor and outdoor scenes and famous background such as The Great Wall and Yellow Wind Ridge (from Black Myth: Wukong)</li>
	<li>Prompt: "A realistic scene where [human] interacts with [object] in [environment], following physical laws and spatial logic".</li>
	</ul>
	</p>
	'''

	# Static HTML for the "Example Test Case" panel: three example images
	# (human subject, non-human subject, background) and a prompt caption.
	img = '''
	<div style="text-align: center; margin: 1rem 0;">
	<h3 style="font-size: 1.2rem; margin-bottom: 0.5rem;">Example Test Case</h3>
	<div style="display: flex; justify-content: center; gap: 1rem; margin: 1rem 0;">
	<img src="https://www.helloimg.com/i/2025/04/07/67f386a7f3717.png" alt="Human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
	<img src="https://www.helloimg.com/i/2025/04/07/67f38681d9c24.jpg" alt="Non-human Subject Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
	<img src="https://www.helloimg.com/i/2025/04/07/67f38684117d0.jpg" alt="Background Example" style="max-height: 400px; border-radius: 2px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);">
	</div>
	<p style="font-size: 0.9rem; margin-top: 0.5rem;">Prompt: A man feeding a bird in the park.</p>
	</div>
	'''
	# Markdown describing the evaluation metrics and how the leaderboard is ranked.
	__HEADER__2 = '''
	We provide a set of evaluation metrics for elements-to-video models and a leaderboard to show the performance of different models.
	Evaluation metrics include:
	- Elements Consistency: Measures character id consistency using arcface human recognition model, and measures object and background consistency using CLIP model.
	- Video Quality: Measures video quality on image quality, dynamic degree, aesthetic quality and motion smoothness.
	- T2V Metrics: Measures text-video consistency using CLIP
	You can check [Metric Introduction](https://skyworkai.github.io/skyreels-a2.github.io/static/images/bench.png) for more details.
	The leaderboard ranks the models based on the comprehensive score, which is the weighted average of all the metrics. We give T2V metrics and object consistency metrics higher weights.
	You can click the model name to visit the project page. In the meantime, you can upload your model result as a huggingface dataset like [this](https://huggingface.co/datasets/ColinYK/pika_dataset).
	'''  # noqa: E501

	# Footer shown at the bottom of the page: star request and contact details.
	# NOTE(review): the contact address below reads "[email protected]", which
	# looks like a redacted placeholder -- restore the real address.
	_CITE_ = r"""
	If A2-Bench is helpful, please help to ⭐ the <a href='https://github.com/SkyworkAI/SkyReels-A2' target='_blank'> GitHub Repo</a>. Thanks!
	---
	📧 Contact
	If you have any questions or feedback, feel free to open a discussion or contact <b>[email protected]</b>.
	"""  # noqa: E501

	def upload_file(files):
	    """Move an uploaded file into its own directory under ``TEMP_DIR``.

	    Args:
	        files: Object whose ``name`` attribute is the path of the file to move.

	    Returns:
	        The directory the file was moved into: ``TEMP_DIR`` joined with the
	        file's base name.

	    Raises:
	        shutil.Error: If a file with the same name already exists in the
	            target directory.
	    """
	    source_path = files.name
	    # Use only the final path component. Joining an absolute path onto
	    # TEMP_DIR would discard TEMP_DIR entirely, and creating a directory at
	    # the file's own path then fails because the file already exists there.
	    target_dir = os.path.join(TEMP_DIR, os.path.basename(source_path))
	    os.makedirs(target_dir, exist_ok=True)
	    shutil.move(source_path, target_dir)
	    return target_dir

	# Build the Gradio page: intro text, example images, metric description,
	# leaderboard table with a refresh button, submission form, and footer.
	# The CSS makes links inside the dataframe inherit the text colour without underline.
	with gr.Blocks(css=".gr-dataframe a {text-decoration: none; color: inherit;}") as demo:
	gr.Markdown(_HEADER_1)
	gr.HTML(img)
	gr.Markdown(__HEADER__2)
	with gr.Group():
	table = gr.DataFrame(
	value=display_table(), # evaluated once here, while the page is being built
	datatype=['markdown', 'str'], # first column (model_name) may hold an HTML link
	interactive=False, # read-only table; the original note said "allow editing", which does not match this value
	headers=HEADER,
	)
	Refresh = gr.Button("Refresh")
	# Re-run display_table() and replace the table contents with its result.
	Refresh.click(display_table, outputs=table)

	with gr.Group():

	with gr.Row():
	model_name_input = gr.Textbox(label="Model Name", placeholder="Required:Enter your model name")
	org_link_input = gr.Textbox(label="Project Page", placeholder="Optional:Enter project page, will show on leaderboard", value=None)
	huggingface_data_set_name = gr.Textbox(label="Huggingface Data Set Name", placeholder="Required :Enter huggingface dataset set name, will show on leaderboard")

	evaluation_btn = gr.Button("Evaluation")
	output_message = gr.Textbox(label="Evaluation Status", interactive=False)
	# Call evaluation() with the three textbox values; its return value fills the status box.
	evaluation_btn.click(
	evaluation,
	inputs=[model_name_input, org_link_input, huggingface_data_set_name],
	outputs=output_message,
	api_name="evaluate",
	)



	gr.Markdown(_CITE_)



	# Start the Gradio server only when this file is run as a script.
	if __name__ == "__main__":
	demo.launch()