import ast

import torch
import gradio as gr
from diffusers import (
    AnimateDiffSparseControlNetPipeline,
    AutoencoderKL,
    DPMSolverMultistepScheduler,
    MotionAdapter,
    SparseControlNetModel,
)
from diffusers.utils import export_to_gif, load_image
from transformers import pipeline
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load the Korean-to-English translation model
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
def translate_korean_to_english(text):
    # Translate only if the text contains Hangul jamo or syllable-block
    # characters; pure-English input is passed through unchanged.
    if any('\u3131' <= char <= '\u3163' or '\uac00' <= char <= '\ud7a3' for char in text):
        translated = translator(text)[0]['translation_text']
        return translated
    return text
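
# Quick sanity check of the language gate (hypothetical calls; the exact output
# depends on the Helsinki-NLP/opus-mt-ko-en checkpoint):
#   translate_korean_to_english("hello")          # -> "hello" (no Hangul, passed through)
#   translate_korean_to_english("고양이 한 마리")  # -> an English translation of "a cat"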
def generate_video(prompt, negative_prompt, num_inference_steps, conditioning_frame_indices, controlnet_conditioning_scale):
    # Translate Korean prompts to English before they reach the English-trained diffusion model.
    prompt = translate_korean_to_english(prompt)
    negative_prompt = translate_korean_to_english(negative_prompt)

    # Note: the models are reloaded on every call; for a long-running app they
    # could instead be loaded once at module level and reused.
    motion_adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=torch.float16).to(device)
    controlnet = SparseControlNetModel.from_pretrained("guoyww/animatediff-sparsectrl-scribble", torch_dtype=torch.float16).to(device)
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16).to(device)
    # AnimateDiffSparseControlNetPipeline (not the plain AnimateDiffPipeline)
    # is the pipeline that accepts a SparseControlNetModel.
    pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
        "SG161222/Realistic_Vision_V6.0_B1_noVAE",
        motion_adapter=motion_adapter,
        controlnet=controlnet,
        vae=vae,
        torch_dtype=torch.float16,
    ).to(device)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config,
        beta_schedule="linear",
        algorithm_type="dpmsolver++",
        use_karras_sigmas=True,
    )
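    # DPMSolver++ with Karras sigmas is a widely used scheduler setup for
    # AnimateDiff; these settings follow the diffusers SparseCtrl example.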
    # Scribble conditioning images for the sparse ControlNet.
    image_files = [
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-1.png",
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-2.png",
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-3.png",
    ]
    conditioning_frames = [load_image(img_file) for img_file in image_files]

    # Parse the user-supplied index list safely instead of calling eval() on arbitrary input.
    conditioning_frame_indices = ast.literal_eval(conditioning_frame_indices)
    controlnet_conditioning_scale = float(controlnet_conditioning_scale)
    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        conditioning_frames=conditioning_frames,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        controlnet_frame_indices=conditioning_frame_indices,
        generator=torch.Generator().manual_seed(1337),
    ).frames[0]
    export_to_gif(video, "output.gif")
    return "output.gif"
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        # Korean default values are kept intentionally to demonstrate the
        # built-in Korean-to-English translation; English translations in comments.
        gr.Textbox(label="Prompt (Korean or English)", value="카페에서 커피 마시는 아름다운 프랑스 여성, 걸작, 고품질"),  # "a beautiful French woman drinking coffee in a cafe, masterpiece, high quality"
        gr.Textbox(label="Negative Prompt (Korean or English)", value="저품질, 최악의 품질, 레터박스"),  # "low quality, worst quality, letterbox"
        gr.Slider(label="Number of Inference Steps", minimum=1, maximum=200, step=1, value=100),
        gr.Textbox(label="Conditioning Frame Indices", value="[0, 8, 15]"),
        gr.Slider(label="ControlNet Conditioning Scale", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
    ],
    outputs=gr.Image(label="Generated Video (GIF)"),
    title="Video Generation with AnimateDiffSparseControlNetPipeline",
    description="Generates a video using AnimateDiffSparseControlNetPipeline. Prompts can be entered in Korean or English.",
)
demo.launch()
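
# When run locally (`python app.py`) this serves the UI at http://127.0.0.1:7860;
# on a Hugging Face Space the file is executed automatically. Passing
# share=True to demo.launch() would additionally create a temporary public link.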