import ast

import torch
import numpy as np
import gradio as gr
from diffusers import (
    AnimateDiffPipeline,
    AnimateDiffSparseControlNetPipeline,
    MotionAdapter,
    DPMSolverMultistepScheduler,
    AutoencoderKL,
    SparseControlNetModel,
    EulerAncestralDiscreteScheduler,
)
from diffusers.utils import export_to_gif, load_image
from realesrgan import RealESRGAN
from PIL import Image
import cv2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
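
# Post-processing helpers. These assume a Real-ESRGAN package that exposes a
# RealESRGAN(device, scale=...) class with load_weights() and predict()
# (e.g. the ai-forever implementation); adjust the import above if your
# Real-ESRGAN distribution uses a different API.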
def enhance_quality(image_path):
    # Upscale 4x with Real-ESRGAN. Image.open() on a GIF yields its first frame.
    model = RealESRGAN(device, scale=4)
    model.load_weights('RealESRGAN_x4.pth', download=True)
    img = Image.open(image_path).convert('RGB')
    sr_image = model.predict(img)
    enhanced_path = 'enhanced_' + image_path
    sr_image.save(enhanced_path)
    return enhanced_path
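
# Light denoising pass with OpenCV's non-local means filter. Like the
# upscaling step above, this operates on a single frame of the exported GIF.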
def denoise_image(image_path):
    # OpenCV cannot decode GIFs, so load the frame with PIL and convert to a BGR array.
    image = cv2.cvtColor(np.array(Image.open(image_path).convert('RGB')), cv2.COLOR_RGB2BGR)
    denoised_image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    denoised_path = 'denoised_' + image_path
    # cv2.imwrite does not support GIF output, so save through PIL instead.
    Image.fromarray(cv2.cvtColor(denoised_image, cv2.COLOR_BGR2RGB)).save(denoised_path)
    return denoised_path
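
# Sparse-ControlNet AnimateDiff pipeline: the scribble conditioning images are
# pinned to the frame indices supplied by the user, following the diffusers
# AnimateDiffSparseControlNetPipeline example.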
def generate_video(prompt, negative_prompt, num_inference_steps, conditioning_frame_indices, controlnet_conditioning_scale):
    motion_adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=torch.float16).to(device)
    controlnet = SparseControlNetModel.from_pretrained("guoyww/animatediff-sparsectrl-scribble", torch_dtype=torch.float16).to(device)
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16).to(device)
    pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
        "SG161222/Realistic_Vision_V6.0_B1_noVAE",
        motion_adapter=motion_adapter,
        controlnet=controlnet,
        vae=vae,
        torch_dtype=torch.float16,
    ).to(device)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config,
        beta_schedule="linear",
        algorithm_type="dpmsolver++",
        use_karras_sigmas=True,
    )
    image_files = [
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-1.png",
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-2.png",
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-3.png",
    ]
    conditioning_frames = [load_image(img_file) for img_file in image_files]
    # Parse the user-supplied index list safely instead of using eval().
    conditioning_frame_indices = ast.literal_eval(conditioning_frame_indices)
    controlnet_conditioning_scale = float(controlnet_conditioning_scale)
    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        conditioning_frames=conditioning_frames,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        controlnet_frame_indices=conditioning_frame_indices,
        generator=torch.Generator().manual_seed(1337),
    ).frames[0]
    export_to_gif(video, "output.gif")
    enhanced_gif = enhance_quality("output.gif")
    denoised_gif = denoise_image(enhanced_gif)
    return denoised_gif
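
# Plain text-to-video AnimateDiff pipeline. FreeNoise, VAE slicing, and model
# CPU offload keep memory usage manageable for the longer 128-frame clip.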
def generate_simple_video(prompt):
    adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16).to(device)
    pipe = AnimateDiffPipeline.from_pretrained("SG161222/Realistic_Vision_V6.0_B1_noVAE", motion_adapter=adapter, torch_dtype=torch.float16).to(device)
    pipe.scheduler = EulerAncestralDiscreteScheduler(
        beta_schedule="linear",
        beta_start=0.00085,
        beta_end=0.012,
    )
    pipe.enable_free_noise()
    pipe.vae.enable_slicing()
    pipe.enable_model_cpu_offload()
    frames = pipe(
        prompt,
        num_frames=128,  # Increased for smoother video
        num_inference_steps=100,  # Increased for higher quality
        guidance_scale=15.0,  # Increased for stronger guidance
        decode_chunk_size=1,
    ).frames[0]
    export_to_gif(frames, "simple_output.gif")
    enhanced_gif = enhance_quality("simple_output.gif")
    denoised_gif = denoise_image(enhanced_gif)
    return denoised_gif
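
# Gradio UI: one tab per pipeline.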
demo1 = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Prompt", value="an aerial view of a cyberpunk city, night time, neon lights, masterpiece, high quality"),
        gr.Textbox(label="Negative Prompt", value="low quality, worst quality, letterboxed"),
        gr.Slider(label="Number of Inference Steps", minimum=1, maximum=200, step=1, value=100),  # Increased default value
        gr.Textbox(label="Conditioning Frame Indices", value="[0, 8, 15]"),
        gr.Slider(label="ControlNet Conditioning Scale", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
    ],
    outputs=gr.Image(label="Generated Video"),
    title="Generate Video with AnimateDiffSparseControlNetPipeline",
    description="Generate a video using the AnimateDiffSparseControlNetPipeline.",
)
demo2 = gr.Interface(
    fn=generate_simple_video,
    inputs=gr.Textbox(label="Prompt", value="An astronaut riding a horse on Mars."),
    outputs=gr.Image(label="Generated Simple Video"),
    title="Generate Simple Video with AnimateDiff",
    description="Generate a simple video using the AnimateDiffPipeline.",
)
demo = gr.TabbedInterface([demo1, demo2], ["Advanced Video Generation", "Simple Video Generation"])
demo.launch()
#demo.launch(server_name="0.0.0.0", server_port=7910)