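# Gradio app: text-to-video generation with AnimateDiff, in two flavors:
# an advanced tab using SparseControlNet scribble conditioning and a simple
# text-only tab, with Real-ESRGAN upscaling and OpenCV denoising applied to
# the exported GIF.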
import torch
import gradio as gr
from diffusers import AnimateDiffPipeline, AnimateDiffSparseControlNetPipeline, MotionAdapter, DPMSolverMultistepScheduler, AutoencoderKL, SparseControlNetModel, EulerAncestralDiscreteScheduler
from diffusers.utils import export_to_gif, load_image
from RealESRGAN import RealESRGAN  # ai-forever Real-ESRGAN package, which provides this class and the load_weights/predict API used below
from PIL import Image
import cv2
import numpy as np
import ast

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
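# Note: the float16 checkpoints loaded below assume a CUDA device; on a
# CPU-only machine they would need to be loaded with torch_dtype=torch.float32.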

def enhance_quality(image_path):
    # Upscale 4x with Real-ESRGAN. Note that PIL reads only the first frame
    # of an animated GIF, so only that frame is enhanced.
    model = RealESRGAN(device, scale=4)
    model.load_weights('RealESRGAN_x4.pth', download=True)
    img = Image.open(image_path).convert('RGB')
    sr_image = model.predict(img)
    enhanced_path = 'enhanced_' + image_path
    sr_image.save(enhanced_path)
    return enhanced_path

def denoise_image(image_path):
    # cv2.imread cannot decode GIFs, so load via PIL and convert RGB -> BGR.
    image = np.array(Image.open(image_path).convert('RGB'))[:, :, ::-1]
    denoised_image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    denoised_path = 'denoised_' + image_path
    cv2.imwrite(denoised_path, denoised_image)
    return denoised_path

def generate_video(prompt, negative_prompt, num_inference_steps, conditioning_frame_indices, controlnet_conditioning_scale):
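    """Generate a video with AnimateDiff + SparseControlNet scribble
    conditioning, then upscale and denoise the exported GIF."""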
    motion_adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=torch.float16).to(device)
    controlnet = SparseControlNetModel.from_pretrained("guoyww/animatediff-sparsectrl-scribble", torch_dtype=torch.float16).to(device)
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16).to(device)
    
    # AnimateDiffPipeline does not accept a controlnet; the sparse-control
    # variant of the pipeline is required here.
    pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
        "SG161222/Realistic_Vision_V6.0_B1_noVAE",
        motion_adapter=motion_adapter,
        controlnet=controlnet,
        vae=vae,
        torch_dtype=torch.float16,
    ).to(device)
    
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, beta_schedule="linear", algorithm_type="dpmsolver++", use_karras_sigmas=True)
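    # DPM-Solver++ with Karras sigmas usually converges in roughly 20-30 steps,
    # so the 100-step UI default mostly trades speed for marginal quality gains.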
    
    image_files = [
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-1.png",
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-2.png",
        "https://huggingface.co./datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-3.png"
    ]
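    # Sample scribble conditioning images from the diffusers documentation;
    # each is applied at one of the user-supplied frame indices.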
    conditioning_frames = [load_image(img_file) for img_file in image_files]

    # Parse the user-supplied list safely instead of using eval().
    conditioning_frame_indices = ast.literal_eval(conditioning_frame_indices)
    controlnet_conditioning_scale = float(controlnet_conditioning_scale)

    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        conditioning_frames=conditioning_frames,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        controlnet_frame_indices=conditioning_frame_indices,
        generator=torch.Generator().manual_seed(1337),
    ).frames[0]
    
    export_to_gif(video, "output.gif")
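    # Post-process the exported GIF: 4x Real-ESRGAN upscale, then
    # fastNlMeansDenoisingColored. Note that both helpers process only the
    # first frame of the GIF (see the notes in the helpers above).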
    enhanced_gif = enhance_quality("output.gif")
    denoised_gif = denoise_image(enhanced_gif)
    return denoised_gif

def generate_simple_video(prompt):
    """Generate a simple text-to-video GIF with AnimateDiff and FreeNoise."""
    adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16)
    pipe = AnimateDiffPipeline.from_pretrained("SG161222/Realistic_Vision_V6.0_B1_noVAE", motion_adapter=adapter, torch_dtype=torch.float16)
    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
        pipe.scheduler.config,
        beta_schedule="linear",
        beta_start=0.00085,
        beta_end=0.012,
    )

    # FreeNoise enables long videos (128 frames below) via sliding-window
    # denoising; VAE slicing and CPU offload reduce peak VRAM usage.
    # enable_model_cpu_offload() manages device placement itself, so the
    # pipeline is intentionally not moved with .to(device) first.
    pipe.enable_free_noise()
    pipe.vae.enable_slicing()
    pipe.enable_model_cpu_offload()

    frames = pipe(
        prompt,
        num_frames=128,  # Increased for smoother video
        num_inference_steps=100,  # Increased for higher quality
        guidance_scale=15.0,  # Increased for stronger guidance
        decode_chunk_size=1,
    ).frames[0]
    
    export_to_gif(frames, "simple_output.gif")
    enhanced_gif = enhance_quality("simple_output.gif")
    denoised_gif = denoise_image(enhanced_gif)
    return denoised_gif

demo1 = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Prompt", value="an aerial view of a cyberpunk city, night time, neon lights, masterpiece, high quality"),
        gr.Textbox(label="Negative Prompt", value="low quality, worst quality, letterboxed"),
        gr.Slider(label="Number of Inference Steps", minimum=1, maximum=200, step=1, value=100),  # Increased default value
        gr.Textbox(label="Conditioning Frame Indices", value="[0, 8, 15]"),
        gr.Slider(label="ControlNet Conditioning Scale", minimum=0.1, maximum=2.0, step=0.1, value=1.0)
    ],
    outputs=gr.Image(label="Generated Video"),
    title="Generate Video with AnimateDiffSparseControlNetPipeline",
    description="Generate a video using the AnimateDiffSparseControlNetPipeline."
)

demo2 = gr.Interface(
    fn=generate_simple_video,
    inputs=gr.Textbox(label="Prompt", value="An astronaut riding a horse on Mars."),
    outputs=gr.Image(label="Generated Simple Video"),
    title="Generate Simple Video with AnimateDiff",
    description="Generate a simple video using the AnimateDiffPipeline."
)

demo = gr.TabbedInterface([demo1, demo2], ["Advanced Video Generation", "Simple Video Generation"])

demo.launch()
#demo.launch(server_name="0.0.0.0", server_port=7910)