import spaces
import gradio as gr
import time
import os
import random
from skyreelsinfer.offload import OffloadConfig
from skyreelsinfer import TaskType
from skyreelsinfer.skyreels_video_infer import SkyReelsVideoSingleGpuInfer
from diffusers.utils import export_to_video
from diffusers.utils import load_image
from PIL import Image

import torch

# Numerics / backend configuration: allow TF32 matmuls, but keep reduced-precision
# reductions disabled; prefer cuBLAS for BLAS and cuSOLVER for linear algebra.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
# These selectors are functions, not attributes; call them to pick the backend.
torch.backends.cuda.preferred_blas_library(backend="cublas")
torch.backends.cuda.preferred_linalg_library(backend="cusolver")
torch.set_float32_matmul_precision("high")

# Set these via os.environ (not os.putenv) so libraries that read os.environ see them:
# faster hub downloads, fast safetensors GPU loading, and no tokenizer fork warnings.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["SAFETENSORS_FAST_GPU"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

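# Build the single-GPU SkyReels-V1 image-to-video predictor once at startup and
# keep it in a module-level global so every Gradio request reuses the same model.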
def init_predictor():
    global predictor
    predictor = SkyReelsVideoSingleGpuInfer(
        task_type= TaskType.I2V,
        model_id="Skywork/SkyReels-V1-Hunyuan-I2V",
        quant_model=False,
        is_offload=False,
        offload_config=OffloadConfig(
            high_cpu_memory=True,
            parameters_level=True,
            compiler_transformer=False,
        )
    )
    
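# spaces.GPU runs this function on a ZeroGPU worker; duration caps the GPU
# reservation for a single call, in seconds.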
@spaces.GPU(duration=120)
def generate_video(prompt, image, size, steps, frames, guidance_scale, progress=gr.Progress(track_tqdm=True)):
    print(f"image type: {type(image)}")
    # Draw a fresh random seed for every request.
    random.seed(time.time())
    seed = random.randrange(4294967294)
    kwargs = {
        "prompt": prompt,
        "height": size,
        "width": size,
        "num_frames": frames,
        "num_inference_steps": steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "embedded_guidance_scale": 1.0,
        "negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
        "cfg_for": False,
    }
    if image is None:
        raise gr.Error("Please upload an input image.")
    # Load the conditioning image and resize it to the requested square size.
    # Image.resize returns a new image, so the result must be reassigned.
    img = load_image(image=image)
    img = img.resize((size, size), Image.LANCZOS)
    kwargs["image"] = img
    output = predictor.inference(kwargs)
    video_out_file = f"{seed}.mp4"
    print(f"Generated video, local path: {video_out_file}")
    export_to_video(output, video_out_file, fps=24)
    return video_out_file

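# Assemble the Gradio UI: image and prompt inputs, generation sliders, and a video output.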
def create_gradio_interface():
    with gr.Blocks() as demo:
        with gr.Row():
            image = gr.Image(label="Upload Image", type="filepath")
            prompt = gr.Textbox(label="Input Prompt")
            size = gr.Slider(
                label="Size",
                minimum=256,
                maximum=1024,
                step=16,
                value=368,
            )
            frames = gr.Slider(
                label="Number of Frames",
                minimum=16,
                maximum=256,
                step=12,
                value=48,
            )
            steps = gr.Slider(
                label="Number of Steps",
                minimum=1,
                maximum=96,
                step=1,
                value=20,
            )
            guidance_scale = gr.Slider(
                label="Guidance Scale",
                minimum=1.0,
                maximum=16.0,
                step=0.1,
                value=6.0,
            )
        submit_button = gr.Button("Generate Video")
        output_video = gr.Video(label="Generated Video")
        submit_button.click(
            fn=generate_video,
            inputs=[prompt, image, size, steps, frames, guidance_scale],
            outputs=[output_video],
        )
    return demo
    
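# Load the model once, build the interface, then launch the app.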
if __name__ == "__main__":
    init_predictor()
    demo = create_gradio_interface()
    demo.launch()