|
import spaces |
|
import gradio as gr |
|
import time |
|
import torch |
|
import gc |
|
import tempfile |
|
|
|
from diffusers.utils import export_to_video |
|
|
|
from video_model import t2v_pipe |
|
|
|
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
|
|
|
def create_demo() -> gr.Blocks:
    """Build the Gradio text-to-video demo.

    The layout has a prompt box, a negative-prompt box and a
    "Generate Video" button in the left column, and the generated video
    plus a timing read-out in the right column. Clicking the button runs
    ``text_to_video`` with the two text boxes as inputs; every other
    generation parameter keeps its default.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """

    def get_time_cost(run_task_time: int, time_cost_str: str):
        """Advance the step timer and extend the timing string.

        Args:
            run_task_time: Millisecond timestamp of the previous call, or
                ``0`` on the first call (``0`` is the "not started" sentinel).
            time_cost_str: Timing string accumulated so far.

        Returns:
            ``(now_ms, updated_string)``. The first call yields ``"start"``;
            each later call appends ``"-->"`` followed by the milliseconds
            elapsed since the previous call.
        """
        now_time = int(time.time() * 1000)
        if run_task_time == 0:
            time_cost_str = "start"
        else:
            if time_cost_str != "":
                time_cost_str += "-->"
            time_cost_str += f"{now_time - run_task_time}"
        return now_time, time_cost_str

    @spaces.GPU(duration=60)
    def text_to_video(
        prompt: str,
        negative_prompt: str,
        width: int = 768,
        height: int = 512,
        num_frames: int = 121,
        frame_rate: int = 25,
        num_inference_steps: int = 30,
        seed: int = 8,
        progress: gr.Progress = gr.Progress(),
    ):
        """Generate a video from ``prompt`` and write it to a temp ``.mp4``.

        Args:
            prompt: Text describing the desired video.
            negative_prompt: Text describing what to avoid.
            width: Forwarded to ``t2v_pipe`` as ``width``.
            height: Forwarded to ``t2v_pipe`` as ``height``.
            num_frames: Forwarded to ``t2v_pipe`` as ``num_frames``.
            frame_rate: Frames per second used when exporting the video
                file; it is not passed to ``t2v_pipe``.
            num_inference_steps: Forwarded to ``t2v_pipe``.
            seed: Seed for the ``torch.Generator`` handed to the pipeline.
            progress: Gradio progress tracker. It is not referenced in the
                body; it is kept so the handler signature is unchanged.

        Returns:
            ``(output_path, time_cost_str)``: the path of the exported
            ``.mp4`` and the timing string built by ``get_time_cost``.
        """
        generator = torch.Generator(device=device).manual_seed(seed)
        run_task_time, time_cost_str = get_time_cost(0, "")
        try:
            with torch.no_grad():
                video = t2v_pipe(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    generator=generator,
                    width=width,
                    height=height,
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                ).frames[0]
        finally:
            # Collect garbage first so that blocks freed by the collection
            # can be returned by the cache flush that follows. This runs on
            # failure too, so an aborted run does not keep memory pinned.
            gc.collect()
            torch.cuda.empty_cache()
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

        # NamedTemporaryFile creates the file atomically, unlike the
        # deprecated, race-prone tempfile.mktemp. delete=False keeps the file
        # on disk after the handle closes so it can be written and returned.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        export_to_video(video, output_path, fps=frame_rate)

        # Drop the in-memory frames now that they are on disk.
        del video
        torch.cuda.empty_cache()
        return output_path, time_cost_str

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                txt2vid_prompt = gr.Textbox(
                    label="Enter Your Prompt",
                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
                    value="A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
                    lines=5,
                )

                txt2vid_negative_prompt = gr.Textbox(
                    label="Enter Negative Prompt",
                    placeholder="Describe what you don't want in the video...",
                    value="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
                    lines=2,
                )

                txt2vid_generate = gr.Button(
                    "Generate Video",
                    variant="primary",
                    size="lg",
                )

            with gr.Column():
                txt2vid_output = gr.Video(label="Generated Output")
                txt2vid_generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)

        # Only the two text boxes are wired as inputs, so the remaining
        # text_to_video parameters fall back to their defaults.
        txt2vid_generate.click(
            fn=text_to_video,
            inputs=[txt2vid_prompt, txt2vid_negative_prompt],
            outputs=[txt2vid_output, txt2vid_generated_cost],
        )

    return demo
|
|