Spaces:
Sleeping
Sleeping
Anurag Bhardwaj
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,66 +1,70 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import subprocess
|
3 |
import os
|
|
|
|
|
|
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
)
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
|
19 |
-
logs = stdout.decode() + stderr.decode()
|
20 |
|
21 |
-
#
|
22 |
-
|
23 |
-
video_file = "output.mp4"
|
24 |
-
status = "✅ Video generated successfully!"
|
25 |
-
else:
|
26 |
-
video_file = None
|
27 |
-
status = "❌ Video generation failed! Check logs above."
|
28 |
-
|
29 |
-
return video_file, logs, status
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
)
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
value="832*480"
|
49 |
-
)
|
50 |
-
|
51 |
-
with gr.Row():
|
52 |
-
steps_slider = gr.Slider(10, 50, step=5, value=20, label="Sampling steps:")
|
53 |
-
|
54 |
-
generate_button = gr.Button("Generate Video")
|
55 |
-
|
56 |
-
video_output = gr.Video(label="Generated Video")
|
57 |
-
logs_output = gr.Textbox(label="📜 Logs", lines=10)
|
58 |
-
status_output = gr.Textbox(label="Status")
|
59 |
-
|
60 |
-
generate_button.click(
|
61 |
-
generate_video,
|
62 |
-
inputs=[prompt_input, frame_slider, resolution_choice, steps_slider],
|
63 |
-
outputs=[video_output, logs_output, status_output]
|
64 |
-
)
|
65 |
|
66 |
-
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import torch
|
3 |
+
import gradio as gr
|
4 |
+
from diffsynth import ModelManager, WanVideoPipeline, save_video
|
5 |
|
6 |
+
# Checkpoint files that make up the Wan2.1-T2V-14B model set. They are loaded
# once at import time so every request reuses the same in-memory pipeline.
_WAN_CHECKPOINTS = [
    "models/Wan-AI/Wan2.1-T2V-14B/diffusion_pytorch_model.safetensors",
    "models/Wan-AI/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.safetensors",
    "models/Wan-AI/Wan2.1-T2V-14B/Wan2.1_VAE.pth",
]

# The manager itself is created on the CPU; the pipeline below targets CUDA.
model_manager = ModelManager(device="cpu")
model_manager.load_models(
    _WAN_CHECKPOINTS,
    # Swap in torch.bfloat16 here to disable FP8 quantization.
    torch_dtype=torch.float8_e4m3fn,
)

# Build the video pipeline. NOTE: the device is hard-coded to "cuda"; there is
# no CPU fallback in this script.
pipe = WanVideoPipeline.from_model_manager(
    model_manager,
    torch_dtype=torch.bfloat16,
    device="cuda",
)
# NOTE(review): presumably None keeps no DiT parameters resident in VRAM --
# confirm against the DiffSynth documentation.
pipe.enable_vram_management(num_persistent_param_in_dit=None)
print("✅ All models loaded successfully!")
|
21 |
+
|
22 |
+
def generate_video(prompt, negative_prompt, sample_steps, resolution, seed):
    """Generate a video from a text prompt and return the saved file's path.

    Args:
        prompt: Text describing the desired video.
        negative_prompt: Text describing what the video should avoid.
        sample_steps: Number of inference steps; coerced to ``int`` because
            UI widgets may deliver floats.
        resolution: String formatted as ``"width*height"``, e.g. ``"480*832"``.
        seed: Seed forwarded to the pipeline; coerced to ``int``. ``None``
            (an emptied input field) is passed through unchanged.

    Returns:
        Path of the MP4 file written for this request.

    Raises:
        gr.Error: If ``resolution`` cannot be parsed, or ``sample_steps`` /
            ``seed`` are not numeric. Raising (rather than returning the
            message) keeps the text from being handed to the video output
            component as if it were a file path.
    """
    # Local import keeps this block self-contained.
    import tempfile

    # Parse the resolution string (format: "width*height").
    try:
        width, height = map(int, resolution.split("*"))
    except (AttributeError, ValueError) as exc:
        raise gr.Error(f"Error parsing resolution: {exc}") from exc

    # Number/slider widgets can hand back floats; the pipeline is given ints.
    try:
        num_steps = int(sample_steps)
        # NOTE(review): assumes the pipeline accepts seed=None -- confirm.
        seed_value = None if seed is None else int(seed)
    except (TypeError, ValueError) as exc:
        raise gr.Error(f"Invalid sample steps or seed: {exc}") from exc

    # Generate video using the pipeline.
    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=81,
        num_inference_steps=num_steps,
        seed=seed_value,
        tiled=True,
    )

    # Write each result to its own temporary file so that overlapping
    # requests do not overwrite one another's output.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
        output_path = handle.name
    save_video(video, output_path, fps=15, quality=5)

    # Return the video file path for display in Gradio.
    return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
+
# Build the Gradio UI. The inputs are listed in the same order as the
# parameters of generate_video: prompt, negative prompt, sample steps,
# resolution, seed.
interface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(
            label="Prompt",
            value="A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing China. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform with an open front, revealing her huge cleavage. She holds a modern assault rifle in a relaxed yet ready position. She walks towards the camera as the camera moves back to track her movements. The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Chinese flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic.",
        ),
        gr.Textbox(
            label="Negative Prompt",
            value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
        ),
        gr.Slider(label="Sample Steps", minimum=1, maximum=100, step=1, value=30),
        gr.Dropdown(
            label="Resolution (Width*Height)",
            choices=["720*1280", "1280*720", "480*832", "832*480", "1024*1024"],
            value="480*832",
        ),
        # precision=0 makes the component deliver an integer seed instead of
        # a float.
        gr.Number(label="Seed", value=1, precision=0),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="DiffSynth Video Generator",
)

# Launch the Gradio app in the Hugging Face Space.
interface.launch()
|