Anurag Bhardwaj committed on
Commit
cbb19c6
·
verified ·
1 Parent(s): c113d3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -59
app.py CHANGED
@@ -1,66 +1,70 @@
1
- import gradio as gr
2
- import subprocess
3
  import os
 
 
 
4
 
5
- def generate_video(prompt, frame_num, resolution, sample_steps):
6
- # Inform user that generation has started
7
- status = "Generating video... This may take a few minutes."
8
-
9
- # Build command with proper quoting for the prompt
10
- command = (
11
- f"python generate.py --task t2v-14B --size {resolution} "
12
- f"--frame_num {frame_num} --sample_steps {sample_steps} "
13
- f"--ckpt_dir ./Wan2.1-T2V-14B --offload_model True --prompt \"{prompt}\""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  )
15
 
16
- # Run the command and capture stdout and stderr
17
- process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
18
- stdout, stderr = process.communicate()
19
- logs = stdout.decode() + stderr.decode()
20
 
21
- # Check if video file exists
22
- if os.path.exists("output.mp4"):
23
- video_file = "output.mp4"
24
- status = "✅ Video generated successfully!"
25
- else:
26
- video_file = None
27
- status = "❌ Video generation failed! Check logs above."
28
-
29
- return video_file, logs, status
30
 
31
- with gr.Blocks() as demo:
32
- gr.Markdown("# 🎥 WAN 2.1 - 14B AI Text-to-Video Generator")
33
-
34
- with gr.Row():
35
- prompt_input = gr.Textbox(
36
- label="Enter your text prompt:",
37
- value="A cat in military dress wearing headphones, laughing and walking.",
38
- lines=4
39
- )
40
-
41
- with gr.Row():
42
- frame_slider = gr.Slider(30, 120, step=10, value=60, label="Number of frames:")
43
-
44
- with gr.Row():
45
- resolution_choice = gr.Radio(
46
- choices=["832*480", "1280*720"],
47
- label="Select resolution:",
48
- value="832*480"
49
- )
50
-
51
- with gr.Row():
52
- steps_slider = gr.Slider(10, 50, step=5, value=20, label="Sampling steps:")
53
-
54
- generate_button = gr.Button("Generate Video")
55
-
56
- video_output = gr.Video(label="Generated Video")
57
- logs_output = gr.Textbox(label="📜 Logs", lines=10)
58
- status_output = gr.Textbox(label="Status")
59
-
60
- generate_button.click(
61
- generate_video,
62
- inputs=[prompt_input, frame_slider, resolution_choice, steps_slider],
63
- outputs=[video_output, logs_output, status_output]
64
- )
65
 
66
- demo.launch()
 
 
 
 
1
  import os
2
+ import torch
3
+ import gradio as gr
4
+ from diffsynth import ModelManager, WanVideoPipeline, save_video
5
 
6
# One-time startup work: read the Wan2.1-T2V-14B checkpoints through a
# ModelManager created with device="cpu", then build the inference pipeline.
checkpoint_files = [
    "models/Wan-AI/Wan2.1-T2V-14B/diffusion_pytorch_model.safetensors",
    "models/Wan-AI/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.safetensors",
    "models/Wan-AI/Wan2.1-T2V-14B/Wan2.1_VAE.pth",
]

model_manager = ModelManager(device="cpu")
# Weights are loaded as FP8 (float8_e4m3fn); pass torch.bfloat16 here instead
# to turn FP8 quantization off.
model_manager.load_models(checkpoint_files, torch_dtype=torch.float8_e4m3fn)

# The pipeline is requested on "cuda" unconditionally (there is no CPU
# fallback in this script) and computes in bfloat16.
pipe = WanVideoPipeline.from_model_manager(
    model_manager,
    torch_dtype=torch.bfloat16,
    device="cuda",
)
# NOTE(review): num_persistent_param_in_dit=None presumably keeps no DiT
# parameters resident in VRAM -- confirm against the DiffSynth documentation.
pipe.enable_vram_management(num_persistent_param_in_dit=None)
print("✅ All models loaded successfully!")
21
+
22
def generate_video(prompt, negative_prompt, sample_steps, resolution, seed):
    """Render a video for a text prompt and return the path of the saved file.

    Args:
        prompt: Text describing the desired video.
        negative_prompt: Text describing what the video should avoid.
        sample_steps: Number of inference steps. Coerced to ``int`` because
            UI widgets may deliver floats.
        resolution: Frame size formatted as ``"width*height"``,
            for example ``"480*832"``.
        seed: Random seed. Coerced to ``int`` for the same reason as
            ``sample_steps``.

    Returns:
        The path of the MP4 file written by ``save_video``.

    Raises:
        gr.Error: If ``resolution`` is not two positive integers separated
            by ``*``, or if ``sample_steps`` / ``seed`` are not numeric.
    """
    num_frames = 81
    fps = 15
    quality = 5
    output_path = "video1.mp4"

    # Raise instead of returning the message: the only output component is a
    # video, so a returned string would be treated as a (missing) file path.
    try:
        width, height = map(int, resolution.split("*"))
    except (AttributeError, ValueError) as exc:
        raise gr.Error(f"Error parsing resolution: {exc}") from exc
    if width <= 0 or height <= 0:
        raise gr.Error(
            f"Error parsing resolution: dimensions must be positive, got {resolution!r}"
        )

    # gr.Number yields a float unless it is given a precision, and an empty
    # field yields None; normalise both values before they reach the pipeline.
    try:
        steps = int(sample_steps)
        seed_value = int(seed)
    except (TypeError, ValueError) as exc:
        raise gr.Error(f"Sample steps and seed must be numbers: {exc}") from exc

    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        num_inference_steps=steps,
        seed=seed_value,
        tiled=True,
    )

    # NOTE(review): the output path is fixed, so overlapping requests would
    # overwrite each other's file -- confirm the app's queue runs one job at a time.
    save_video(video, output_path, fps=fps, quality=quality)

    # Gradio displays the video from the returned file path.
    return output_path
 
 
 
 
 
 
 
50
 
51
# Build the Gradio UI. The input components are listed in the same order as
# the positional parameters of generate_video:
# prompt, negative_prompt, sample_steps, resolution, seed.
interface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(
            label="Prompt",
            value="A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing China. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform with an open front, revealing her huge cleavage. She holds a modern assault rifle in a relaxed yet ready position. She walks towards the camera as the camera moves back to track her movements. The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Chinese flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic.",
        ),
        gr.Textbox(
            label="Negative Prompt",
            value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
        ),
        gr.Slider(label="Sample Steps", minimum=1, maximum=100, step=1, value=30),
        gr.Dropdown(
            label="Resolution (Width*Height)",
            choices=["720*1280", "1280*720", "480*832", "832*480", "1024*1024"],
            value="480*832",
        ),
        # precision=0 makes the component round to the nearest integer and
        # hand the callback an int rather than a float seed.
        gr.Number(label="Seed", value=1, precision=0),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="DiffSynth Video Generator",
)

# Launch the Gradio app in the Hugging Face Space
interface.launch()