Spaces:

Kidbea
/

text-to-video

Sleeping

App Files Files Community

Anurag Bhardwaj committed on Mar 21

Commit

cbb19c6

verified ·

1 Parent(s): c113d3c

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -59

app.py CHANGED Viewed

@@ -1,66 +1,70 @@
-import gradio as gr
-import subprocess
 import os
-def generate_video(prompt, frame_num, resolution, sample_steps):
-    # Inform user that generation has started
-    status = "Generating video... This may take a few minutes."
-    # Build command with proper quoting for the prompt
-    command = (
-        f"python generate.py --task t2v-14B --size {resolution} "
-        f"--frame_num {frame_num} --sample_steps {sample_steps} "
-        f"--ckpt_dir ./Wan2.1-T2V-14B --offload_model True --prompt \"{prompt}\""
     )
-    # Run the command and capture stdout and stderr
-    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    stdout, stderr = process.communicate()
-    logs = stdout.decode() + stderr.decode()
-    # Check if video file exists
-    if os.path.exists("output.mp4"):
-        video_file = "output.mp4"
-        status = "✅ Video generated successfully!"
-    else:
-        video_file = None
-        status = "❌ Video generation failed! Check logs above."
-    return video_file, logs, status
-with gr.Blocks() as demo:
-    gr.Markdown("# 🎥 WAN 2.1 - 14B AI Text-to-Video Generator")
-    with gr.Row():
-        prompt_input = gr.Textbox(
-            label="Enter your text prompt:",
-            value="A cat in military dress wearing headphones, laughing and walking.",
-            lines=4
-        )
-    with gr.Row():
-        frame_slider = gr.Slider(30, 120, step=10, value=60, label="Number of frames:")
-    with gr.Row():
-        resolution_choice = gr.Radio(
-            choices=["832*480", "1280*720"],
-            label="Select resolution:",
-            value="832*480"
-        )
-    with gr.Row():
-        steps_slider = gr.Slider(10, 50, step=5, value=20, label="Sampling steps:")
-    generate_button = gr.Button("Generate Video")
-    video_output = gr.Video(label="Generated Video")
-    logs_output = gr.Textbox(label="📜 Logs", lines=10)
-    status_output = gr.Textbox(label="Status")
-    generate_button.click(
-        generate_video,
-        inputs=[prompt_input, frame_slider, resolution_choice, steps_slider],
-        outputs=[video_output, logs_output, status_output]
-    )
-demo.launch()

 import os
+import torch
+import gradio as gr
+from diffsynth import ModelManager, WanVideoPipeline, save_video
+# Initialize model manager and load the models (do this once at startup).
+# The manager itself is created with device="cpu"; only the pipeline built
+# from it further down is given device="cuda".
+model_manager = ModelManager(device="cpu")
+model_manager.load_models(
+    [
+        "models/Wan-AI/Wan2.1-T2V-14B/diffusion_pytorch_model.safetensors",
+        "models/Wan-AI/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.safetensors",
+        "models/Wan-AI/Wan2.1-T2V-14B/Wan2.1_VAE.pth",
+    ],
+    torch_dtype=torch.float8_e4m3fn  # or use torch.bfloat16 if you prefer to disable FP8 quantization
+)
+# Initialize the video pipeline. NOTE: the device is hard-coded to "cuda";
+# there is no availability check and no CPU fallback in this file.
+pipe = WanVideoPipeline.from_model_manager(model_manager, torch_dtype=torch.bfloat16, device="cuda")
+# NOTE(review): the effect of num_persistent_param_in_dit=None is defined by
+# diffsynth, not by this file -- confirm against the library before tuning it.
+pipe.enable_vram_management(num_persistent_param_in_dit=None)
+print("✅ All models loaded successfully!")
+def generate_video(prompt, negative_prompt, sample_steps, resolution, seed):
+    """
+    Generate a video from a text prompt and return the path of the saved file.
+
+    Args:
+        prompt: Text describing the desired video.
+        negative_prompt: Text describing what the video should avoid.
+        sample_steps: Number of inference steps; coerced to int.
+        resolution: String of the form "width*height", e.g. "832*480".
+        seed: Seed forwarded to the pipeline; coerced to int, or forwarded
+            as None when the input field was left empty.
+
+    Returns:
+        The path of the saved video file ("video1.mp4").
+
+    Raises:
+        gr.Error: If resolution is not two integers separated by "*".
+    """
+    # Parse the resolution string (format: "width*height")
+    try:
+        width, height = map(int, resolution.split('*'))
+    except (AttributeError, ValueError) as e:
+        # The only output component is a gr.Video, so a returned message
+        # would be handed to it as if it were a video file path. Raising
+        # gr.Error shows the message to the user instead.
+        raise gr.Error(f"Error parsing resolution: {e}") from e
+    # UI widgets may deliver numbers as floats; the pipeline arguments below
+    # are counts/seeds, so normalise them to int before use.
+    num_inference_steps = int(sample_steps)
+    # NOTE(review): assumes the pipeline accepts seed=None (unseeded run)
+    # when the seed field is empty -- confirm against diffsynth.
+    seed = None if seed is None else int(seed)
+    # Generate video using the pipeline
+    video = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        height=height,
+        width=width,
+        num_frames=81,
+        num_inference_steps=num_inference_steps,
+        seed=seed,
+        tiled=True
     )
+    # Save the generated video to a file.
+    # NOTE(review): the path is fixed, so overlapping requests would write
+    # to the same file -- confirm the app's concurrency settings.
+    output_path = "video1.mp4"
+    save_video(video, output_path, fps=15, quality=5)
+    # Return the video file path for display in Gradio
+    return output_path
+# Create the Gradio interface. The inputs are listed in the same order as
+# the parameters of generate_video: prompt, negative prompt, sample steps,
+# resolution, seed.
+interface = gr.Interface(
+    fn=generate_video,
+    inputs=[
+        gr.Textbox(label="Prompt",
+                   value="A highly detailed, realistic AI-generated portrait of a very beautiful female soldier representing China. She has long hair, a confident and friendly smile, and striking facial features. She is wearing a camouflage military uniform with an open front, revealing her huge cleavage. She holds a modern assault rifle in a relaxed yet ready position. She walks towards the camera as the camera moves back to track her movements. The background shows a slightly blurred battlefield with other soldiers in formation, creating a sense of military action. The Chinese flag is displayed on her uniform on her shoulder. The lighting is natural, with a warm and slightly cinematic tone. The image should have a sharp focus on her face and outfit while maintaining a professional military aesthetic."),
+        gr.Textbox(label="Negative Prompt",
+                   value="色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走"),
+        gr.Slider(label="Sample Steps", minimum=1, maximum=100, step=1, value=30),
+        # Every choice uses the "width*height" format that generate_video parses.
+        gr.Dropdown(label="Resolution (Width*Height)",
+                    choices=["720*1280", "1280*720", "480*832", "832*480", "1024*1024"],
+                    value="480*832"),
+        # precision=0 rounds the entry to the nearest integer and delivers it
+        # as an int, so the seed never reaches the handler as a fraction.
+        gr.Number(label="Seed", value=1, precision=0)
+    ],
+    outputs=gr.Video(label="Generated Video"),
+    title="DiffSynth Video Generator"
+)
+# Launch the Gradio app in the Hugging Face Space
+interface.launch()