File size: 4,871 Bytes
aa83b59
 
8d4cb5f
aa83b59
dfd6e31
1d25574
 
8d4cb5f
 
652630b
dfd6e31
 
8d4cb5f
dfd6e31
685b08e
dfd6e31
 
 
 
 
685b08e
dfd6e31
 
 
685b08e
dfd6e31
8d4cb5f
685b08e
8d4cb5f
 
 
 
 
dfd6e31
 
 
 
8d4cb5f
 
 
 
dfd6e31
 
 
685b08e
8d4cb5f
 
 
 
 
 
 
 
 
dfd6e31
 
 
685b08e
dfd6e31
 
 
685b08e
8d4cb5f
 
dfd6e31
 
 
685b08e
dfd6e31
 
685b08e
dfd6e31
8d4cb5f
dfd6e31
 
8d4cb5f
dfd6e31
d96a296
dfd6e31
 
 
 
 
8d4cb5f
 
 
dfd6e31
 
 
8d4cb5f
 
dfd6e31
 
 
 
 
 
 
 
652630b
 
 
 
dfd6e31
652630b
 
dfd6e31
652630b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa83b59
 
685b08e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import torch
from transformers import pipeline
from PIL import Image
import numpy as np
import tempfile
import os
from diffusers import VideoToVideoSDPipeline
from diffusers.utils import export_to_video

def generate_video_from_image(image, duration_seconds=10, progress_bar=None):
    """
    Generate a short video clip from a still image.

    The image is captioned with the BLIP captioning model, then the caption
    (as the prompt) and the image (repeated once per frame as the starting
    video) are passed to a VideoToVideoSDPipeline. The resulting frames are
    written to a temporary ``.mp4`` file.

    Args:
        image: PIL image to animate. It is converted to RGB if needed and
            resized to 576x320 before generation.
        duration_seconds: Requested clip length in seconds; the number of
            frames is ``duration_seconds * 8``.
        progress_bar: Optional object with a ``progress(value, text)`` method
            (e.g. the element returned by ``st.progress``) used to report
            progress. Skipped when falsy.

    Returns:
        A tuple ``(output_path, caption)``: the path of the temporary video
        file (the caller is responsible for deleting it) and the generated
        caption text.

    Raises:
        Exception: Any failure is reported with ``st.error`` and re-raised.
    """
    fps = 8  # frame rate used for both the frame count and the export

    try:
        if progress_bar:
            progress_bar.progress(0.1, "Generating image caption...")

        # Setup image captioning pipeline and generate the caption
        captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
        caption = captioner(image)[0]['generated_text']
        st.write(f"Generated caption: *{caption}*")

        if progress_bar:
            progress_bar.progress(0.3, "Loading Video Generation model...")

        # The local must NOT be called `pipeline`: assigning that name anywhere
        # in this function makes it function-local, and the captioning call
        # above would then fail with UnboundLocalError.
        use_cuda = torch.cuda.is_available()
        video_pipeline = VideoToVideoSDPipeline.from_pretrained(
            "cerspense/zeroscope_v2_576w",
            # Half precision only on GPU; fall back to float32 on CPU.
            torch_dtype=torch.float16 if use_cuda else torch.float32,
        ).to("cuda" if use_cuda else "cpu")

        if progress_bar:
            progress_bar.progress(0.4, "Processing image...")

        # Prepare image
        if image.mode != "RGB":
            image = image.convert("RGB")
        image = image.resize((576, 320))  # Resize to model's expected size

        if progress_bar:
            progress_bar.progress(0.5, "Generating video frames...")

        # The video-to-video pipeline takes its frame count from the input
        # video, so the still image is repeated once per requested frame.
        num_frames = max(1, int(duration_seconds * fps))
        result = video_pipeline(
            prompt=caption,
            video=[image] * num_frames,
            num_inference_steps=50,
            guidance_scale=7.5,
        )
        # NOTE(review): recent diffusers releases return frames batched per
        # prompt, hence the [0]; confirm against the installed version.
        video_frames = result.frames[0]

        if progress_bar:
            progress_bar.progress(0.8, "Creating final video...")

        # Reserve a temporary file name for the video; the handle is closed
        # right away so the exporter can write to the path.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
            output_path = tmp_file.name

        try:
            export_to_video(video_frames, output_path, fps=fps)
        except Exception:
            # Do not leave an orphaned temp file behind when the export fails.
            if os.path.exists(output_path):
                os.unlink(output_path)
            raise

        if progress_bar:
            progress_bar.progress(1.0, "Video generation complete!")

        return output_path, caption

    except Exception as e:
        st.error(f"Error generating video: {str(e)}")
        raise

def main():
    """
    Render the Streamlit page for the AI video generator.

    Shows the page header, an image uploader and a duration slider. Once an
    image is uploaded and the "Generate Video" button is pressed, it calls
    ``generate_video_from_image`` and offers the result for download and
    inline playback. Errors are reported with ``st.error``.
    """
    st.set_page_config(page_title="AI Video Generator", page_icon="🎥")

    st.title("🎥 AI Video Generator")
    st.write("""
    Upload an image to generate a video with AI-powered motion and transitions.
    The app will automatically generate a caption for your image and use it as inspiration for the video.
    """)

    # Add warning about computational requirements
    st.warning("Note: Video generation may take several minutes depending on the duration and available computing resources.")

    # File uploader
    uploaded_file = st.file_uploader("Choose an image", type=['png', 'jpg', 'jpeg'])

    # Duration selector (adjusted for this model's capabilities)
    duration = st.slider("Video duration (seconds)", min_value=1, max_value=15, value=5)

    if uploaded_file is None:
        return

    # Display uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    if not st.button("Generate Video"):
        return

    try:
        # Create a progress bar
        progress_text = "Operation in progress. Please wait..."
        my_bar = st.progress(0, text=progress_text)

        # Generate video
        video_path, _ = generate_video_from_image(image, duration, my_bar)

        if video_path and os.path.exists(video_path):
            try:
                # Load the whole video into memory so the temp file can go.
                with open(video_path, 'rb') as video_file:
                    video_bytes = video_file.read()
            finally:
                # Clean up the temporary file even if reading it fails.
                os.unlink(video_path)

            # Create download button
            st.download_button(
                label="Download Video",
                data=video_bytes,
                file_name="generated_video.mp4",
                mime="video/mp4"
            )

            # Display video
            st.video(video_bytes)
        else:
            st.error("Failed to generate video. Please try again.")

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()