File size: 6,133 Bytes
9bf3376
 
 
 
e297d6c
 
5d58ce5
 
e297d6c
9bf3376
e297d6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bf3376
 
 
 
 
e297d6c
9bf3376
 
 
e297d6c
 
 
 
9bf3376
 
e297d6c
5d58ce5
9bf3376
e297d6c
 
 
9bf3376
 
e297d6c
 
9bf3376
 
 
 
 
 
 
 
e297d6c
9bf3376
e297d6c
 
 
 
 
 
 
 
 
 
 
 
 
9bf3376
 
e297d6c
 
 
9bf3376
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d58ce5
9bf3376
 
 
 
 
 
e297d6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bf3376
 
 
e297d6c
 
 
 
 
 
 
 
 
9bf3376
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import streamlit as st
from transformers import pipeline
from PIL import Image
import os
import pathlib
from huggingface_hub import snapshot_download
from modelscope.pipelines import pipeline as modelscope_pipeline
from modelscope.outputs import OutputKeys
import shutil

# Make sure the downloads directory exists. exist_ok=True replaces the separate
# existence check, which could race when two sessions start at the same time
# (both see the directory missing, the second makedirs call then fails).
os.makedirs('downloads', exist_ok=True)

def initialize_models():
    """Build the captioning and text-to-video pipelines if this session lacks them.

    Each pipeline is stored on ``st.session_state`` (``caption_pipeline`` and
    ``video_pipeline``) and is only constructed when its key is not present
    yet, so later calls find the existing objects and do nothing.
    """
    state = st.session_state

    # Image captioning pipeline.
    if 'caption_pipeline' not in state:
        state.caption_pipeline = pipeline(
            "image-to-text",
            model="Salesforce/blip-image-captioning-base",
        )

    # Text-to-video pipeline: nothing left to do when it is already loaded.
    if 'video_pipeline' in state:
        return

    # Fetch the model files into ./weights, then build the pipeline from
    # that local folder.
    weights_dir = pathlib.Path('weights')
    snapshot_download(
        'damo-vilab/modelscope-damo-text-to-video-synthesis',
        repo_type='model',
        local_dir=weights_dir,
    )
    state.video_pipeline = modelscope_pipeline(
        'text-to-video-synthesis',
        weights_dir.as_posix(),
    )

def save_video(video_path, caption):
    """Copy a video into the downloads directory under a caption-based name.

    Args:
        video_path: Path of the existing video file to copy.
        caption: Text the file name is derived from. Only its first 30
            characters are considered, and of those only alphanumerics,
            spaces, hyphens and underscores are kept.

    Returns:
        The path of the copy: ``downloads/video_<sanitized caption>.mp4``.
        When nothing of the caption survives sanitizing, ``untitled`` is
        used so the name never degenerates to ``video_.mp4``.

    Raises:
        OSError: If the source cannot be read or the copy cannot be written.

    Note:
        A file already saved under the same name is replaced.
    """
    allowed_punctuation = (' ', '-', '_')
    safe_caption = "".join(
        ch for ch in caption[:30] if ch.isalnum() or ch in allowed_punctuation
    ).strip()
    if not safe_caption:
        # e.g. an empty caption or one made only of punctuation
        safe_caption = "untitled"

    # Do not rely on start-up code having created the directory: it may have
    # been removed since, or this function may be used on its own.
    os.makedirs('downloads', exist_ok=True)

    save_path = os.path.join('downloads', f"video_{safe_caption}.mp4")
    shutil.copy2(video_path, save_path)
    return save_path

def generate_video_from_image(image, progress_bar=None):
    """Caption an image and turn that caption into a saved video.

    Uses the pipelines stored in ``st.session_state.caption_pipeline`` and
    ``st.session_state.video_pipeline``, so ``initialize_models()`` must have
    populated them for this session first.

    Args:
        image: Image handed to the captioning pipeline (``main`` passes the
            PIL image opened from the upload).
        progress_bar: Optional object with a ``progress(value, text)`` method
            (``main`` passes the result of ``st.progress``). ``None`` disables
            progress reporting.

    Returns:
        A ``(final_path, caption)`` tuple: the path returned by
        ``save_video`` and the generated caption text.

    Raises:
        Exception: Any error from the pipelines or from saving the file
            propagates unchanged. It is deliberately not reported here: the
            caller already displays it, and reporting in both places showed
            every failure twice.
    """
    def report(fraction, message):
        # Progress reporting is optional; skip it when no bar was supplied.
        if progress_bar is not None:
            progress_bar.progress(fraction, message)

    report(0.1, "Generating image caption...")

    # The captioning pipeline returns a list of results; use the first one.
    caption = st.session_state.caption_pipeline(image)[0]['generated_text']
    st.write(f"Generated caption: *{caption}*")

    report(0.3, "Generating video...")

    # The text-to-video pipeline takes its prompt under the 'text' key.
    output = st.session_state.video_pipeline({'text': caption})
    video_path = output[OutputKeys.OUTPUT_VIDEO]

    # Keep a copy under a caption-based name in the downloads directory.
    final_path = save_video(video_path, caption)

    report(1.0, "Video generation complete!")
    return final_path, caption

def _render_video_result(video_bytes, caption, video_path):
    """Show the success banner, both download buttons, the player and details."""
    st.success("Video generated successfully!")

    col1, col2 = st.columns(2)

    with col1:
        # Download under the name the file was saved with
        st.download_button(
            label="💾 Download Video",
            data=video_bytes,
            file_name=os.path.basename(video_path),
            mime="video/mp4",
            key="download1"
        )

    with col2:
        # Same bytes, named after the (truncated) caption
        st.download_button(
            label="📥 Download with Caption",
            data=video_bytes,
            file_name=f"{caption[:30]}.mp4",
            mime="video/mp4",
            key="download2"
        )

    st.video(video_bytes)

    st.info(f"""
    Video Details:
    - Caption: {caption}
    - Filename: {os.path.basename(video_path)}
    - Size: {len(video_bytes)/1024/1024:.1f} MB
    """)


def main():
    """Run the Streamlit page: upload an image, generate a video, offer downloads.

    The generated video is kept in ``st.session_state['video_result']`` and
    rendered on every run, not only on the run in which "Generate Video" was
    clicked. Streamlit reruns the script on each widget interaction, and the
    button only reads True for the single run triggered by its click; if the
    result were rendered inside the button branch, clicking a download button
    would make the video and the buttons disappear and force a new
    multi-minute generation.
    """
    st.set_page_config(page_title="AI Video Generator", page_icon="🎥")

    st.title("🎥 Text-to-Video Generator")
    st.write("""
    Upload an image to generate a video based on its content. The app will:
    1. Generate a caption for your image
    2. Create a video based on that caption
    3. Provide options to view and download the video
    """)

    # Display model limitations
    st.warning("""
    Model Limitations:
    - Only English text is supported
    - Cannot generate clear text in videos
    - May have limitations with complex scenes
    - Generation takes several minutes
    """)

    # Initialize models
    with st.spinner("Loading models... This may take a minute..."):
        initialize_models()

    # File uploader
    uploaded_file = st.file_uploader("Choose an image", type=['png', 'jpg', 'jpeg'])
    if uploaded_file is None:
        return

    # Display uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Identifies the current upload so that a video generated from a
    # previously uploaded image is not shown next to a different one.
    source_key = (uploaded_file.name, uploaded_file.size)

    if st.button("Generate Video"):
        # Drop any earlier result so a failed run does not leave it on screen.
        if 'video_result' in st.session_state:
            del st.session_state['video_result']

        try:
            progress_text = "Operation in progress. Please wait..."
            my_bar = st.progress(0, text=progress_text)

            video_path, caption = generate_video_from_image(image, my_bar)

            if video_path and os.path.exists(video_path):
                with open(video_path, 'rb') as video_file:
                    video_bytes = video_file.read()

                # Persist across reruns (see docstring).
                st.session_state['video_result'] = {
                    'source': source_key,
                    'video_bytes': video_bytes,
                    'caption': caption,
                    'video_path': video_path,
                }
            else:
                st.error("Failed to generate video. Please try again.")

        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            st.error(f"An error occurred: {str(e)}")

    # Render the stored result on every run, as long as it belongs to the
    # image that is currently uploaded.
    if 'video_result' in st.session_state:
        result = st.session_state['video_result']
        if result['source'] == source_key:
            _render_video_result(
                result['video_bytes'],
                result['caption'],
                result['video_path'],
            )

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()