"""Streamlit app that captions an uploaded image and turns it into a short video."""

import os
import tempfile

import numpy as np
import streamlit as st
import torch
from diffusers import VideoToVideoSDPipeline
from diffusers.utils import export_to_video
from PIL import Image
from transformers import pipeline

# Frame rate used both to size the generated clip and to encode the output file.
VIDEO_FPS = 8
# (width, height) the input image is resized to before it is fed to the model.
MODEL_FRAME_SIZE = (576, 320)


def _report_progress(progress_bar, fraction, message):
    """Update the optional Streamlit progress element; no-op when it is None."""
    if progress_bar is not None:
        progress_bar.progress(fraction, message)


def _remove_if_exists(path):
    """Delete *path* when it is set and present on disk."""
    if path and os.path.exists(path):
        os.unlink(path)


def generate_video_from_image(image, duration_seconds=10, progress_bar=None):
    """Generate a video from an image using VideoToVideoSDPipeline.

    The image is captioned with BLIP, repeated into a static input clip, and
    that clip is re-rendered by the video-to-video pipeline with the caption
    as the text prompt.

    Args:
        image: PIL image to animate.
        duration_seconds: Requested clip length in seconds; the number of
            frames is ``duration_seconds * VIDEO_FPS`` (at least one frame).
        progress_bar: Optional Streamlit progress element. When given, it is
            updated at each stage of the generation.

    Returns:
        A tuple ``(output_path, caption)``. ``output_path`` is a temporary
        ``.mp4`` file that the caller is responsible for deleting.

    Raises:
        Exception: Any error from model loading, inference or video export is
            propagated to the caller, which is expected to report it.
    """
    _report_progress(progress_bar, 0.1, "Generating image caption...")

    # The captioner and the video model both work on RGB input, so normalise
    # the mode once up front.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Setup image captioning pipeline
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    caption = captioner(image)[0]['generated_text']
    st.write(f"Generated caption: *{caption}*")

    _report_progress(progress_bar, 0.3, "Loading Video Generation model...")

    # Half precision is only reliable on GPU; fall back to float32 on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32

    # NOTE: this must not be named `pipeline`; doing so would make the
    # imported transformers factory a local name and break the captioner call
    # above with UnboundLocalError.
    video_pipe = VideoToVideoSDPipeline.from_pretrained(
        "cerspense/zeroscope_v2_576w",
        torch_dtype=dtype,
    ).to(device)

    _report_progress(progress_bar, 0.4, "Processing image...")
    image = image.resize(MODEL_FRAME_SIZE)

    _report_progress(progress_bar, 0.5, "Generating video frames...")

    # The video-to-video pipeline takes an input clip rather than a frame
    # count, so the still image is repeated to the requested length.
    num_frames = max(1, int(duration_seconds * VIDEO_FPS))
    source_clip = [image] * num_frames
    result = video_pipe(
        prompt=caption,
        video=source_clip,
        num_inference_steps=50,
        guidance_scale=7.5,
    )
    # `frames` is batched; the single prompt yields a single video.
    video_frames = result.frames[0]

    _report_progress(progress_bar, 0.8, "Creating final video...")

    # Reserve a path for the encoded video; the handle is closed before the
    # exporter writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file:
        output_path = tmp_file.name

    try:
        export_to_video(video_frames, output_path, fps=VIDEO_FPS)
    except Exception:
        # Do not leave an orphaned temp file behind when encoding fails.
        _remove_if_exists(output_path)
        raise

    _report_progress(progress_bar, 1.0, "Video generation complete!")
    return output_path, caption


def main():
    """Render the page: upload an image, generate the video, show and offer it."""
    st.set_page_config(page_title="AI Video Generator", page_icon="🎥")

    st.title("🎥 AI Video Generator")
    st.write("""
    Upload an image to generate a video with AI-powered motion and transitions.
    The app will automatically generate a caption for your image and use it as inspiration for the video.
    """)

    # Add warning about computational requirements
    st.warning("Note: Video generation may take several minutes depending on the duration and available computing resources.")

    uploaded_file = st.file_uploader("Choose an image", type=['png', 'jpg', 'jpeg'])

    # Duration selector (adjusted for this model's capabilities)
    duration = st.slider("Video duration (seconds)", min_value=1, max_value=15, value=5)

    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    if not st.button("Generate Video"):
        return

    video_path = None
    try:
        progress_text = "Operation in progress. Please wait..."
        my_bar = st.progress(0, text=progress_text)

        video_path, _caption = generate_video_from_image(image, duration, my_bar)

        if video_path and os.path.exists(video_path):
            with open(video_path, 'rb') as video_file:
                video_bytes = video_file.read()

            st.download_button(
                label="Download Video",
                data=video_bytes,
                file_name="generated_video.mp4",
                mime="video/mp4",
            )
            st.video(video_bytes)
        else:
            st.error("Failed to generate video. Please try again.")
    except Exception as e:
        # Single reporting point for every failure in the generation flow.
        st.error(f"An error occurred: {str(e)}")
    finally:
        # The bytes are already in memory, so the temp file can always go,
        # including when reading or rendering raised.
        _remove_if_exists(video_path)


if __name__ == "__main__":
    main()