testdeep123 committed on
Commit 0212e21 · verified · 1 Parent(s): c7600a9

Update app.py

Files changed (1)
  1. app.py +97 -109
app.py CHANGED
@@ -1,27 +1,31 @@
  """
- Full Code: Orbit Video Engine with Advanced Gradio UI
-
- This script combines the video-generation code (using Kokoro for TTS,
- MoviePy for video operations, Pexels/Google image/video search, etc.) with a
- Gradio Blocks UI that allows:
- 1. Content input and script generation.
- 2. Dynamic clip editing (change prompt, narration, or upload custom media).
- 3. Video settings (resolution, render speed, background music, subtitle settings).
- 4. Final video generation and preview/download.
  """

- # --------- IMPORTS ---------
  from kokoro import KPipeline
-
  import soundfile as sf
  import torch
  import os, time, random, math, json, tempfile, shutil, re, requests
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips, CompositeVideoClip, TextClip, CompositeAudioClip
  import moviepy.video.fx.all as vfx
  import moviepy.config as mpy_config
  from pydub import AudioSegment
- from pydub.generators import Sine
- from PIL import Image, ImageDraw, ImageFont
  import numpy as np
  from bs4 import BeautifulSoup
  from urllib.parse import quote
@@ -30,24 +34,25 @@ from gtts import gTTS
  import cv2
  import gradio as gr

- # --------- GLOBAL CONFIGURATION ---------
- pipeline = KPipeline(lang_code='a')  # Use American English voice. (Uses 'af_heart' for American English)
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
  OPENROUTER_API_KEY = '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'
  OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
- USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

- # These globals are later set per run
  TARGET_RESOLUTION = None
  CAPTION_COLOR = None
  TEMP_FOLDER = None

- # --------- HELPER FUNCTIONS ---------

  def generate_script(user_input):
-     """Generate documentary script using OpenRouter API."""
      headers = {
          'Authorization': f'Bearer {OPENROUTER_API_KEY}',
          'HTTP-Referer': 'https://your-domain.com',
@@ -73,7 +78,7 @@ Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).

  No Special Formatting:

- No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text

  Generalized Search Terms:

@@ -131,8 +136,10 @@ Now here is the Topic/scrip: {user_input}

  def parse_script(script_text):
      """
-     Parse the generated script into a list of clip elements.
-     Each clip consists of a media element (with a 'prompt') and a TTS element with narration.
      """
      sections = {}
      current_title = None
@@ -259,7 +266,7 @@ def search_pexels_images(query, pexels_api_key):
      return None

  def search_google_images(query):
-     """Search for images on Google Images."""
      try:
          search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
          headers = {"User-Agent": USER_AGENT}
@@ -281,10 +288,10 @@ def search_google_images(query):
      return None

  def download_image(image_url, filename):
-     """Download an image from a URL to a local file with verification."""
      try:
          headers = {"User-Agent": USER_AGENT}
-         print(f"Downloading image from: {image_url} to {filename}")
          response = requests.get(image_url, headers=headers, stream=True, timeout=15)
          response.raise_for_status()
          with open(filename, 'wb') as f:
@@ -298,10 +305,9 @@ def download_image(image_url, filename):
              if img.mode != 'RGB':
                  img = img.convert('RGB')
                  img.save(filename)
-             print(f"Image validated and processed: {filename}")
              return filename
          except Exception as e_validate:
-             print(f"Downloaded file is not a valid image: {e_validate}")
              if os.path.exists(filename):
                  os.remove(filename)
              return None
@@ -319,7 +325,6 @@ def download_video(video_url, filename):
          with open(filename, 'wb') as f:
              for chunk in response.iter_content(chunk_size=8192):
                  f.write(chunk)
-         print(f"Video downloaded to: {filename}")
          return filename
      except Exception as e:
          print(f"Video download error: {e}")
@@ -330,17 +335,14 @@ def download_video(video_url, filename):

  def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
      """
      Generate a visual asset for the clip.
-     If user_image is provided (from custom media upload) use it; otherwise,
-     use Pexels (and Google Images for news related queries) with fallbacks.
      """
      safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
      if user_image is not None:
-         # Assume user_image is a local path or file-like object.
-         print(f"Using custom media provided for prompt: {prompt}")
          return {"path": user_image, "asset_type": "image"}
-
      if "news" in prompt.lower():
-         print(f"News query detected: {prompt}. Using Google Images...")
          image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
          image_url = search_google_images(prompt)
          if image_url:
@@ -368,11 +370,11 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
          downloaded_fallback = download_image(fallback_url, fallback_file)
          if downloaded_fallback:
              return {"path": downloaded_fallback, "asset_type": "image"}
-     print(f"Failed to generate asset for: {prompt}")
      return None

  def generate_silent_audio(duration, sample_rate=24000):
-     """Generate a silent WAV file for TTS fallback."""
      num_samples = int(duration * sample_rate)
      silence = np.zeros(num_samples, dtype=np.float32)
      silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
@@ -380,7 +382,7 @@
      return silent_path

  def generate_tts(text, voice):
-     """Generate TTS audio using Kokoro, falling back to gTTS if needed."""
      safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
      file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
      if os.path.exists(file_path):
@@ -407,7 +409,7 @@
          return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))

  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
-     """Apply a smooth Ken Burns effect to images."""
      target_w, target_h = target_resolution
      clip_aspect = clip.w / clip.h
      target_aspect = target_w / target_h
@@ -428,32 +430,17 @@
      if effect_type is None or effect_type == "random":
          effect_type = random.choice(available_effects)
      if effect_type == "zoom-in":
-         start_zoom = 0.9
-         end_zoom = 1.1
-         start_center = (new_width / 2, new_height / 2)
-         end_center = start_center
      elif effect_type == "zoom-out":
-         start_zoom = 1.1
-         end_zoom = 0.9
-         start_center = (new_width / 2, new_height / 2)
-         end_center = start_center
      elif effect_type == "pan-left":
-         start_zoom = 1.0
-         end_zoom = 1.0
-         start_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
-         end_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
      elif effect_type == "pan-right":
-         start_zoom = 1.0
-         end_zoom = 1.0
-         start_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
-         end_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
      elif effect_type == "up-left":
-         start_zoom = 1.0
-         end_zoom = 1.0
-         start_center = (max_offset_x + target_w / 2, max_offset_y + target_h / 2)
-         end_center = (target_w / 2, target_h / 2)
      else:
-         raise ValueError(f"Unsupported effect_type: {effect_type}")
      def transform_frame(get_frame, t):
          frame = get_frame(t)
          ratio = t / clip.duration if clip.duration > 0 else 0
@@ -463,10 +450,8 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
          crop_h = int(target_h / current_zoom)
          current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
          current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
-         min_center_x = crop_w / 2
-         max_center_x = new_width - crop_w / 2
-         min_center_y = crop_h / 2
-         max_center_y = new_height - crop_h / 2
          current_center_x = max(min_center_x, min(current_center_x, max_center_x))
          current_center_y = max(min_center_y, min(current_center_y, max_center_y))
          cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
@@ -490,17 +475,16 @@ def resize_to_fill(clip, target_resolution):
      return clip

  def find_mp3_files():
-     """Find an MP3 file for background music."""
      mp3_files = []
      for root, dirs, files in os.walk('.'):
          for file in files:
              if file.endswith('.mp3'):
-                 mp3_path = os.path.join(root, file)
-                 mp3_files.append(mp3_path)
      return mp3_files[0] if mp3_files else None

  def add_background_music(final_video, bg_music_volume=0.08):
-     """Add background music to the final video."""
      try:
          bg_music_path = find_mp3_files()
          if bg_music_path and os.path.exists(bg_music_path):
@@ -520,7 +504,7 @@ def add_background_music(final_video, bg_music_volume=0.08):
      return final_video

  def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
-     """Create a video clip from a media asset and its corresponding TTS audio, optionally with subtitles."""
      try:
          if not os.path.exists(media_path) or not os.path.exists(tts_path):
              return None
@@ -595,7 +579,7 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
          return None

  def fix_imagemagick_policy():
-     """Attempt to fix ImageMagick security policies."""
      try:
          policy_paths = [
              "/etc/ImageMagick-6/policy.xml",
@@ -617,8 +601,8 @@ def fix_imagemagick_policy():
  def generate_video(user_input, resolution, caption_option):
      """
      Original video generation function.
-     This version takes only the basic inputs and uses the global clip data generated by the script.
-     You may need to extend this to use updated clip data from the dynamic clip editor.
      """
      global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
      if resolution == "Full HD (1920x1080)":
@@ -629,9 +613,7 @@ def generate_video(user_input, resolution, caption_option):
          TARGET_RESOLUTION = (1920, 1080)
      CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
      TEMP_FOLDER = tempfile.mkdtemp()
-     fix_success = fix_imagemagick_policy()
-     if not fix_success:
-         print("Unable to fix ImageMagick policies; proceeding with alternative methods.")
      print("Generating script from API...")
      script = generate_script(user_input)
      if not script:
@@ -678,15 +660,16 @@
      shutil.rmtree(TEMP_FOLDER)
      return OUTPUT_VIDEO_FILENAME

- # --------- NEW CALLBACK FUNCTIONS FOR THE NEW UI ---------
- # Global variable to store clip data from script-generation.
  generated_clip_data = []

  def generate_script_clips(topic_input, full_script):
      """
-     Callback when the user clicks "📝 Generate Script & Load Clips".
-     Uses full_script if provided, else generates a script based on topic_input.
-     Returns generated raw script and a JSON string representing clip data.
      """
      input_text = full_script if full_script.strip() != "" else topic_input
      script = generate_script(input_text)
@@ -701,7 +684,7 @@
          clip_info = {
              "prompt": media_elem.get("prompt", ""),
              "narration": tts_elem.get("text", ""),
-             "custom_media": None
          }
          clip_list.append(clip_info)
      global generated_clip_data
@@ -710,25 +693,24 @@

  def update_clip_editor(clip_json):
      """
-     Generate a dynamic interface (using Gradio Blocks) for editing clip details.
-     Returns a list of Accordion components.
      """
      clip_list = json.loads(clip_json)
      editors = []
      for idx, clip in enumerate(clip_list, start=1):
-         accordion_title = f"Clip {idx}: {clip['prompt']}"
-         # Create an accordion for each clip.
-         with gr.Accordion(label=accordion_title, open=(idx<=2)) as accordion:
              prompt_box = gr.Textbox(label="Visual Prompt", value=clip["prompt"])
-             narration_box = gr.Textbox(label="Narration Text", value=clip["narration"], lines=3)
-             custom_media_box = gr.File(label="Custom Media Upload (overrides prompt)", file_types=[".jpg", ".png", ".mp4"])
-             # Pack into a dictionary structure.
-             editors.append({
-                 "prompt": prompt_box,
-                 "narration": narration_box,
-                 "custom_media": custom_media_box
-             })
-         editors.append(accordion)
      return editors

  def generate_final_video(topic_input, full_script, clip_data_json, resolution, render_speed,
@@ -736,36 +718,37 @@
                           subtitle_enabled, font_dropdown, font_size, outline_width,
                           font_color, outline_color, subtitle_position):
      """
-     Callback for "🎬 Generate Video" button.
-     Here we would combine the user-edited clip data with settings and call video generation.
-     Note: For demonstration, we will use the original generate_video function.
      """
-     # In a real integration you would process clip_data_json to update each clip with custom overrides.
      print("Final settings:")
      print(f"Resolution: {resolution}, Render Speed: {render_speed}, Video Clip %: {video_clip_percent}, Zoom/Pan: {zoom_pan}")
      if bg_music_file is not None:
          print("Custom background music provided.")
-     # For now, we simply call generate_video using topic_input and full_script.
      video_file = generate_video(topic_input, resolution, "Yes")
-     return video_file

- # --------- GRADIO BLOCKS UI ---------
  with gr.Blocks(title="🚀 Orbit Video Engine") as demo:
      with gr.Row():
          # Column 1: Content Input & Script Generation
          with gr.Column(scale=1):
              gr.Markdown("## 1. Content Input")
              topic_input = gr.Textbox(label="Topic Input", placeholder="Enter your video topic here...")
-             full_script = gr.Textbox(label="Or Paste Full Script", placeholder="Paste full script here...", lines=5)
              generate_script_btn = gr.Button("📝 Generate Script & Load Clips")
              generated_script_disp = gr.Textbox(label="Generated Script", interactive=False, visible=False)

              clip_data_storage = gr.Textbox(visible=False)
-         # Column 2: Clip Editor (Dynamic)
          with gr.Column(scale=1):
              gr.Markdown("## 2. Edit Clips")
-             gr.Markdown("Modify each clip's visual prompt, narration or upload custom media.")
              clip_editor_container = gr.Column(visible=False)
-         # Column 3: Settings & Output
          with gr.Column(scale=1):
              gr.Markdown("## 3. Video Settings")
              resolution = gr.Radio(choices=["Short (1080x1920)", "Full HD (1920x1080)"],
@@ -788,9 +771,12 @@ with gr.Blocks(title="🚀 Orbit Video Engine") as demo:
              subtitle_position = gr.Radio(choices=["center", "bottom", "top"], label="Subtitle Position", value="center")
              gr.Markdown("## 4. Output")
              generate_video_btn = gr.Button("🎬 Generate Video")
              video_preview = gr.Video(label="Generated Video")
              download_video_file = gr.File(label="Download Video", interactive=False)
-     # Interactions
      generate_script_btn.click(
          fn=generate_script_clips,
          inputs=[topic_input, full_script],
@@ -808,12 +794,14 @@ with gr.Blocks(title="🚀 Orbit Video Engine") as demo:
          inputs=[clip_editor_container],
          outputs=[clip_editor_container]
      )

      generate_video_btn.click(
          fn=generate_final_video,
          inputs=[topic_input, full_script, clip_data_storage, resolution, render_speed,
                  video_clip_percent, zoom_pan, bg_music_file, bg_music_volume,
                  subtitle_enabled, font_dropdown, font_size, outline_width, font_color, outline_color, subtitle_position],
-         outputs=[video_preview]
      )
-
  demo.launch(share=True)
 
app.py (updated)

  """
+ Full Code: Orbit Video Engine with Dynamic Clip Editor and Video Output Download
+
+ This script implements a Gradio Blocks UI that:
+ 1. Accepts a video topic or a full script.
+ 2. Generates a documentary-style script using an AI API.
+ 3. Parses the script into clip data.
+ 4. Dynamically creates an editor for each clip, where users can:
+    - Edit the visual prompt.
+    - Edit the TTS (narration) text.
+    - Upload their own image/video to override the generated media.
+ 5. Provides video settings (resolution, render speed, clip preferences, background music, subtitle settings).
+ 6. Renders the video and displays it so you can preview it and download the final MP4 file.
+
+ Make sure you have all required dependencies installed.
  """

+ # ------------------- IMPORTS -------------------
  from kokoro import KPipeline
  import soundfile as sf
  import torch
  import os, time, random, math, json, tempfile, shutil, re, requests
+ from moviepy.editor import (VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips,
+                             CompositeVideoClip, TextClip, CompositeAudioClip)
  import moviepy.video.fx.all as vfx
  import moviepy.config as mpy_config
  from pydub import AudioSegment
+ from PIL import Image
  import numpy as np
  from bs4 import BeautifulSoup
  from urllib.parse import quote

  import cv2
  import gradio as gr

+ # ------------------- GLOBAL CONFIGURATION -------------------
+ pipeline = KPipeline(lang_code='a')  # American English voice (uses 'af_heart')
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
  OPENROUTER_API_KEY = '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'
  OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
+ USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/91.0.4472.124 Safari/537.36"

+ # These globals will be set for each run
  TARGET_RESOLUTION = None
  CAPTION_COLOR = None
  TEMP_FOLDER = None

+ # ------------------- HELPER FUNCTIONS -------------------
+
  def generate_script(user_input):
+     """Generate a documentary script using the OpenRouter API."""
      headers = {
          'Authorization': f'Bearer {OPENROUTER_API_KEY}',
          'HTTP-Referer': 'https://your-domain.com',
 
  No Special Formatting:

+ No bold, italics, or special characters. You are an assistant AI; your task is to create the script. You aren't a chatbot, so don't write extra text.

  Generalized Search Terms:
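The HTTP request itself sits outside these hunks. Assuming the standard OpenAI-style chat-completions endpoint that OpenRouter exposes, the call generate_script presumably makes looks like this sketch (call_openrouter is an illustrative name, not code from this commit; it reuses the module's headers and OPENROUTER_MODEL):

def call_openrouter(prompt, headers):
    # POST the prompt to OpenRouter's chat-completions endpoint
    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json={"model": OPENROUTER_MODEL,
              "messages": [{"role": "user", "content": prompt}]},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]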
 
 
  def parse_script(script_text):
      """
+     Parse the generated script into a list of elements.
+     Each clip (segment) is represented as a pair of elements:
+       - A media element with a 'prompt'
+       - A TTS element with narration text and duration
      """
      sections = {}
      current_title = None
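For orientation, the docstring implies a return value of roughly this shape; the "type" key and the example values are assumptions for illustration, and only "prompt" and "text" are confirmed by the callback code further down:

elements = [
    {"type": "media", "prompt": "city skyline at night"},
    {"type": "tts", "text": "Every city hides a story.", "duration": 4.0},
    # ...one media/TTS pair per clip
]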
 
      return None
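The body of search_pexels_images is elided in this diff; the Pexels v1 search endpoint it wraps works along these lines (a sketch with error handling trimmed; the function name here is illustrative):

def search_pexels_images_sketch(query, pexels_api_key):
    # Pexels expects the raw API key in the Authorization header
    response = requests.get(
        "https://api.pexels.com/v1/search",
        headers={"Authorization": pexels_api_key},
        params={"query": query, "per_page": 15},
        timeout=10,
    )
    response.raise_for_status()
    photos = response.json().get("photos", [])
    return random.choice(photos)["src"]["original"] if photos else None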
 
  def search_google_images(query):
+     """Search for images on Google Images (useful for news-like queries)."""
      try:
          search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
          headers = {"User-Agent": USER_AGENT}
 
      return None

  def download_image(image_url, filename):
+     """Download an image from a URL to a local file and verify it."""
      try:
          headers = {"User-Agent": USER_AGENT}
+         print(f"Downloading image from: {image_url}")
          response = requests.get(image_url, headers=headers, stream=True, timeout=15)
          response.raise_for_status()
          with open(filename, 'wb') as f:

              if img.mode != 'RGB':
                  img = img.convert('RGB')
                  img.save(filename)
              return filename
          except Exception as e_validate:
+             print(f"Invalid image file: {e_validate}")
              if os.path.exists(filename):
                  os.remove(filename)
              return None
 
          with open(filename, 'wb') as f:
              for chunk in response.iter_content(chunk_size=8192):
                  f.write(chunk)
          return filename
      except Exception as e:
          print(f"Video download error: {e}")
 
  def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
      """
      Generate a visual asset for the clip.
+     If a user_image (custom media) is provided, it is used directly.
+     Otherwise, use Pexels (or Google Images for news queries) with fallbacks.
      """
      safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
      if user_image is not None:
+         print(f"Using custom media for prompt: {prompt}")
          return {"path": user_image, "asset_type": "image"}
      if "news" in prompt.lower():
          image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
          image_url = search_google_images(prompt)
          if image_url:

          downloaded_fallback = download_image(fallback_url, fallback_file)
          if downloaded_fallback:
              return {"path": downloaded_fallback, "asset_type": "image"}
+     print(f"Failed to generate asset for prompt: {prompt}")
      return None
 
  def generate_silent_audio(duration, sample_rate=24000):
+     """Generate a silent WAV audio file of given duration."""
      num_samples = int(duration * sample_rate)
      silence = np.zeros(num_samples, dtype=np.float32)
      silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")

      return silent_path

  def generate_tts(text, voice):
+     """Generate TTS audio using Kokoro with fallback to gTTS."""
      safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
      file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
      if os.path.exists(file_path):

          return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
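The Kokoro call and the gTTS fallback between these lines are elided. A minimal sketch of that pattern, assuming Kokoro's generator API (it yields audio chunks at 24 kHz) and the module-level pipeline, sf, np, gTTS, and AudioSegment imports:

try:
    # Kokoro yields (graphemes, phonemes, audio) tuples; concatenate the audio
    segments = [audio for _, _, audio in pipeline(text, voice=voice, speed=1)]
    sf.write(file_path, np.concatenate(segments), 24000)
except Exception:
    # Fallback: gTTS produces MP3, so convert it to WAV with pydub
    mp3_path = file_path.replace(".wav", ".mp3")
    gTTS(text=text, lang='en').save(mp3_path)
    AudioSegment.from_mp3(mp3_path).export(file_path, format="wav")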
 
  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
+     """Apply a Ken Burns effect to the clip (used for images)."""
      target_w, target_h = target_resolution
      clip_aspect = clip.w / clip.h
      target_aspect = target_w / target_h
 
      if effect_type is None or effect_type == "random":
          effect_type = random.choice(available_effects)
      if effect_type == "zoom-in":
+         start_zoom = 0.9; end_zoom = 1.1; start_center = (new_width/2, new_height/2); end_center = start_center
      elif effect_type == "zoom-out":
+         start_zoom = 1.1; end_zoom = 0.9; start_center = (new_width/2, new_height/2); end_center = start_center
      elif effect_type == "pan-left":
+         start_zoom = 1.0; end_zoom = 1.0; start_center = (max_offset_x+target_w/2, (max_offset_y//2)+target_h/2); end_center = (target_w/2, (max_offset_y//2)+target_h/2)
      elif effect_type == "pan-right":
+         start_zoom = 1.0; end_zoom = 1.0; start_center = (target_w/2, (max_offset_y//2)+target_h/2); end_center = (max_offset_x+target_w/2, (max_offset_y//2)+target_h/2)
      elif effect_type == "up-left":
+         start_zoom = 1.0; end_zoom = 1.0; start_center = (max_offset_x+target_w/2, max_offset_y+target_h/2); end_center = (target_w/2, target_h/2)
      else:
+         raise ValueError(f"Unsupported effect: {effect_type}")
      def transform_frame(get_frame, t):
          frame = get_frame(t)
          ratio = t / clip.duration if clip.duration > 0 else 0

          crop_h = int(target_h / current_zoom)
          current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
          current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
+         min_center_x = crop_w / 2; max_center_x = new_width - crop_w / 2
+         min_center_y = crop_h / 2; max_center_y = new_height - crop_h / 2
          current_center_x = max(min_center_x, min(current_center_x, max_center_x))
          current_center_y = max(min_center_y, min(current_center_y, max_center_y))
          cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
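Taken together, transform_frame linearly interpolates zoom and center over the clip's duration, crops each frame with cv2.getRectSubPix, and (in the elided lines) resizes the crop back to the target resolution. An assumed call pattern with MoviePy 1.x:

clip = ImageClip("photo.jpg").set_duration(5)
clip = apply_kenburns_effect(clip, target_resolution=(1920, 1080), effect_type="zoom-in")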
 
      return clip

  def find_mp3_files():
+     """Search for an MP3 file in the current directory (for background music)."""
      mp3_files = []
      for root, dirs, files in os.walk('.'):
          for file in files:
              if file.endswith('.mp3'):
+                 mp3_files.append(os.path.join(root, file))
      return mp3_files[0] if mp3_files else None

  def add_background_music(final_video, bg_music_volume=0.08):
+     """Add background music to the final video if an MP3 file is found."""
      try:
          bg_music_path = find_mp3_files()
          if bg_music_path and os.path.exists(bg_music_path):
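The mixing itself is elided here; with MoviePy 1.x it is typically done along these lines (a sketch under that assumption, not the commit's exact body):

from moviepy.audio.fx.all import audio_loop, volumex

bg = AudioFileClip(bg_music_path)
bg = audio_loop(bg, duration=final_video.duration)   # loop music to video length
bg = volumex(bg, bg_music_volume)                    # keep it quiet under the narration
final_video = final_video.set_audio(CompositeAudioClip([final_video.audio, bg]))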
 
      return final_video

  def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
+     """Create a video clip from a media asset and TTS audio, and add subtitles if enabled."""
      try:
          if not os.path.exists(media_path) or not os.path.exists(tts_path):
              return None
 
      return None

  def fix_imagemagick_policy():
+     """Attempt to modify ImageMagick security policies if needed."""
      try:
          policy_paths = [
              "/etc/ImageMagick-6/policy.xml",
 
  def generate_video(user_input, resolution, caption_option):
      """
      Original video generation function.
+     This version uses the generated script to create video clips.
+     (Integration with custom clip edits could be extended further.)
      """
      global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
      if resolution == "Full HD (1920x1080)":

          TARGET_RESOLUTION = (1920, 1080)
      CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
      TEMP_FOLDER = tempfile.mkdtemp()
+     fix_imagemagick_policy()
      print("Generating script from API...")
      script = generate_script(user_input)
      if not script:
 
      shutil.rmtree(TEMP_FOLDER)
      return OUTPUT_VIDEO_FILENAME

+ # ------------------- NEW CALLBACKS FOR THE ADVANCED UI -------------------
+
+ # Global variable to store clip data from the script generation.
  generated_clip_data = []

  def generate_script_clips(topic_input, full_script):
      """
+     Callback when "📝 Generate Script & Load Clips" is clicked.
+     Uses the full_script if provided; otherwise, generates based on the topic.
+     Returns the raw generated script and a JSON string containing the clip data.
      """
      input_text = full_script if full_script.strip() != "" else topic_input
      script = generate_script(input_text)

          clip_info = {
              "prompt": media_elem.get("prompt", ""),
              "narration": tts_elem.get("text", ""),
+             "custom_media": ""  # initially empty; user may upload a file later
          }
          clip_list.append(clip_info)
      global generated_clip_data
 
  def update_clip_editor(clip_json):
      """
+     Dynamically build a UI editor for each clip.
+     Each editor contains:
+       - A textbox to edit the visual prompt.
+       - A textbox to edit the TTS text.
+       - A File uploader to allow custom media upload.
+     Returns a list of dynamically generated Accordions.
      """
      clip_list = json.loads(clip_json)
      editors = []
+     # Create a container for all clip editors.
      for idx, clip in enumerate(clip_list, start=1):
+         with gr.Accordion(label=f"Clip {idx}: {clip['prompt']}", open=(idx<=2)) as acc:
              prompt_box = gr.Textbox(label="Visual Prompt", value=clip["prompt"])
+             tts_box = gr.Textbox(label="TTS Text", value=clip["narration"], lines=3)
+             custom_media_box = gr.File(label="Upload custom image/video", file_types=[".jpg", ".png", ".mp4"])
+             # Pack values in a dictionary and return them later (here we simply display the components).
+             # In a full integration you could use additional state to track these values.
+         editors.append(acc)
      return editors
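As the inline comments concede, components created inside a plain callback like this are generally not rendered by Gradio; returning a list of Accordions to a gr.Column output will not repopulate the page. A sketch of the pattern that does support dynamic per-clip editors in Gradio 4.x, using gr.render (shown as an alternative for reference, not code from this commit):

with gr.Blocks() as dynamic_demo:
    clip_state = gr.State([])  # list of {"prompt": ..., "narration": ...} dicts

    @gr.render(inputs=clip_state)
    def render_clip_editors(clips):
        for idx, clip in enumerate(clips, start=1):
            with gr.Accordion(f"Clip {idx}: {clip['prompt']}", open=(idx <= 2)):
                gr.Textbox(label="Visual Prompt", value=clip["prompt"])
                gr.Textbox(label="TTS Text", value=clip["narration"], lines=3)
                gr.File(label="Upload custom image/video", file_types=[".jpg", ".png", ".mp4"])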
 
  def generate_final_video(topic_input, full_script, clip_data_json, resolution, render_speed,
                           subtitle_enabled, font_dropdown, font_size, outline_width,
                           font_color, outline_color, subtitle_position):
      """
+     Callback when "🎬 Generate Video" is clicked.
+     In a full implementation you would update each clip from the dynamic editors.
+     Here, for demonstration purposes, we simply call the original generate_video function.
+     The final video file is returned and used both for preview and download.
      """
      print("Final settings:")
      print(f"Resolution: {resolution}, Render Speed: {render_speed}, Video Clip %: {video_clip_percent}, Zoom/Pan: {zoom_pan}")
      if bg_music_file is not None:
          print("Custom background music provided.")
      video_file = generate_video(topic_input, resolution, "Yes")
+     # Return the file path for both the video preview and the download component.
+     return video_file, video_file
 
+ # ------------------- GRADIO BLOCKS UI -------------------
  with gr.Blocks(title="🚀 Orbit Video Engine") as demo:
      with gr.Row():
          # Column 1: Content Input & Script Generation
          with gr.Column(scale=1):
              gr.Markdown("## 1. Content Input")
              topic_input = gr.Textbox(label="Topic Input", placeholder="Enter your video topic here...")
+             full_script = gr.Textbox(label="Or Paste Full Script", placeholder="Paste full script (using [Title] etc.)", lines=5)
              generate_script_btn = gr.Button("📝 Generate Script & Load Clips")
              generated_script_disp = gr.Textbox(label="Generated Script", interactive=False, visible=False)
+             # Hidden storage for clip JSON data
              clip_data_storage = gr.Textbox(visible=False)
+         # Column 2: Dynamic Clip Editor
          with gr.Column(scale=1):
              gr.Markdown("## 2. Edit Clips")
+             gr.Markdown("Modify each clip's Visual Prompt, TTS Text or upload your own media.")
              clip_editor_container = gr.Column(visible=False)
+         # Column 3: Video Settings & Output
          with gr.Column(scale=1):
              gr.Markdown("## 3. Video Settings")
              resolution = gr.Radio(choices=["Short (1080x1920)", "Full HD (1920x1080)"],
 
              subtitle_position = gr.Radio(choices=["center", "bottom", "top"], label="Subtitle Position", value="center")
              gr.Markdown("## 4. Output")
              generate_video_btn = gr.Button("🎬 Generate Video")
+             # Two outputs: one for video preview, one for file download
              video_preview = gr.Video(label="Generated Video")
              download_video_file = gr.File(label="Download Video", interactive=False)
+
+     # ------------------- INTERACTIONS -------------------
+     # When "Generate Script & Load Clips" is clicked:
      generate_script_btn.click(
          fn=generate_script_clips,
          inputs=[topic_input, full_script],

          inputs=[clip_editor_container],
          outputs=[clip_editor_container]
      )
+
+     # When "Generate Video" is clicked, call the video generation callback.
      generate_video_btn.click(
          fn=generate_final_video,
          inputs=[topic_input, full_script, clip_data_storage, resolution, render_speed,
                  video_clip_percent, zoom_pan, bg_music_file, bg_music_volume,
                  subtitle_enabled, font_dropdown, font_size, outline_width, font_color, outline_color, subtitle_position],
+         outputs=[video_preview, download_video_file]
      )
+
  demo.launch(share=True)