testdeep123 committed on
Commit
c490f2c
·
verified ·
1 Parent(s): 47a0253

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +598 -370
app.py CHANGED
@@ -1,404 +1,632 @@
 
1
  # -*- coding: utf-8 -*-
2
- from kokoro import KPipeline
3
- import soundfile as sf
4
- import torch
 
 
 
 
5
  import os
6
- from moviepy.editor import (
7
- VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips,
8
- CompositeVideoClip, CompositeAudioClip, TextClip
9
- )
10
- import moviepy.video.fx.all as vfx
11
- import moviepy.config as mpy_config
12
- from PIL import Image
13
- import tempfile
14
- import random
15
- import cv2
16
- import math
17
- import requests
18
  import re
19
  import time
20
- import numpy as np
21
- from bs4 import BeautifulSoup
22
- from urllib.parse import quote
23
- import gradio as gr
24
  import shutil
25
  import traceback
 
 
 
 
 
26
 
27
- # --- Initialize Kokoro TTS pipeline ---
28
- pipeline = None
29
- def initialize_kokoro():
30
- global pipeline
31
- if pipeline is None:
32
- try:
33
- print("Initializing Kokoro TTS pipeline (lang_code='a')...")
34
- pipeline = KPipeline(lang_code='a')
35
- print("Kokoro TTS pipeline initialized successfully.")
36
- except Exception as e:
37
- print(f"FATAL ERROR initializing Kokoro pipeline: {e}")
38
- print("TTS generation will not be available.")
39
- pipeline = None
40
- return pipeline
41
 
42
- # Attempt initialization once at the start
43
- initialize_kokoro()
 
 
 
 
 
 
44
 
45
- # --- Configure ImageMagick ---
46
  try:
47
- imagemagick_path = None
48
- common_paths = ["/usr/bin/convert", "/usr/local/bin/convert", "/opt/homebrew/bin/convert"]
49
- for path in common_paths:
50
- if os.path.exists(path):
51
- imagemagick_path = path
52
- break
53
- if imagemagick_path:
54
- mpy_config.change_settings({"IMAGEMAGICK_BINARY": imagemagick_path})
55
- print(f"ImageMagick path set to: {imagemagick_path}")
56
- elif not any(shutil.which(cmd) for cmd in ["convert", "magick"]):
57
- print("Warning: ImageMagick 'convert' or 'magick' command not found in common paths or system PATH.")
58
- print(" TextClip captions requiring ImageMagick may fail.")
59
- except Exception as e:
60
- print(f"Warning: Error configuring ImageMagick: {e}. TextClip captions might fail.")
61
-
62
- # --- Global Configuration ---
63
- PEXELS_API_KEY = os.getenv('PEXELS_API_KEY', 'your_pexels_api_key_here')
64
- OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', 'your_openrouter_api_key_here')
65
-
66
- OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
67
- OUTPUT_VIDEO_FILENAME = "final_video.mp4"
68
- USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
69
-
70
- TARGET_RESOLUTION = None
71
- CAPTION_COLOR = None
72
- TEMP_FOLDER = None
73
-
74
- # --- Helper Functions ---
75
-
76
- def generate_script(user_input):
77
- """Generates script using OpenRouter API."""
78
- if not OPENROUTER_API_KEY or 'your_openrouter_api_key_here' in OPENROUTER_API_KEY:
79
- print("Error: OpenRouter API Key not set or looks invalid.")
80
- return "Error: OpenRouter API Key not configured."
81
-
82
- headers = {
83
- 'Authorization': f'Bearer {OPENROUTER_API_KEY}',
84
- 'HTTP-Referer': 'http://localhost:7860',
85
- 'X-Title': 'AI Documentary Maker'
86
  }
87
-
88
- prompt = f"""Short Documentary Script GeneratorInstructions:
89
- [Previous prompt content remains exactly the same...]
90
- Now here is the Topic/scrip: {user_input}
91
- """
92
-
93
- data = {
94
- 'model': OPENROUTER_MODEL,
95
- 'messages': [{'role': 'user', 'content': prompt}],
96
- 'temperature': 0.4,
97
- 'max_tokens': 1024
98
- }
99
-
100
- try:
101
- response = requests.post('https://openrouter.ai/api/v1/chat/completions',
102
- headers=headers, json=data, timeout=60)
103
- response.raise_for_status()
104
- response_data = response.json()
 
 
 
 
 
 
 
 
 
105
 
106
- if 'choices' in response_data and len(response_data['choices']) > 0:
107
- script_content = response_data['choices'][0]['message']['content'].strip()
108
- if '[' in script_content and ']' in script_content:
109
- print("Script generated successfully.")
110
- return script_content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  else:
112
- print("Warning: Generated script missing expected format '[Title] Narration'.")
113
- return script_content
114
- else:
115
- print(f"API Error: Unexpected response format from OpenRouter: {response_data}")
116
- return "Error: Could not parse script from API response."
117
-
118
- except requests.exceptions.Timeout:
119
- print("API Error: Request to OpenRouter timed out.")
120
- return "Error: Script generation timed out."
121
- except requests.exceptions.RequestException as e:
122
- print(f"API Error: Request failed: {e}")
123
- error_details = f"Status Code: {e.response.status_code}, Response: {e.response.text}" if e.response else str(e)
124
- print(f" Details: {error_details}")
125
- return f"Error: Failed connect to script generation service ({e.response.status_code if e.response else 'N/A'})."
126
- except Exception as e:
127
- print(f"Error during script generation: {e}")
128
- traceback.print_exc()
129
- return f"Error: Unexpected error during script generation."
130
-
131
- [Rest of the functions with proper formatting...]
132
-
133
- def generate_video_from_script(script, resolution, caption_option, music_file, fps, preset, video_probability):
134
- """Generates the final video from script and options."""
135
- global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
136
- start_time = time.time()
137
- print("\n--- Starting Video Generation ---")
138
- print(f" Options: Res={resolution}, Caps={caption_option}, FPS={fps}, Preset={preset}, VidProb={video_probability:.2f}")
139
- if music_file:
140
- print(f" Music File: {os.path.basename(music_file)}")
141
-
142
- # Setup Resolution
143
- if resolution == "Full (1920x1080)":
144
- TARGET_RESOLUTION = (1920, 1080)
145
- elif resolution == "Short (1080x1920)":
146
- TARGET_RESOLUTION = (1080, 1920)
147
- else:
148
- TARGET_RESOLUTION = (1080, 1920)
149
- print("Warning: Unknown resolution, defaulting to Short.")
150
 
151
- CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
152
-
153
- # Setup Temp Folder
154
- try:
155
- TEMP_FOLDER = tempfile.mkdtemp(prefix="aivideo_")
156
- print(f"Temp folder: {TEMP_FOLDER}")
157
- except Exception as e:
158
- print(f"Error creating temp folder: {e}")
159
- return None, 0, 0
160
-
161
- # ImageMagick Policy Fix (optional)
162
- if CAPTION_COLOR != "transparent":
163
- fix_imagemagick_policy()
164
-
165
- # Parse Script
166
- print("Parsing script...")
167
- elements = parse_script(script)
168
- if not elements:
169
- print("Error: Failed to parse script.")
170
- shutil.rmtree(TEMP_FOLDER)
171
- return None, 0, 0
172
-
173
- paired_elements = []
174
- for i in range(0, len(elements), 2):
175
- if i + 1 < len(elements) and elements[i]['type'] == 'media' and elements[i+1]['type'] == 'tts':
176
- paired_elements.append((elements[i], elements[i+1]))
177
- else:
178
- print(f"Warning: Skipping mismatched element pair at index {i}.")
179
-
180
- total_segments = len(paired_elements)
181
- if total_segments == 0:
182
- print("Error: No valid segments found.")
183
- shutil.rmtree(TEMP_FOLDER)
184
- return None, 0, 0
185
-
186
- # Generate Clips
187
- clips = []
188
- successful_segments = 0
189
 
190
- for idx, (media_elem, tts_elem) in enumerate(paired_elements):
191
- segment_start_time = time.time()
192
- print(f"\n--- Processing Segment {idx+1}/{total_segments} ---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
- media_asset = generate_media(media_elem['prompt'], video_probability, idx, total_segments)
195
- if not media_asset or not media_asset.get('path'):
196
- print("Error: Failed media. Skipping segment.")
197
- continue
198
-
199
- tts_path = generate_tts(tts_elem['text'], tts_elem['voice']) # voice ignored
200
- if not tts_path:
201
- print("Error: Failed TTS. Skipping segment.")
202
- if os.path.exists(media_asset['path']):
203
- try:
204
- os.remove(media_asset['path'])
205
- except OSError:
206
- pass
207
- continue
208
-
209
- clip = create_clip(
210
- media_path=media_asset['path'],
211
- asset_type=media_asset['asset_type'],
212
- tts_path=tts_path,
213
- narration_text=tts_elem['text'],
214
- segment_index=idx
215
- )
216
-
217
- if clip:
218
- clips.append(clip)
219
- successful_segments += 1
220
- print(f"Segment {idx+1} processed in {time.time() - segment_start_time:.2f}s.")
221
- else:
222
- print("Error: Clip creation failed. Skipping segment.")
223
- if os.path.exists(media_asset['path']):
224
- try:
225
- os.remove(media_asset['path'])
226
- except OSError:
227
- pass
228
- if os.path.exists(tts_path):
229
- try:
230
- os.remove(tts_path)
231
- except OSError:
232
- pass
233
- continue
234
-
235
- # Final Assembly
236
- final_video = None
237
- output_path = None
238
 
239
- if not clips:
240
- print("Error: No clips created.")
241
- return None, total_segments, successful_segments
242
 
243
- if successful_segments < total_segments:
244
- print(f"\nWARNING: Only {successful_segments}/{total_segments} segments succeeded.")
245
-
246
- print(f"\nConcatenating {len(clips)} clips...")
247
- try:
248
- final_video = concatenate_videoclips(clips, method="compose")
249
- print("Concatenation complete.")
250
-
251
- # Close individual clips after successful concatenation
252
- print("Closing individual segment clips...")
253
- for c in clips:
254
- try:
255
- c.close()
256
- except Exception as e:
257
- print(f"Minor error closing segment clip: {e}")
258
-
259
- # Add Music
260
- if music_file:
261
- final_video = add_background_music(final_video, music_file, bg_music_volume=0.08)
262
-
263
- # Export
264
- output_path = OUTPUT_VIDEO_FILENAME
265
- print(f"Exporting final video to '{output_path}' (FPS: {fps}, Preset: {preset})...")
266
- final_video.write_videofile(
267
- output_path,
268
- codec='libx264',
269
- audio_codec='aac',
270
- fps=fps,
271
- preset=preset,
272
- threads=os.cpu_count() or 4,
273
- logger='bar'
274
- )
275
- print(f"\nFinal video saved: '{output_path}'")
276
- print(f"Total generation time: {time.time() - start_time:.2f} seconds.")
277
-
278
- except Exception as e:
279
- print(f"FATAL Error during final assembly/export: {e}")
280
- traceback.print_exc()
281
- output_path = None
282
- finally:
283
- print("Final cleanup...")
284
- if final_video:
285
  try:
286
- final_video.close()
 
 
 
 
 
 
 
 
 
287
  except Exception as e:
288
- print(f"Minor error closing final video: {e}")
 
289
 
290
- clips.clear()
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
- if TEMP_FOLDER and os.path.exists(TEMP_FOLDER):
293
- print(f"Removing temp folder: {TEMP_FOLDER}")
294
- try:
295
- shutil.rmtree(TEMP_FOLDER)
296
- print("Temp folder removed.")
297
- except Exception as e:
298
- print(f"Warning: Could not remove temp folder {TEMP_FOLDER}: {e}")
299
-
300
- return output_path, total_segments, successful_segments
301
-
302
- # --- Gradio Interface ---
303
- with gr.Blocks(title="AI Documentary Video Generator", theme=gr.themes.Soft()) as demo:
304
- gr.Markdown("# Create a Funny AI Documentary Video")
305
- gr.Markdown("Concept -> Generate Script -> Edit (Optional) -> Configure -> Generate Video!")
306
- gr.Markdown("---")
307
-
308
- with gr.Row():
309
- with gr.Column(scale=1):
310
- gr.Markdown("### 1. Concept & Script")
311
- concept = gr.Textbox(label="Video Concept/Topic", placeholder="e.g., 'The secret life of squirrels'")
312
- generate_script_btn = gr.Button("📝 Generate Script", variant="secondary")
313
- script = gr.Textbox(label="Script (Edit if needed)", lines=15, placeholder="Generated script...", interactive=True)
314
- with gr.Column(scale=1):
315
- gr.Markdown("### 2. Video Options")
316
- resolution = gr.Radio(["Full (1920x1080)", "Short (1080x1920)"], label="Resolution", value="Short (1080x1920)")
317
- captions = gr.Radio(["Yes", "No"], label="Add Captions?", value="Yes")
318
- music = gr.Audio(label="Background Music (Optional)", type="filepath")
319
- gr.Markdown("### 3. Advanced Settings")
320
- fps_slider = gr.Slider(minimum=15, maximum=60, step=1, value=30, label="Output FPS")
321
- preset_dropdown = gr.Dropdown(
322
- ["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"],
323
- value="veryfast",
324
- label="Encoding Preset",
325
- info="Faster=Quicker, Larger File"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  )
327
- video_prob_slider = gr.Slider(
328
- minimum=0.0,
329
- maximum=1.0,
330
- step=0.05,
331
- value=0.45,
332
- label="Video Clip Probability",
333
- info="Chance (0-1) to use video per segment"
 
 
 
 
 
 
 
 
 
 
 
334
  )
335
- gr.Markdown("### 4. Generate")
336
- generate_video_btn = gr.Button("🎬 Generate Video", variant="primary")
337
-
338
- with gr.Row():
339
- video_output = gr.Video(label="Generated Video", interactive=False)
340
- with gr.Row():
341
- status_message = gr.Markdown("")
342
-
343
- # Event Handlers
344
- def on_generate_script(concept_text):
345
- if not concept_text:
346
- return gr.update(value="", placeholder="Please enter a concept first."), gr.Markdown("⚠️ Please enter a video concept.")
 
 
 
 
347
 
348
- yield gr.update(), gr.Markdown("⏳ Generating script...")
349
- script_text = generate_script(concept_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
- if script_text and "Error:" not in script_text:
352
- yield gr.update(value=script_text), gr.Markdown("✅ Script generated!")
353
- elif script_text and "Error:" in script_text:
354
- yield gr.update(value=""), gr.Markdown(f"❌ Script Error: {script_text}")
355
- else:
356
- yield gr.update(value=""), gr.Markdown("❌ Script generation failed. Check logs.")
357
-
358
- def on_generate_video(script_text, resolution_choice, captions_choice, music_path, fps, preset, video_probability):
359
- if not script_text or "Error:" in script_text or "Failed to generate script" in script_text:
360
- yield None, gr.Markdown("❌ Cannot generate: Invalid script.")
361
- return
362
 
363
- if not PEXELS_API_KEY or 'your_pexels_api_key_here' in PEXELS_API_KEY:
364
- yield None, gr.Markdown("❌ Cannot generate: Pexels API Key missing/invalid.")
365
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- if pipeline is None:
368
- yield None, gr.Markdown(" Cannot generate: Kokoro TTS failed initialization. Check console.")
369
- return
 
 
 
 
 
 
 
 
 
370
 
371
- yield None, gr.Markdown("⏳ Starting video generation... Check console for detailed progress.")
 
 
 
 
 
 
 
 
 
372
 
373
- video_path, total_segments, successful_segments = generate_video_from_script(
374
- script_text, resolution_choice, captions_choice, music_path, fps, preset, video_probability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  )
376
 
377
- final_status = ""
378
- if video_path and os.path.exists(video_path):
379
- final_status = f"✅ Video generated: {os.path.basename(video_path)}!"
380
- if successful_segments < total_segments:
381
- final_status += f" (Warning: {total_segments - successful_segments} out of {total_segments} segments failed)"
382
- yield video_path, gr.Markdown(final_status)
383
- else:
384
- final_status = "❌ Video generation failed. Please check the console logs for errors."
385
- yield None, gr.Markdown(final_status)
386
-
387
- # Connect buttons
388
- generate_script_btn.click(
389
- fn=on_generate_script,
390
- inputs=[concept],
391
- outputs=[script, status_message],
392
- api_name="generate_script"
393
- )
394
 
395
- generate_video_btn.click(
396
- fn=on_generate_video,
397
- inputs=[script, resolution, captions, music, fps_slider, preset_dropdown, video_prob_slider],
398
- outputs=[video_output, status_message],
399
- api_name="generate_video"
400
- )
401
 
402
- # Launch App
403
- print("Starting Gradio Interface...")
404
- demo.queue().launch(share=True, debug=True)
 
1
+ #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
+ """
4
+ AI Documentary Video Generator
5
+ Version: 2.0
6
+ Author: AI Assistant
7
+ Description: Creates documentary-style videos from text prompts using TTS, media APIs, and video processing
8
+ """
9
+
10
  import os
11
+ import sys
 
 
 
 
 
 
 
 
 
 
 
12
  import re
13
  import time
14
+ import random
15
+ import tempfile
 
 
16
  import shutil
17
  import traceback
18
+ import math
19
+ from typing import Optional, Tuple, Dict, List, Union
20
+ from dataclasses import dataclass
21
+ from concurrent.futures import ThreadPoolExecutor
22
+ from urllib.parse import quote
23
 
24
+ # Third-party imports
25
+ import numpy as np
26
+ import cv2
27
+ import requests
28
+ from PIL import Image
29
+ import soundfile as sf
30
+ import torch
31
+ from bs4 import BeautifulSoup
32
+ import gradio as gr
 
 
 
 
 
33
 
34
+ # MoviePy imports
35
+ from moviepy.editor import (
36
+ VideoFileClip, AudioFileClip, ImageClip,
37
+ concatenate_videoclips, CompositeVideoClip,
38
+ CompositeAudioClip, TextClip
39
+ )
40
+ import moviepy.video.fx.all as vfx
41
+ import moviepy.config as mpy_config
42
 
43
+ # Custom imports
44
  try:
45
+ from kokoro import KPipeline
46
+ except ImportError:
47
+ print("Warning: Kokoro TTS not available. TTS features will be disabled.")
48
+
49
+ # ======================
50
+ # CONSTANTS & CONFIG
51
+ # ======================
52
# Baseline settings for the documentary generator. The two API keys come from
# the environment; the placeholder strings are what the config holds when the
# corresponding variable is unset.
DEFAULT_CONFIG = dict(
    PEXELS_API_KEY=os.getenv('PEXELS_API_KEY', 'your_pexels_key_here'),
    OPENROUTER_API_KEY=os.getenv('OPENROUTER_API_KEY', 'your_openrouter_key_here'),
    OUTPUT_VIDEO="documentary_output.mp4",
    USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    # Download retry policy: attempt count and base delay passed to time.sleep.
    MAX_RETRIES=3,
    RETRY_DELAY=2,
    MAX_VIDEO_DURATION=600,  # 10 minutes
    TTS_SAMPLE_RATE=24000,
    DEFAULT_VOICE='en',
    CAPTION_FONT='Arial-Bold',
    CAPTION_FONT_SIZES={
        '1080p': 40,
        '720p': 32,
        '480p': 24,
    },
    BACKGROUND_MUSIC_VOLUME=0.08,
    DEFAULT_FPS=30,
    VIDEO_PROBABILITY=0.45,
    # Resolution name -> (width, height) in pixels.
    RESOLUTIONS={
        '1080p': (1920, 1080),
        '720p': (1280, 720),
        '480p': (854, 480),
        'vertical_1080p': (1080, 1920),
        'vertical_720p': (720, 1280),
    },
)
79
+
80
+ # ======================
81
+ # CORE CLASSES
82
+ # ======================
83
+
84
@dataclass
class VideoSegment:
    """Inputs needed to render one narrated section of the documentary."""

    # Local path of the downloaded picture or video for this section.
    media_path: str
    # Local path of the narration audio file.
    tts_path: str
    # Narration text; also used as the caption text.
    narration: str
    # Display duration applied to image media.
    duration: float
    # Either 'image' or 'video'.
    media_type: str
    # Effect options (not read by the code visible in this chunk).
    effects: Dict
    # Caption options; the 'enabled' key switches captions on.
    caption_style: Dict
93
+
94
+ class DocumentaryGenerator:
95
def __init__(self, config: Optional[Dict] = None):
    """Create a generator and initialise its TTS and ImageMagick subsystems.

    Args:
        config: Optional overrides. Keys that are not supplied fall back to
            the values in ``DEFAULT_CONFIG``.
    """
    # Layer the overrides on top of the defaults so that a partial config
    # cannot cause a KeyError later for a key the caller did not mention.
    self.config = {**DEFAULT_CONFIG, **(config or {})}
    self.tts_pipeline = None
    self.temp_dir = None
    self.current_resolution = None
    self.caption_color = None

    # Initialize subsystems
    self._initialize_tts()
    self._configure_imagemagick()
105
 
106
+ def _initialize_tts(self):
107
+ """Initialize the TTS pipeline"""
108
+ try:
109
+ if 'KPipeline' in globals():
110
+ print("Initializing Kokoro TTS pipeline...")
111
+ self.tts_pipeline = KPipeline(lang_code='a')
112
+ print("TTS pipeline ready")
113
+ except Exception as e:
114
+ print(f"Could not initialize TTS: {str(e)}")
115
+ self.tts_pipeline = None
116
+
117
+ def _configure_imagemagick(self):
118
+ """Configure ImageMagick paths"""
119
+ try:
120
+ common_paths = [
121
+ "/usr/bin/convert",
122
+ "/usr/local/bin/convert",
123
+ "/opt/homebrew/bin/convert"
124
+ ]
125
+ for path in common_paths:
126
+ if os.path.exists(path):
127
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": path})
128
+ print(f"ImageMagick configured: {path}")
129
+ break
130
  else:
131
+ print("ImageMagick not found - text rendering may be limited")
132
+ except Exception as e:
133
+ print(f"ImageMagick config error: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
+ def _create_temp_dir(self):
136
+ """Create a temporary working directory"""
137
+ if self.temp_dir and os.path.exists(self.temp_dir):
138
+ shutil.rmtree(self.temp_dir)
139
+ self.temp_dir = tempfile.mkdtemp(prefix="docgen_")
140
+ print(f"Created temp directory: {self.temp_dir}")
141
+ return self.temp_dir
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
+ def _cleanup(self):
144
+ """Clean up temporary resources"""
145
+ if self.temp_dir and os.path.exists(self.temp_dir):
146
+ try:
147
+ shutil.rmtree(self.temp_dir)
148
+ print("Cleaned up temporary files")
149
+ except Exception as e:
150
+ print(f"Cleanup error: {str(e)}")
151
+
152
def generate_script(self, topic: str) -> str:
    """Generate a documentary script for ``topic`` via the OpenRouter chat API.

    Args:
        topic: Subject of the documentary, inserted into the prompt.

    Returns:
        The model's reply text on success. On any failure a message that
        starts with ``"Error"`` is returned instead of raising.
    """
    api_key = self.config.get('OPENROUTER_API_KEY')
    # The default config substitutes a non-empty placeholder when the
    # environment variable is unset, so an emptiness check alone never fires.
    if not api_key or api_key == 'your_openrouter_key_here':
        return "Error: OpenRouter API key not configured"

    prompt = (
        f"Create a funny, engaging documentary script about {topic}.\n"
        "Format each section with [TITLE] followed by narration text.\n"
        "Keep narration concise (1-2 sentences per section).\n"
        "Include at least 5 sections.\n"
        "End with a humorous call-to-action."
    )

    headers = {
        # Built from a local name: reusing the outer quote character inside
        # an f-string replacement field is a SyntaxError before Python 3.12.
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    data = {
        "model": "mistralai/mistral-small-3.1-24b-instruct:free",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 1024
    }

    try:
        response = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=headers,
            json=data,
            timeout=30
        )
        response.raise_for_status()
        choices = response.json().get('choices') or []
        if not choices:
            return "Error generating script: empty response from OpenRouter"
        return choices[0]['message']['content']
    except Exception as e:
        return f"Error generating script: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
+ def _download_media(self, url: str, filename: str) -> Optional[str]:
188
+ """Download media file from URL"""
189
+ local_path = os.path.join(self.temp_dir, filename)
190
 
191
+ for attempt in range(self.config['MAX_RETRIES']):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  try:
193
+ with requests.get(url, stream=True, timeout=15) as r:
194
+ r.raise_for_status()
195
+ with open(local_path, 'wb') as f:
196
+ for chunk in r.iter_content(chunk_size=8192):
197
+ f.write(chunk)
198
+
199
+ # Validate downloaded file
200
+ if os.path.getsize(local_path) > 1024:
201
+ return local_path
202
+
203
  except Exception as e:
204
+ print(f"Download attempt {attempt + 1} failed: {str(e)}")
205
+ time.sleep(self.config['RETRY_DELAY'] * (attempt + 1))
206
 
207
+ return None
208
+
209
+ def _search_pexels_video(self, query: str) -> Optional[str]:
210
+ """Search for videos on Pexels"""
211
+ if not self.config['PEXELS_API_KEY']:
212
+ return None
213
+
214
+ headers = {'Authorization': self.config['PEXELS_API_KEY']}
215
+ params = {
216
+ 'query': query,
217
+ 'per_page': 15,
218
+ 'orientation': 'landscape'
219
+ }
220
 
221
+ try:
222
+ response = requests.get(
223
+ 'https://api.pexels.com/videos/search',
224
+ headers=headers,
225
+ params=params,
226
+ timeout=10
227
+ )
228
+ response.raise_for_status()
229
+
230
+ videos = response.json().get('videos', [])
231
+ if videos:
232
+ video_files = videos[0].get('video_files', [])
233
+ for file in video_files:
234
+ if file.get('quality') == 'hd':
235
+ return file.get('link')
236
+ except Exception as e:
237
+ print(f"Pexels search error: {str(e)}")
238
+
239
+ return None
240
+
241
+ def _generate_tts(self, text: str) -> Optional[str]:
242
+ """Generate TTS audio for narration"""
243
+ if not self.tts_pipeline:
244
+ return None
245
+
246
+ safe_name = re.sub(r'[^\w\-_]', '', text[:20]) + '.wav'
247
+ output_path = os.path.join(self.temp_dir, safe_name)
248
+
249
+ try:
250
+ audio_segments = []
251
+ for chunk in self.tts_pipeline(text, speed=1.0):
252
+ if isinstance(chunk, tuple):
253
+ chunk = chunk[-1] # Get audio data from tuple
254
+ audio_segments.append(chunk)
255
+
256
+ full_audio = np.concatenate(audio_segments)
257
+ sf.write(output_path, full_audio, self.config['TTS_SAMPLE_RATE'])
258
+ return output_path
259
+ except Exception as e:
260
+ print(f"TTS generation error: {str(e)}")
261
+ return None
262
+
263
+ def _create_video_segment(self, segment: VideoSegment) -> Optional[VideoClip]:
264
+ """Create a single video segment with media, audio, and effects"""
265
+ try:
266
+ # Load media
267
+ if segment.media_type == 'video':
268
+ media_clip = VideoFileClip(segment.media_path)
269
+ else:
270
+ media_clip = ImageClip(segment.media_path).set_duration(segment.duration)
271
+
272
+ # Apply effects
273
+ if segment.media_type == 'image':
274
+ media_clip = self._apply_kenburns(media_clip)
275
+ else:
276
+ media_clip = self._resize_clip(media_clip)
277
+
278
+ # Add audio
279
+ audio_clip = AudioFileClip(segment.tts_path)
280
+ media_clip = media_clip.set_audio(audio_clip)
281
+
282
+ # Add captions if enabled
283
+ if segment.caption_style.get('enabled', False):
284
+ media_clip = self._add_captions(media_clip, segment.narration, segment.caption_style)
285
+
286
+ return media_clip
287
+ except Exception as e:
288
+ print(f"Segment creation error: {str(e)}")
289
+ traceback.print_exc()
290
+ return None
291
+
292
+ def _apply_kenburns(self, clip: ImageClip) -> VideoClip:
293
+ """Apply Ken Burns effect to an image clip"""
294
+ try:
295
+ target_w, target_h = self.current_resolution
296
+ base_scale = 1.2
297
+
298
+ # Choose random effect
299
+ effects = {
300
+ 'zoom_in': {'start_scale': 1.0, 'end_scale': base_scale},
301
+ 'zoom_out': {'start_scale': base_scale, 'end_scale': 1.0},
302
+ 'pan_left': {'start_pos': (0.7, 0.5), 'end_pos': (0.3, 0.5)},
303
+ 'pan_right': {'start_pos': (0.3, 0.5), 'end_pos': (0.7, 0.5)}
304
+ }
305
+ effect = random.choice(list(effects.values()))
306
+
307
+ def transform(get_frame, t):
308
+ ratio = t / clip.duration
309
+ scale = effect['start_scale'] + (effect['end_scale'] - effect['start_scale']) * ratio
310
+ pos_x = effect['start_pos'][0] + (effect['end_pos'][0] - effect['start_pos'][0]) * ratio
311
+ pos_y = effect['start_pos'][1] + (effect['end_pos'][1] - effect['start_pos'][1]) * ratio
312
+
313
+ frame = get_frame(t)
314
+ h, w = frame.shape[:2]
315
+ crop_size = (int(w/scale), int(h/scale))
316
+
317
+ center_x = int(pos_x * w)
318
+ center_y = int(pos_y * h)
319
+
320
+ x = max(0, min(center_x - crop_size[0]//2, w - crop_size[0]))
321
+ y = max(0, min(center_y - crop_size[1]//2, h - crop_size[1]))
322
+
323
+ cropped = frame[y:y+crop_size[1], x:x+crop_size[0]]
324
+ return cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
325
+
326
+ return clip.fl(transform)
327
+ except Exception as e:
328
+ print(f"Ken Burns error: {str(e)}")
329
+ return self._resize_clip(clip)
330
+
331
+ def _resize_clip(self, clip: Union[VideoClip, ImageClip]) -> VideoClip:
332
+ """Resize clip to target resolution"""
333
+ try:
334
+ target_w, target_h = self.current_resolution
335
+ clip_aspect = clip.w / clip.h
336
+ target_aspect = target_w / target_h
337
+
338
+ if abs(clip_aspect - target_aspect) < 0.01:
339
+ return clip.resize((target_w, target_h))
340
+
341
+ if clip_aspect > target_aspect:
342
+ # Wider than target
343
+ new_height = target_h
344
+ new_width = int(clip.w * (new_height / clip.h))
345
+ resized = clip.resize(height=new_height)
346
+ crop = (resized.w - target_w) / 2
347
+ return resized.crop(x1=crop, y1=0, x2=crop+target_w, y2=target_h)
348
+ else:
349
+ # Taller than target
350
+ new_width = target_w
351
+ new_height = int(clip.h * (new_width / clip.w))
352
+ resized = clip.resize(width=new_width)
353
+ crop = (resized.h - target_h) / 2
354
+ return resized.crop(x1=0, y1=crop, x2=target_w, y2=crop+target_h)
355
+ except Exception as e:
356
+ print(f"Resize error: {str(e)}")
357
+ return clip
358
+
359
+ def _add_captions(self, clip: VideoClip, text: str, style: Dict) -> VideoClip:
360
+ """Add captions to a video clip"""
361
+ try:
362
+ words = text.split()
363
+ chunks = []
364
+ current_chunk = []
365
+ char_count = 0
366
+
367
+ # Split text into manageable chunks
368
+ for word in words:
369
+ if char_count + len(word) > 30 and current_chunk:
370
+ chunks.append(' '.join(current_chunk))
371
+ current_chunk = [word]
372
+ char_count = len(word)
373
+ else:
374
+ current_chunk.append(word)
375
+ char_count += len(word) + 1
376
+
377
+ if current_chunk:
378
+ chunks.append(' '.join(current_chunk))
379
+
380
+ # Create text clips
381
+ text_clips = []
382
+ duration_per_chunk = clip.duration / len(chunks)
383
+
384
+ for i, chunk in enumerate(chunks):
385
+ txt_clip = TextClip(
386
+ chunk,
387
+ fontsize=style['font_size'],
388
+ font=style['font'],
389
+ color=style['color'],
390
+ bg_color=style['bg_color'],
391
+ stroke_color=style['stroke_color'],
392
+ stroke_width=style['stroke_width'],
393
+ method='caption',
394
+ size=(style['max_width'], None)
395
+ ).set_start(i * duration_per_chunk).set_duration(duration_per_chunk)
396
+
397
+ txt_clip = txt_clip.set_position(('center', style['y_position']))
398
+ text_clips.append(txt_clip)
399
+
400
+ return CompositeVideoClip([clip] + text_clips)
401
+ except Exception as e:
402
+ print(f"Caption error: {str(e)}")
403
+ return clip
404
+
405
def generate_video(self, script: str, resolution: str, captions: bool,
                   music_path: Optional[str] = None) -> Optional[str]:
    """Run the whole pipeline: parse the script, render segments, export.

    Args:
        script: Script text in ``[Title] narration`` form.
        resolution: Key into ``config['RESOLUTIONS']``; unknown keys fall
            back to 1920x1080.
        captions: Whether to burn captions into the video.
        music_path: Optional background music file.

    Returns:
        Path of the exported video, or ``None`` on failure. The working
        directory is removed in either case.
    """
    start_time = time.time()
    rendered_clips = []
    final_clip = None

    try:
        # Setup
        self._create_temp_dir()
        self.current_resolution = self.config['RESOLUTIONS'].get(resolution, (1920, 1080))
        self.caption_color = 'white' if captions else None

        # Parse script into segments
        segments = self._parse_script(script)
        if not segments:
            print("Error: No valid segments found in script")
            return None

        # Fetch media and narration for all segments in parallel
        with ThreadPoolExecutor() as executor:
            processed = list(executor.map(self._process_segment, segments))

        # _process_segment is annotated as returning VideoSegment records;
        # those must be rendered into clips before they can be concatenated.
        # Anything that is not a VideoSegment is passed through as a clip.
        for item in processed:
            if item is None:
                continue
            clip = self._create_video_segment(item) if isinstance(item, VideoSegment) else item
            if clip is not None:
                rendered_clips.append(clip)

        if not rendered_clips:
            print("Error: No segments could be rendered")
            return None

        # Combine segments
        final_clip = concatenate_videoclips(rendered_clips, method="compose")

        # Add background music if provided
        if music_path and os.path.exists(music_path):
            music_clip = AudioFileClip(music_path).volumex(self.config['BACKGROUND_MUSIC_VOLUME'])
            if music_clip.duration < final_clip.duration:
                # Audio clips expose audio_loop; `loop` is a video effect.
                music_clip = music_clip.audio_loop(duration=final_clip.duration)
            else:
                # Keep the music from outlasting the picture.
                music_clip = music_clip.subclip(0, final_clip.duration)
            tracks = [music_clip] if final_clip.audio is None else [final_clip.audio, music_clip]
            final_clip = final_clip.set_audio(CompositeAudioClip(tracks))

        # Export final video
        output_path = self.config['OUTPUT_VIDEO']
        final_clip.write_videofile(
            output_path,
            codec='libx264',
            audio_codec='aac',
            fps=self.config['DEFAULT_FPS'],
            threads=os.cpu_count() or 4
        )

        print(f"Video generated in {time.time() - start_time:.2f} seconds")
        return output_path

    except Exception as e:
        print(f"Video generation failed: {str(e)}")
        traceback.print_exc()
        return None
    finally:
        # Release file handles before deleting the working directory.
        for clip in [final_clip] + rendered_clips:
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as e:
                print(f"Minor error closing clip: {str(e)}")
        self._cleanup()
460
+
461
+ def _parse_script(self, script: str) -> List[Dict]:
462
+ """Parse script into media and narration segments"""
463
+ segments = []
464
+ current_title = None
465
+ current_text = ""
466
 
467
+ for line in script.split('\n'):
468
+ line = line.strip()
469
+ if not line:
470
+ continue
471
+
472
+ # Check for section title
473
+ title_match = re.match(r'^\s*\[([^\]]+)\]\s*(.*)', line)
474
+ if title_match:
475
+ if current_title and current_text:
476
+ segments.append({
477
+ 'title': current_title,
478
+ 'text': current_text.strip()
479
+ })
480
+ current_title = title_match.group(1).strip()
481
+ current_text = title_match.group(2).strip() + " "
482
+ elif current_title:
483
+ current_text += line + " "
484
 
485
+ # Add the last segment
486
+ if current_title and current_text:
487
+ segments.append({
488
+ 'title': current_title,
489
+ 'text': current_text.strip()
490
+ })
 
 
 
 
 
491
 
492
+ return segments
493
+
494
def _process_segment(self, segment: Dict) -> Optional[VideoSegment]:
    """Turn one parsed script segment into a ``VideoSegment``.

    Steps: pick video or image at random (per ``VIDEO_PROBABILITY``), search
    Pexels using the segment title, download the media, synthesize the
    narration, and assemble the caption style for the current resolution.

    Args:
        segment: Dict with ``'title'`` (used as the media search query) and
            ``'text'`` (the narration to synthesize).

    Returns:
        The assembled ``VideoSegment``, or ``None`` if no media was found,
        the download failed, TTS failed, or any exception was raised.
    """
    import uuid  # local import: only needed for unique temp filenames

    try:
        title = segment['title']

        # Choose the media type and look up a matching asset.
        use_video = random.random() < self.config['VIDEO_PROBABILITY']
        if use_video:
            media_url = self._search_pexels_video(title)
            media_type = 'video'
        else:
            media_url = self._search_pexels_image(title)
            media_type = 'image'

        if not media_url:
            print(f"No media found for: {title}")
            return None

        # Download media. The name must carry the real extension and be
        # unique per segment: segments are processed concurrently, so a name
        # derived from the title (or its length) could be overwritten by
        # another segment.
        media_ext = '.mp4' if media_type == 'video' else '.jpg'
        media_filename = f"media_{uuid.uuid4().hex}{media_ext}"
        media_path = self._download_media(media_url, media_filename)

        if not media_path:
            print(f"Failed to download media for: {title}")
            return None

        # Generate TTS
        tts_path = self._generate_tts(segment['text'])
        if not tts_path:
            print(f"Failed to generate TTS for: {title}")
            return None

        # The clip is opened only to read its duration; close it so the
        # underlying reader is released instead of leaking per segment.
        narration_clip = AudioFileClip(tts_path)
        try:
            tts_duration = narration_clip.duration
        finally:
            narration_clip.close()

        # Narration length plus 10% padding, clamped to 3-10 seconds.
        duration = max(3.0, min(tts_duration * 1.1, 10.0))

        # Caption geometry is derived from the current output resolution.
        width, height = self.current_resolution[0], self.current_resolution[1]
        caption_style = {
            'enabled': self.caption_color is not None,
            'font_size': self._get_font_size(),
            'font': self.config['CAPTION_FONT'],
            'color': self.caption_color or 'white',
            'bg_color': 'rgba(0,0,0,0.5)',
            'stroke_color': 'black',
            'stroke_width': 1.5,
            'max_width': int(width * 0.8),
            'y_position': int(height * 0.8)
        }

        return VideoSegment(
            media_path=media_path,
            tts_path=tts_path,
            narration=segment['text'],
            duration=duration,
            media_type=media_type,
            effects={'type': 'random'},
            caption_style=caption_style
        )

    except Exception as e:
        # Best-effort boundary: a failing segment is reported and skipped so
        # the remaining segments can still be rendered.
        print(f"Segment processing error: {str(e)}")
        return None
555
+
556
+ def _get_font_size(self) -> int:
557
+ """Get appropriate font size for current resolution"""
558
+ if self.current_resolution[1] >= 1080:
559
+ return self.config['CAPTION_FONT_SIZES']['1080p']
560
+ elif self.current_resolution[1] >= 720:
561
+ return self.config['CAPTION_FONT_SIZES']['720p']
562
+ else:
563
+ return self.config['CAPTION_FONT_SIZES']['480p']
564
 
565
+ # ======================
566
+ # GRADIO INTERFACE
567
+ # ======================
568
+
569
def create_gradio_interface():
    """Build the Gradio Blocks UI for the documentary generator.

    Creates a single ``DocumentaryGenerator`` and wires its
    ``generate_script`` and ``generate_video`` methods to the buttons.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    generator = DocumentaryGenerator()

    def _set_video_probability(probability):
        # The generator reads config['VIDEO_PROBABILITY'] for every segment,
        # so updating it here is what makes the slider take effect.
        generator.config['VIDEO_PROBABILITY'] = probability

    with gr.Blocks(title="AI Documentary Maker", theme="soft") as app:
        gr.Markdown("# AI Documentary Video Generator")

        with gr.Row():
            with gr.Column():
                topic_input = gr.Textbox(label="Documentary Topic", placeholder="Enter your topic...")
                generate_script_btn = gr.Button("Generate Script")
                script_output = gr.Textbox(label="Generated Script", lines=10, interactive=True)

                with gr.Accordion("Advanced Options", open=False):
                    resolution = gr.Dropdown(
                        list(generator.config['RESOLUTIONS'].keys()),
                        value="1080p",
                        label="Resolution"
                    )
                    captions = gr.Checkbox(
                        value=True,
                        label="Enable Captions"
                    )
                    # No `optional=True`: that keyword is not part of the
                    # current gr.Audio signature; an input left empty is
                    # simply passed to the handler as None.
                    music_input = gr.Audio(
                        label="Background Music",
                        type="filepath"
                    )
                    video_prob = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=generator.config['VIDEO_PROBABILITY'],
                        label="Video Clip Probability"
                    )

                generate_video_btn = gr.Button("Generate Video", variant="primary")

            with gr.Column():
                video_output = gr.Video(label="Generated Documentary")
                # NOTE(review): no handler writes to this box yet; it stays
                # empty until one adds it to its outputs.
                status_output = gr.Textbox(label="Status", interactive=False)

        # Event handlers
        generate_script_btn.click(
            fn=generator.generate_script,
            inputs=[topic_input],
            outputs=[script_output]
        )

        # Kept separate from the "generate" endpoint so that endpoint's
        # inputs and outputs stay unchanged for API clients.
        video_prob.change(
            fn=_set_video_probability,
            inputs=[video_prob],
            outputs=None
        )

        generate_video_btn.click(
            fn=generator.generate_video,
            inputs=[script_output, resolution, captions, music_input],
            outputs=[video_output],
            api_name="generate"
        )

    return app
625
+
626
+ # ======================
627
+ # MAIN EXECUTION
628
+ # ======================
 
629
 
630
if __name__ == "__main__":
    # Serve the UI on all network interfaces at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    app = create_gradio_interface()
    app.launch(**launch_options)