Update app.py
app.py
CHANGED
@@ -1,404 +1,632 @@
 # -*- coding: utf-8 -*-
 import os
-from moviepy.editor import (
-    VideoFileClip, AudioFileClip, ImageClip, concatenate_videoclips,
-    CompositeVideoClip, CompositeAudioClip, TextClip
-)
-import moviepy.video.fx.all as vfx
-import moviepy.config as mpy_config
-from PIL import Image
-import tempfile
-import random
-import cv2
-import math
-import requests
 import re
 import time
-import
-from urllib.parse import quote
-import gradio as gr
 import shutil
 import traceback
-#
-    except Exception as e:
-        print(f"FATAL ERROR initializing Kokoro pipeline: {e}")
-        print("TTS generation will not be available.")
-        pipeline = None
-    return pipeline
-#
-#
 try:
-        print("Error: OpenRouter API Key not set or looks invalid.")
-        return "Error: OpenRouter API Key not configured."
-    headers = {
-        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
-        'HTTP-Referer': 'http://localhost:7860',
-        'X-Title': 'AI Documentary Maker'
 }
 else:
-        print("
-        print(f"API Error: Unexpected response format from OpenRouter: {response_data}")
-        return "Error: Could not parse script from API response."
-    except requests.exceptions.Timeout:
-        print("API Error: Request to OpenRouter timed out.")
-        return "Error: Script generation timed out."
-    except requests.exceptions.RequestException as e:
-        print(f"API Error: Request failed: {e}")
-        error_details = f"Status Code: {e.response.status_code}, Response: {e.response.text}" if e.response else str(e)
-        print(f" Details: {error_details}")
-        return f"Error: Failed connect to script generation service ({e.response.status_code if e.response else 'N/A'})."
-    except Exception as e:
-        print(f"Error during script generation: {e}")
-        traceback.print_exc()
-        return f"Error: Unexpected error during script generation."
-
-[Rest of the functions with proper formatting...]
-
-def generate_video_from_script(script, resolution, caption_option, music_file, fps, preset, video_probability):
-    """Generates the final video from script and options."""
-    global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
-    start_time = time.time()
-    print("\n--- Starting Video Generation ---")
-    print(f" Options: Res={resolution}, Caps={caption_option}, FPS={fps}, Preset={preset}, VidProb={video_probability:.2f}")
-    if music_file:
-        print(f" Music File: {os.path.basename(music_file)}")
-
-    # Setup Resolution
-    if resolution == "Full (1920x1080)":
-        TARGET_RESOLUTION = (1920, 1080)
-    elif resolution == "Short (1080x1920)":
-        TARGET_RESOLUTION = (1080, 1920)
-    else:
-        TARGET_RESOLUTION = (1080, 1920)
-        print("Warning: Unknown resolution, defaulting to Short.")
-        print(f"
-        print(f"Error creating temp folder: {e}")
-        return None, 0, 0
-
-    # ImageMagick Policy Fix (optional)
-    if CAPTION_COLOR != "transparent":
-        fix_imagemagick_policy()
-
-    # Parse Script
-    print("Parsing script...")
-    elements = parse_script(script)
-    if not elements:
-        print("Error: Failed to parse script.")
-        shutil.rmtree(TEMP_FOLDER)
-        return None, 0, 0
-
-    paired_elements = []
-    for i in range(0, len(elements), 2):
-        if i + 1 < len(elements) and elements[i]['type'] == 'media' and elements[i+1]['type'] == 'tts':
-            paired_elements.append((elements[i], elements[i+1]))
-        else:
-            print(f"Warning: Skipping mismatched element pair at index {i}.")
-
-    total_segments = len(paired_elements)
-    if total_segments == 0:
-        print("Error: No valid segments found.")
-        shutil.rmtree(TEMP_FOLDER)
-        return None, 0, 0
-
-    # Generate Clips
-    clips = []
-    successful_segments = 0
-        if clip:
-            clips.append(clip)
-            successful_segments += 1
-            print(f"Segment {idx+1} processed in {time.time() - segment_start_time:.2f}s.")
-        else:
-            print("Error: Clip creation failed. Skipping segment.")
-            if os.path.exists(media_asset['path']):
-                try:
-                    os.remove(media_asset['path'])
-                except OSError:
-                    pass
-            if os.path.exists(tts_path):
-                try:
-                    os.remove(tts_path)
-                except OSError:
-                    pass
-            continue
-
-    # Final Assembly
-    final_video = None
-    output_path = None
-    print(f"\nWARNING: Only {successful_segments}/{total_segments} segments succeeded.")
-
-    print(f"\nConcatenating {len(clips)} clips...")
-    try:
-        final_video = concatenate_videoclips(clips, method="compose")
-        print("Concatenation complete.")
-
-        # Close individual clips after successful concatenation
-        print("Closing individual segment clips...")
-        for c in clips:
-            try:
-                c.close()
-            except Exception as e:
-                print(f"Minor error closing segment clip: {e}")
-
-        # Add Music
-        if music_file:
-            final_video = add_background_music(final_video, music_file, bg_music_volume=0.08)
-
-        # Export
-        output_path = OUTPUT_VIDEO_FILENAME
-        print(f"Exporting final video to '{output_path}' (FPS: {fps}, Preset: {preset})...")
-        final_video.write_videofile(
-            output_path,
-            codec='libx264',
-            audio_codec='aac',
-            fps=fps,
-            preset=preset,
-            threads=os.cpu_count() or 4,
-            logger='bar'
-        )
-        print(f"\nFinal video saved: '{output_path}'")
-        print(f"Total generation time: {time.time() - start_time:.2f} seconds.")
-
-    except Exception as e:
-        print(f"FATAL Error during final assembly/export: {e}")
-        traceback.print_exc()
-        output_path = None
-    finally:
-        print("Final cleanup...")
-        if final_video:
 try:
 except Exception as e:
-            print(f"
 )
 )
-def on_generate_video(script_text, resolution_choice, captions_choice, music_path, fps, preset, video_probability):
-    if not script_text or "Error:" in script_text or "Failed to generate script" in script_text:
-        yield None, gr.Markdown("❌ Cannot generate: Invalid script.")
-        return
-        return
 )
-    else:
-        final_status = "❌ Video generation failed. Please check the console logs for errors."
-        yield None, gr.Markdown(final_status)
-
-    # Connect buttons
-    generate_script_btn.click(
-        fn=on_generate_script,
-        inputs=[concept],
-        outputs=[script, status_message],
-        api_name="generate_script"
-    )
-)
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+"""
+AI Documentary Video Generator
+Version: 2.0
+Author: AI Assistant
+Description: Creates documentary-style videos from text prompts using TTS, media APIs, and video processing
+"""

 import os
+import sys
 import re
 import time
+import random
+import tempfile
 import shutil
 import traceback
+import math
+from typing import Optional, Tuple, Dict, List, Union
+from dataclasses import dataclass
+from concurrent.futures import ThreadPoolExecutor
+from urllib.parse import quote

+# Third-party imports
+import numpy as np
+import cv2
+import requests
+from PIL import Image
+import soundfile as sf
+import torch
+from bs4 import BeautifulSoup
+import gradio as gr

+# MoviePy imports
+from moviepy.editor import (
+    VideoClip, VideoFileClip, AudioFileClip, ImageClip,
+    concatenate_videoclips, CompositeVideoClip,
+    CompositeAudioClip, TextClip
+)
+import moviepy.video.fx.all as vfx
+import moviepy.config as mpy_config

+# Custom imports
 try:
+    from kokoro import KPipeline
+except ImportError:
+    print("Warning: Kokoro TTS not available. TTS features will be disabled.")
+
+# ======================
+# CONSTANTS & CONFIG
+# ======================
+DEFAULT_CONFIG = {
+    'PEXELS_API_KEY': os.getenv('PEXELS_API_KEY', 'your_pexels_key_here'),
+    'OPENROUTER_API_KEY': os.getenv('OPENROUTER_API_KEY', 'your_openrouter_key_here'),
+    'OUTPUT_VIDEO': "documentary_output.mp4",
+    'USER_AGENT': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+    'MAX_RETRIES': 3,
+    'RETRY_DELAY': 2,
+    'MAX_VIDEO_DURATION': 600,  # 10 minutes
+    'TTS_SAMPLE_RATE': 24000,
+    'DEFAULT_VOICE': 'en',
+    'CAPTION_FONT': 'Arial-Bold',
+    'CAPTION_FONT_SIZES': {
+        '1080p': 40,
+        '720p': 32,
+        '480p': 24
+    },
+    'BACKGROUND_MUSIC_VOLUME': 0.08,
+    'DEFAULT_FPS': 30,
+    'VIDEO_PROBABILITY': 0.45,
+    'RESOLUTIONS': {
+        '1080p': (1920, 1080),
+        '720p': (1280, 720),
+        '480p': (854, 480),
+        'vertical_1080p': (1080, 1920),
+        'vertical_720p': (720, 1280)
     }
+}
+
+# ======================
+# CORE CLASSES
+# ======================
+
+@dataclass
+class VideoSegment:
+    media_path: str
+    tts_path: str
+    narration: str
+    duration: float
+    media_type: str  # 'image' or 'video'
+    effects: Dict
+    caption_style: Dict
+
+class DocumentaryGenerator:
+    def __init__(self, config: Optional[Dict] = None):
+        self.config = config or DEFAULT_CONFIG
+        self.tts_pipeline = None
+        self.temp_dir = None
+        self.current_resolution = None
+        self.caption_color = None
+
+        # Initialize subsystems
+        self._initialize_tts()
+        self._configure_imagemagick()

+    def _initialize_tts(self):
+        """Initialize the TTS pipeline"""
+        try:
+            if 'KPipeline' in globals():
+                print("Initializing Kokoro TTS pipeline...")
+                self.tts_pipeline = KPipeline(lang_code='a')
+                print("TTS pipeline ready")
+        except Exception as e:
+            print(f"Could not initialize TTS: {str(e)}")
+            self.tts_pipeline = None
+
+    def _configure_imagemagick(self):
+        """Configure ImageMagick paths"""
+        try:
+            common_paths = [
+                "/usr/bin/convert",
+                "/usr/local/bin/convert",
+                "/opt/homebrew/bin/convert"
+            ]
+            for path in common_paths:
+                if os.path.exists(path):
+                    mpy_config.change_settings({"IMAGEMAGICK_BINARY": path})
+                    print(f"ImageMagick configured: {path}")
+                    break
             else:
+                print("ImageMagick not found - text rendering may be limited")
+        except Exception as e:
+            print(f"ImageMagick config error: {str(e)}")

+    def _create_temp_dir(self):
+        """Create a temporary working directory"""
+        if self.temp_dir and os.path.exists(self.temp_dir):
+            shutil.rmtree(self.temp_dir)
+        self.temp_dir = tempfile.mkdtemp(prefix="docgen_")
+        print(f"Created temp directory: {self.temp_dir}")
+        return self.temp_dir

+    def _cleanup(self):
+        """Clean up temporary resources"""
+        if self.temp_dir and os.path.exists(self.temp_dir):
+            try:
+                shutil.rmtree(self.temp_dir)
+                print("Cleaned up temporary files")
+            except Exception as e:
+                print(f"Cleanup error: {str(e)}")
+
+    def generate_script(self, topic: str) -> str:
+        """Generate a documentary script using OpenRouter API"""
+        if not self.config['OPENROUTER_API_KEY']:
+            return "Error: OpenRouter API key not configured"
+
+        prompt = f"""Create a funny, engaging documentary script about {topic}.
+Format each section with [TITLE] followed by narration text.
+Keep narration concise (1-2 sentences per section).
+Include at least 5 sections.
+End with a humorous call-to-action."""

+        headers = {
+            'Authorization': f"Bearer {self.config['OPENROUTER_API_KEY']}",
+            'Content-Type': 'application/json'
+        }
+
+        data = {
+            "model": "mistralai/mistral-small-3.1-24b-instruct:free",
+            "messages": [{"role": "user", "content": prompt}],
+            "temperature": 0.7,
+            "max_tokens": 1024
+        }
+
+        try:
+            response = requests.post(
+                'https://openrouter.ai/api/v1/chat/completions',
+                headers=headers,
+                json=data,
+                timeout=30
+            )
+            response.raise_for_status()
+            return response.json()['choices'][0]['message']['content']
+        except Exception as e:
+            return f"Error generating script: {str(e)}"

+    def _download_media(self, url: str, filename: str) -> Optional[str]:
+        """Download media file from URL"""
+        local_path = os.path.join(self.temp_dir, filename)

+        for attempt in range(self.config['MAX_RETRIES']):
            try:
+                with requests.get(url, stream=True, timeout=15) as r:
+                    r.raise_for_status()
+                    with open(local_path, 'wb') as f:
+                        for chunk in r.iter_content(chunk_size=8192):
+                            f.write(chunk)
+
+                # Validate downloaded file
+                if os.path.getsize(local_path) > 1024:
+                    return local_path
+
            except Exception as e:
+                print(f"Download attempt {attempt + 1} failed: {str(e)}")
+                time.sleep(self.config['RETRY_DELAY'] * (attempt + 1))

+        return None
+
+    def _search_pexels_video(self, query: str) -> Optional[str]:
+        """Search for videos on Pexels"""
+        if not self.config['PEXELS_API_KEY']:
+            return None
+
+        headers = {'Authorization': self.config['PEXELS_API_KEY']}
+        params = {
+            'query': query,
+            'per_page': 15,
+            'orientation': 'landscape'
+        }

+        try:
+            response = requests.get(
+                'https://api.pexels.com/videos/search',
+                headers=headers,
+                params=params,
+                timeout=10
+            )
+            response.raise_for_status()
+
+            videos = response.json().get('videos', [])
+            if videos:
+                video_files = videos[0].get('video_files', [])
+                for file in video_files:
+                    if file.get('quality') == 'hd':
+                        return file.get('link')
+        except Exception as e:
+            print(f"Pexels search error: {str(e)}")
+
+        return None
+
+    def _generate_tts(self, text: str) -> Optional[str]:
+        """Generate TTS audio for narration"""
+        if not self.tts_pipeline:
+            return None
+
+        safe_name = re.sub(r'[^\w\-_]', '', text[:20]) + '.wav'
+        output_path = os.path.join(self.temp_dir, safe_name)
+
+        try:
+            audio_segments = []
+            for chunk in self.tts_pipeline(text, speed=1.0):
+                if isinstance(chunk, tuple):
+                    chunk = chunk[-1]  # Get audio data from tuple
+                audio_segments.append(chunk)
+
+            full_audio = np.concatenate(audio_segments)
+            sf.write(output_path, full_audio, self.config['TTS_SAMPLE_RATE'])
+            return output_path
+        except Exception as e:
+            print(f"TTS generation error: {str(e)}")
+            return None
+
+    def _create_video_segment(self, segment: VideoSegment) -> Optional[VideoClip]:
+        """Create a single video segment with media, audio, and effects"""
+        try:
+            # Load media
+            if segment.media_type == 'video':
+                media_clip = VideoFileClip(segment.media_path)
+            else:
+                media_clip = ImageClip(segment.media_path).set_duration(segment.duration)
+
+            # Apply effects
+            if segment.media_type == 'image':
+                media_clip = self._apply_kenburns(media_clip)
+            else:
+                media_clip = self._resize_clip(media_clip)
+
+            # Add audio
+            audio_clip = AudioFileClip(segment.tts_path)
+            media_clip = media_clip.set_audio(audio_clip)
+
+            # Add captions if enabled
+            if segment.caption_style.get('enabled', False):
+                media_clip = self._add_captions(media_clip, segment.narration, segment.caption_style)
+
+            return media_clip
+        except Exception as e:
+            print(f"Segment creation error: {str(e)}")
+            traceback.print_exc()
+            return None
+
+    def _apply_kenburns(self, clip: ImageClip) -> VideoClip:
+        """Apply Ken Burns effect to an image clip"""
+        try:
+            target_w, target_h = self.current_resolution
+            base_scale = 1.2
+
+            # Choose random effect
+            effects = {
+                'zoom_in': {'start_scale': 1.0, 'end_scale': base_scale},
+                'zoom_out': {'start_scale': base_scale, 'end_scale': 1.0},
+                'pan_left': {'start_pos': (0.7, 0.5), 'end_pos': (0.3, 0.5)},
+                'pan_right': {'start_pos': (0.3, 0.5), 'end_pos': (0.7, 0.5)}
+            }
+            effect = random.choice(list(effects.values()))
+
+            def transform(get_frame, t):
+                ratio = t / clip.duration
+                # Each effect defines only scale or only position, so fall back to neutral values
+                start_scale = effect.get('start_scale', base_scale)
+                end_scale = effect.get('end_scale', base_scale)
+                start_pos = effect.get('start_pos', (0.5, 0.5))
+                end_pos = effect.get('end_pos', (0.5, 0.5))
+                scale = start_scale + (end_scale - start_scale) * ratio
+                pos_x = start_pos[0] + (end_pos[0] - start_pos[0]) * ratio
+                pos_y = start_pos[1] + (end_pos[1] - start_pos[1]) * ratio
+
+                frame = get_frame(t)
+                h, w = frame.shape[:2]
+                crop_size = (int(w/scale), int(h/scale))
+
+                center_x = int(pos_x * w)
+                center_y = int(pos_y * h)
+
+                x = max(0, min(center_x - crop_size[0]//2, w - crop_size[0]))
+                y = max(0, min(center_y - crop_size[1]//2, h - crop_size[1]))
+
+                cropped = frame[y:y+crop_size[1], x:x+crop_size[0]]
+                return cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
+
+            return clip.fl(transform)
+        except Exception as e:
+            print(f"Ken Burns error: {str(e)}")
+            return self._resize_clip(clip)
+
+    def _resize_clip(self, clip: Union[VideoClip, ImageClip]) -> VideoClip:
+        """Resize clip to target resolution"""
+        try:
+            target_w, target_h = self.current_resolution
+            clip_aspect = clip.w / clip.h
+            target_aspect = target_w / target_h
+
+            if abs(clip_aspect - target_aspect) < 0.01:
+                return clip.resize((target_w, target_h))
+
+            if clip_aspect > target_aspect:
+                # Wider than target
+                new_height = target_h
+                new_width = int(clip.w * (new_height / clip.h))
+                resized = clip.resize(height=new_height)
+                crop = (resized.w - target_w) / 2
+                return resized.crop(x1=crop, y1=0, x2=crop+target_w, y2=target_h)
+            else:
+                # Taller than target
+                new_width = target_w
+                new_height = int(clip.h * (new_width / clip.w))
+                resized = clip.resize(width=new_width)
+                crop = (resized.h - target_h) / 2
+                return resized.crop(x1=0, y1=crop, x2=target_w, y2=crop+target_h)
+        except Exception as e:
+            print(f"Resize error: {str(e)}")
+            return clip
+
+    def _add_captions(self, clip: VideoClip, text: str, style: Dict) -> VideoClip:
+        """Add captions to a video clip"""
+        try:
+            words = text.split()
+            chunks = []
+            current_chunk = []
+            char_count = 0
+
+            # Split text into manageable chunks
+            for word in words:
+                if char_count + len(word) > 30 and current_chunk:
+                    chunks.append(' '.join(current_chunk))
+                    current_chunk = [word]
+                    char_count = len(word)
+                else:
+                    current_chunk.append(word)
+                    char_count += len(word) + 1
+
+            if current_chunk:
+                chunks.append(' '.join(current_chunk))
+
+            # Create text clips
+            text_clips = []
+            duration_per_chunk = clip.duration / len(chunks)
+
+            for i, chunk in enumerate(chunks):
+                txt_clip = TextClip(
+                    chunk,
+                    fontsize=style['font_size'],
+                    font=style['font'],
+                    color=style['color'],
+                    bg_color=style['bg_color'],
+                    stroke_color=style['stroke_color'],
+                    stroke_width=style['stroke_width'],
+                    method='caption',
+                    size=(style['max_width'], None)
+                ).set_start(i * duration_per_chunk).set_duration(duration_per_chunk)
+
+                txt_clip = txt_clip.set_position(('center', style['y_position']))
+                text_clips.append(txt_clip)
+
+            return CompositeVideoClip([clip] + text_clips)
+        except Exception as e:
+            print(f"Caption error: {str(e)}")
+            return clip
+
+    def generate_video(self, script: str, resolution: str, captions: bool,
+                       music_path: Optional[str] = None) -> Optional[str]:
+        """Main video generation pipeline"""
+        start_time = time.time()
+
+        try:
+            # Setup
+            self._create_temp_dir()
+            self.current_resolution = self.config['RESOLUTIONS'].get(resolution, (1920, 1080))
+            self.caption_color = 'white' if captions else None
+
+            # Parse script into segments
+            segments = self._parse_script(script)
+            if not segments:
+                print("Error: No valid segments found in script")
+                return None
+
+            # Process segments in parallel
+            with ThreadPoolExecutor() as executor:
+                video_segments = list(executor.map(self._process_segment, segments))
+
+            # Render each prepared segment into a clip, then combine them
+            clips = [self._create_video_segment(s) for s in video_segments if s is not None]
+            final_clip = concatenate_videoclips(
+                [c for c in clips if c is not None],
+                method="compose"
             )
+
+            # Add background music if provided
+            if music_path and os.path.exists(music_path):
+                music_clip = AudioFileClip(music_path).volumex(self.config['BACKGROUND_MUSIC_VOLUME'])
+                if music_clip.duration < final_clip.duration:
+                    music_clip = music_clip.loop(duration=final_clip.duration)
+                final_clip = final_clip.set_audio(
+                    CompositeAudioClip([final_clip.audio, music_clip])
+                )
+
+            # Export final video
+            output_path = self.config['OUTPUT_VIDEO']
+            final_clip.write_videofile(
+                output_path,
+                codec='libx264',
+                audio_codec='aac',
+                fps=self.config['DEFAULT_FPS'],
+                threads=os.cpu_count() or 4
             )
+
+            print(f"Video generated in {time.time() - start_time:.2f} seconds")
+            return output_path
+
+        except Exception as e:
+            print(f"Video generation failed: {str(e)}")
+            traceback.print_exc()
+            return None
+        finally:
+            self._cleanup()
+
+    def _parse_script(self, script: str) -> List[Dict]:
+        """Parse script into media and narration segments"""
+        segments = []
+        current_title = None
+        current_text = ""

+        for line in script.split('\n'):
+            line = line.strip()
+            if not line:
+                continue
+
+            # Check for section title
+            title_match = re.match(r'^\s*\[([^\]]+)\]\s*(.*)', line)
+            if title_match:
+                if current_title and current_text:
+                    segments.append({
+                        'title': current_title,
+                        'text': current_text.strip()
+                    })
+                current_title = title_match.group(1).strip()
+                current_text = title_match.group(2).strip() + " "
+            elif current_title:
+                current_text += line + " "

+        # Add the last segment
+        if current_title and current_text:
+            segments.append({
+                'title': current_title,
+                'text': current_text.strip()
+            })

+        return segments
+
+    def _process_segment(self, segment: Dict) -> Optional[VideoSegment]:
+        """Process a single script segment into a video segment"""
+        try:
+            # Get media
+            use_video = random.random() < self.config['VIDEO_PROBABILITY']
+            if use_video:
+                media_url = self._search_pexels_video(segment['title'])
+                media_type = 'video'
+            else:
+                media_url = self._search_pexels_image(segment['title'])
+                media_type = 'image'
+
+            if not media_url:
+                print(f"No media found for: {segment['title']}")
+                return None
+
+            # Download media
+            media_ext = '.mp4' if media_type == 'video' else '.jpg'
+            media_filename = f"media_{len(segment['title'])}{media_ext}"
+            media_path = self._download_media(media_url, media_filename)
+
+            if not media_path:
+                print(f"Failed to download media for: {segment['title']}")
+                return None
+
+            # Generate TTS
+            tts_path = self._generate_tts(segment['text'])
+            if not tts_path:
+                print(f"Failed to generate TTS for: {segment['title']}")
+                return None
+
+            # Calculate duration based on TTS
+            tts_duration = AudioFileClip(tts_path).duration
+            duration = max(3.0, min(tts_duration * 1.1, 10.0))  # 3-10 seconds
+
+            # Prepare caption style
+            caption_style = {
+                'enabled': self.caption_color is not None,
+                'font_size': self._get_font_size(),
+                'font': self.config['CAPTION_FONT'],
+                'color': self.caption_color or 'white',
+                'bg_color': 'rgba(0,0,0,0.5)',
+                'stroke_color': 'black',
+                'stroke_width': 1.5,
+                'max_width': int(self.current_resolution[0] * 0.8),
+                'y_position': int(self.current_resolution[1] * 0.8)
+            }
+
+            return VideoSegment(
+                media_path=media_path,
+                tts_path=tts_path,
+                narration=segment['text'],
+                duration=duration,
+                media_type=media_type,
+                effects={'type': 'random'},
+                caption_style=caption_style
+            )

+        except Exception as e:
+            print(f"Segment processing error: {str(e)}")
+            return None
+
+    def _get_font_size(self) -> int:
+        """Get appropriate font size for current resolution"""
+        if self.current_resolution[1] >= 1080:
+            return self.config['CAPTION_FONT_SIZES']['1080p']
+        elif self.current_resolution[1] >= 720:
+            return self.config['CAPTION_FONT_SIZES']['720p']
+        else:
+            return self.config['CAPTION_FONT_SIZES']['480p']

+# ======================
+# GRADIO INTERFACE
+# ======================
+
+def create_gradio_interface():
+    """Create the Gradio web interface"""
+    generator = DocumentaryGenerator()
+
+    with gr.Blocks(title="AI Documentary Maker", theme="soft") as app:
+        gr.Markdown("# AI Documentary Video Generator")

+        with gr.Row():
+            with gr.Column():
+                topic_input = gr.Textbox(label="Documentary Topic", placeholder="Enter your topic...")
+                generate_script_btn = gr.Button("Generate Script")
+                script_output = gr.Textbox(label="Generated Script", lines=10, interactive=True)
+
+                with gr.Accordion("Advanced Options", open=False):
+                    resolution = gr.Dropdown(
+                        list(generator.config['RESOLUTIONS'].keys()),
+                        value="1080p",
+                        label="Resolution"
+                    )
+                    captions = gr.Checkbox(
+                        value=True,
+                        label="Enable Captions"
+                    )
+                    music_input = gr.Audio(
+                        label="Background Music",
+                        type="filepath",
+                        optional=True
+                    )
+                    video_prob = gr.Slider(
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=generator.config['VIDEO_PROBABILITY'],
+                        label="Video Clip Probability"
+                    )
+
+                generate_video_btn = gr.Button("Generate Video", variant="primary")
+
+            with gr.Column():
+                video_output = gr.Video(label="Generated Documentary")
+                status_output = gr.Textbox(label="Status", interactive=False)
+
+        # Event handlers
+        generate_script_btn.click(
+            fn=generator.generate_script,
+            inputs=[topic_input],
+            outputs=[script_output]
        )

+        generate_video_btn.click(
+            fn=generator.generate_video,
+            inputs=[script_output, resolution, captions, music_input],
+            outputs=[video_output],
+            api_name="generate"
+        )

+    return app
+
+# ======================
+# MAIN EXECUTION
+# ======================

+if __name__ == "__main__":
+    app = create_gradio_interface()
+    app.launch(server_name="0.0.0.0", server_port=7860)
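Two illustrative notes on the new code, not part of the diff itself. First, `_process_segment` calls `self._search_pexels_image(...)`, but no such method appears anywhere in app.py. A minimal sketch of one, assuming the standard Pexels photo-search endpoint (`https://api.pexels.com/v1/search`) and the same config keys used by `_search_pexels_video`, could look like this:

    def _search_pexels_image(self, query: str) -> Optional[str]:
        """Hypothetical helper (not in the diff): search for a photo on Pexels."""
        if not self.config['PEXELS_API_KEY']:
            return None
        headers = {'Authorization': self.config['PEXELS_API_KEY']}
        params = {'query': query, 'per_page': 5, 'orientation': 'landscape'}
        try:
            response = requests.get(
                'https://api.pexels.com/v1/search',
                headers=headers,
                params=params,
                timeout=10
            )
            response.raise_for_status()
            photos = response.json().get('photos', [])
            if photos:
                # 'src' holds the photo at several sizes; 'large2x' is a reasonable default
                return photos[0].get('src', {}).get('large2x')
        except Exception as e:
            print(f"Pexels image search error: {str(e)}")
        return None

Second, a hedged sketch of driving the new `DocumentaryGenerator` class directly, without the Gradio UI. It assumes the same dependencies and API keys the Space needs; the sample script is hypothetical and simply follows the `[TITLE] narration` layout that `generate_script` requests and `_parse_script` expects:

    # Hypothetical usage sketch (not part of app.py)
    from app import DocumentaryGenerator  # assumes this file is saved as app.py

    sample_script = """
    [The Discovery] Scientists once found a potato shaped like a duck.
    [The Aftermath] The internet has never recovered.
    [Call To Action] Subscribe before the potato does.
    """

    generator = DocumentaryGenerator()
    # generator.generate_script("duck-shaped potatoes") would ask OpenRouter instead
    video_path = generator.generate_video(
        script=sample_script,
        resolution="1080p",   # key into DEFAULT_CONFIG['RESOLUTIONS']
        captions=True,
        music_path=None,      # optional background-music file path
    )
    print("Output:", video_path)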