Update app.py
app.py
CHANGED
@@ -1,210 +1,191 @@
 import os
-…
 import random
 import shutil
-…
-from PIL import Image, ImageDraw, ImageFont
 import numpy as np
-…
 import gradio as gr

-…
     for line in script_text.splitlines():
-…
-    file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
-    try:
-        tts = gTTS(text=text, lang='en')
-        mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
-        tts.save(mp3_path)
-        from pydub import AudioSegment
-        audio = AudioSegment.from_mp3(mp3_path)
-        audio.export(file_path, format="wav")
-        os.remove(mp3_path)
-        print(f"TTS audio saved to {file_path}")
-        return file_path
-    except Exception as e:
-        print(f"TTS generation failed: {e}")
-        return None
-
-def get_audio_duration(audio_path):
-    """Get the duration of the audio file."""
-    audio = AudioFileClip(audio_path)
-    duration = audio.duration
-    audio.close()
-    return duration
-
-def generate_subtitle_image(text, font_path="arial.ttf", font_size=45, text_color=(255, 255, 255, 255), bg_color=(0, 0, 0, 64), size=(1080, 200)):
-    """Generate a subtitle image with wrapped text."""
-    img = Image.new('RGBA', size, (0, 0, 0, 0))  # Transparent background
-    draw = ImageDraw.Draw(img)
-    if bg_color:
-        draw.rectangle([(0, 0), size], fill=bg_color)
     try:
-…
-        mixed_audio = CompositeAudioClip([video_audio, bg_music])
-        video_clip = video_clip.set_audio(mixed_audio)
-        return video_clip
-
-def generate_video(user_input, include_captions):
-    """Generate a video with voiceover and optional captions."""
-    global TEMP_FOLDER
-    TEMP_FOLDER = tempfile.mkdtemp()
-
-    # Generate and parse script
-    script = generate_dummy_script()  # Using dummy script for 64s duration
-    full_narration = parse_script(script)
-    print("Full Narration:", full_narration)
-
-    # Generate voiceover
-    tts_path = generate_tts(full_narration)
-    if not tts_path:
-        shutil.rmtree(TEMP_FOLDER)
-        return None
-
-    # Assuming voiceover is 64 seconds as per requirement
-    audio_duration = 64  # Hardcoded for this example
-    video_duration = audio_duration + 0.5  # 64.5 seconds clip
-
-    # Cut video clip from 13-minute video
-    video_path = "video.mp4"
-    if not os.path.exists(video_path):
-        print("video.mp4 not found.")
-        shutil.rmtree(TEMP_FOLDER)
-        return None
-
-    long_video = VideoFileClip(video_path)
-    total_duration = long_video.duration  # 13 minutes = 780 seconds
-    if total_duration < video_duration:
-        print("Video is too short.")
-        long_video.close()
-        shutil.rmtree(TEMP_FOLDER)
-        return None
-
-    start_time = random.uniform(0, total_duration - video_duration)
-    video_clip = long_video.subclip(start_time, start_time + video_duration)
-    long_video.close()
-
-    # Set voiceover audio
-    video_clip = video_clip.set_audio(AudioFileClip(tts_path))
-
-    # Add captions if requested
-    if include_captions == "Yes":
-        words = full_narration.split()
-        num_words = len(words)
-        word_duration = audio_duration / num_words  # Timing based on audio_duration
-        chunks = [words[i:i+5] for i in range(0, num_words, 5)]
-        subtitle_clips = []
         for i, chunk in enumerate(chunks):
-…
 iface = gr.Interface(
     fn=generate_video,
     inputs=[
-        gr.Textbox(label="Video Concept"…
-        gr.…
     ],
     outputs=gr.Video(label="Generated Video"),
-    title="Video Generator",
-    description="…
 )

 if __name__ == "__main__":
-    iface.launch()
 import os
+import re
+import math
 import random
+import tempfile
 import shutil
+import requests
 import numpy as np
+from kokoro import KPipeline
+import soundfile as sf
+from pydub import AudioSegment
+from gtts import gTTS
 import gradio as gr
+from moviepy.editor import (
+    VideoFileClip, AudioFileClip, concatenate_audioclips,
+    CompositeAudioClip, CompositeVideoClip, TextClip
+)

+# ────────── GLOBAL CONFIG ──────────
+OPENROUTER_API_KEY = 'sk-or-v1-…'
+OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
+SOURCE_VIDEO_PATH = "video.mp4"    # 13-min source
+OUTPUT_VIDEO_PATH = "final_video.mp4"
+TARGET_RESOLUTION = (1080, 1920)   # Vertical TikTok style
+VOICE_SPEED = 0.9
+CAPTION_FONT_SIZE = 45
+BG_MUSIC_VOLUME = 0.08
+
+# Initialize Kokoro TTS
+pipeline = KPipeline(lang_code='a')  # American English
+
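A note on the key above: committing even a truncated OpenRouter key to the repo invites abuse. On Spaces the usual pattern is a repository secret read from the environment; a minimal sketch, assuming a secret named OPENROUTER_API_KEY is configured:

    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")  # empty fallback keeps local imports working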
+# ────────── SCRIPT GENERATION ──────────
+def generate_script(topic: str) -> str:
+    headers = {
+        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
+        'HTTP-Referer': 'https://your-domain.com',
+        'X-Title': 'AI Documentary Maker'
+    }
+    prompt = f"""You're a professional documentary narrator.
+Break your script into scenes with [Tags], one sentence each (≤12 words).
+No slang or numbers. At the end, include [Subscribe] with a formal reason.
+
+Topic: {topic}
+"""
+    payload = {
+        'model': OPENROUTER_MODEL,
+        'messages': [{'role': 'user', 'content': prompt}],
+        'temperature': 0.4,
+        'max_tokens': 5000
+    }
+    r = requests.post('https://openrouter.ai/api/v1/chat/completions',
+                      headers=headers, json=payload, timeout=30)
+    r.raise_for_status()
+    return r.json()['choices'][0]['message']['content']
+
+def parse_script(script_text: str):
+    """Return list of (scene_title, sentence_text)."""
+    sections = []
+    current = None
     for line in script_text.splitlines():
+        m = re.match(r'^\[(.+?)\]\s*(.*)$', line)
+        if m:
+            if current:
+                sections.append(current)
+            current = [m.group(1), m.group(2)]
+        elif current and line.strip():
+            current[1] += ' ' + line.strip()
+    if current:
+        sections.append(current)
+    return sections
+
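For reference, the parser turns a tagged script into (title, sentence) pairs; a quick check with a hypothetical three-scene script:

    sample = "[Opening] The desert hides a forgotten city.\n[Discovery] Archaeologists uncover walls beneath the sand.\n[Subscribe] Subscribe for more journeys into history."
    print(parse_script(sample))
    # [['Opening', 'The desert hides a forgotten city.'],
    #  ['Discovery', 'Archaeologists uncover walls beneath the sand.'],
    #  ['Subscribe', 'Subscribe for more journeys into history.']]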
+# ────────── TTS ──────────
+def generate_tts_audio(text: str, voice_code: str, dirpath: str) -> str:
+    """Produce a WAV file, using Kokoro then gTTS fallback."""
+    safe = re.sub(r'[^\w]', '_', text[:10]).strip()
+    out_path = os.path.join(dirpath, f"tts_{safe}.wav")
+    if os.path.exists(out_path):
+        return out_path
+
     try:
+        # Kokoro pipeline returns a sequence of numpy audio arrays
+        segments = pipeline(text, voice=voice_code, speed=VOICE_SPEED, split_pattern=r'\n+')
+        arrays = [seg_audio for _, _, seg_audio in segments]
+        combined = np.concatenate(arrays, axis=0)
+        sf.write(out_path, combined, 24000)
+    except Exception:
+        # fallback to gTTS
+        mp3 = os.path.join(dirpath, f"{safe}.mp3")
+        gTTS(text=text, lang='en').save(mp3)
+        wav = AudioSegment.from_mp3(mp3)
+        wav.export(out_path, format="wav")
+        os.remove(mp3)
+    return out_path
+
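A quick smoke test for this function, assuming the stock 'af_heart' Kokoro voice is installed (a sketch, not part of app.py):

    tmp = tempfile.mkdtemp()
    wav = generate_tts_audio("This is a short narration test.", "af_heart", tmp)
    data, rate = sf.read(wav)
    print(f"{len(data) / rate:.1f} s at {rate} Hz")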
+# ────────── VIDEO + SUBTITLES ──────────
+def add_pillow_subtitles(clip, sections):
+    """Overlay each sentence as timed subtitle clips."""
+    subtitles = []
+    cum_time = 0
+    total_words = sum(len(s.split()) for _, s in sections)
+    for title, sentence in sections:
+        # split sentence into 5-word chunks
+        words = sentence.split()
+        chunks = [words[i:i+5] for i in range(0, len(words), 5)]
+        # screen time proportional to this sentence's share of all words
+        seg_dur = clip.duration * (len(words) / total_words)
+        # approximate each chunk duration
+        chunk_dur = seg_dur / len(chunks) if chunks else seg_dur
         for i, chunk in enumerate(chunks):
+            txt = ' '.join(chunk)
+            txt_clip = (
+                TextClip(txt, fontsize=CAPTION_FONT_SIZE, font='Arial-Bold',
+                         color='white', bg_color='rgba(0,0,0,0.3)',
+                         size=(int(TARGET_RESOLUTION[0] * 0.9), None),
+                         method='caption')  # 'caption' wraps text to the given width
+                .set_start(cum_time + i * chunk_dur)
+                .set_duration(chunk_dur)
+                .set_position(('center', int(TARGET_RESOLUTION[1] * 0.8)))
+            )
+            subtitles.append(txt_clip)
+        cum_time += seg_dur
+    return subtitles
+
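The timing here is purely proportional: a sentence's share of the total word count fixes its share of the clip, and that span is then split evenly across its 5-word chunks. For a 64 s clip and hypothetical sentences of 8, 12, and 12 words:

    clip_duration = 64.0
    word_counts = [8, 12, 12]   # hypothetical sentence lengths
    total = sum(word_counts)    # 32
    print([clip_duration * w / total for w in word_counts])
    # [16.0, 24.0, 24.0]; the 8-word sentence shows for 16 s, 8 s per chunk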
+def generate_video(topic, include_captions, music_file, voice_choice):
+    if not os.path.exists(SOURCE_VIDEO_PATH):
+        raise FileNotFoundError(f"{SOURCE_VIDEO_PATH} not found.")
+    # 1) get script & sections
+    script = generate_script(topic)
+    sections = parse_script(script)
+    # 2) TTS each sentence (map the dropdown's display name to a Kokoro voice code)
+    tmp = tempfile.mkdtemp()
+    tts_paths = [
+        generate_tts_audio(sentence, VOICE_MAP[voice_choice], tmp)
+        for _, sentence in sections
+    ]
+    # 3) concatenate all TTS audios
+    clips_audio = [AudioFileClip(p) for p in tts_paths]
+    narration = concatenate_audioclips(clips_audio)
+    total_dur = narration.duration
+
+    # 4) pick one random subclip (keep src open: the subclip shares its reader)
+    src = VideoFileClip(SOURCE_VIDEO_PATH)
+    start = random.uniform(0, max(0, src.duration - total_dur))
+    video = src.subclip(start, start + total_dur).resize(TARGET_RESOLUTION)
+
+    # 5) overlay narration
+    video = video.set_audio(narration)
+
+    # 6) optional subtitles
+    if include_captions:
+        subs = add_pillow_subtitles(video, sections)
+        video = CompositeVideoClip([video, *subs])
+
+    # 7) optional background music
+    if music_file:
+        bg = AudioFileClip(getattr(music_file, 'name', music_file))  # gr.File may pass a path or a file object
+        # loop or trim to the video length
+        loops = math.ceil(video.duration / bg.duration)
+        bg = concatenate_audioclips([bg] * loops).subclip(0, video.duration).volumex(BG_MUSIC_VOLUME)
+        mixed = CompositeAudioClip([video.audio, bg])
+        video = video.set_audio(mixed)
+
+    # 8) export
+    video.write_videofile(OUTPUT_VIDEO_PATH, codec='libx264', fps=30, preset='veryfast')
+
+    # cleanup
+    src.close()
+    shutil.rmtree(tmp)
+    return OUTPUT_VIDEO_PATH
+
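Usage sketch, bypassing the UI (assumes video.mp4 sits next to app.py and the OpenRouter key is valid; this spends API credits and renders a file):

    out = generate_video("The history of lighthouses", True, None, "Emma (Female)")
    print("wrote", out)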
+# ────────── GRADIO UI ──────────
+VOICE_MAP = {
+    'Emma (Female)': 'af_heart',
+    'Bella (Female)': 'af_bella',
+    # … add others as needed …
+}
+
 iface = gr.Interface(
     fn=generate_video,
     inputs=[
+        gr.Textbox(label="Video Concept"),
+        gr.Checkbox(label="Include Captions"),
+        gr.File(label="Background Music (MP3)", file_types=[".mp3"]),
+        gr.Dropdown(list(VOICE_MAP.keys()), label="Voice", value="Emma (Female)")
     ],
     outputs=gr.Video(label="Generated Video"),
+    title="AI Documentary Video Generator",
+    description="Cuts one ~64 s clip from your 13 min video, adds AI narration & TikTok-style subtitles."
 )

 if __name__ == "__main__":
+    iface.launch(share=True)
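One packaging note: none of the new imports ship with the default Space image, so requirements.txt presumably needs entries along these lines (versions are a guess; moviepy must stay on the 1.x line for moviepy.editor to exist, and Kokoro additionally wants the espeak-ng system package via packages.txt):

    kokoro
    soundfile
    pydub
    gTTS
    requests
    numpy
    gradio
    moviepy==1.0.3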