File size: 6,018 Bytes
3d3cf6f
c25723f
3d3cf6f
 
c25723f
3d3cf6f
0e69295
3d3cf6f
8a41c00
 
 
878d3d4
3d3cf6f
8a41c00
4217c2c
3d3cf6f
0e69295
 
4217c2c
 
0e69295
3d3cf6f
 
 
 
 
4217c2c
3d3cf6f
ad33580
 
4217c2c
 
 
 
 
 
 
 
 
 
 
 
e6bebc2
4217c2c
 
 
 
ad33580
4217c2c
 
 
ad33580
 
 
 
 
 
 
4217c2c
 
3d3cf6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878d3d4
 
3d3cf6f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import gradio as gr
import tempfile
import os
from moviepy.editor import *
from pydub import AudioSegment
import whisper
import json
import requests
from dotenv import load_dotenv
load_dotenv()


# Configuration
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # loaded from .env by load_dotenv() above
OPENROUTER_MODEL = "tngtech/deepseek-r1t-chimera:free"  # free-tier reasoning model on OpenRouter
TARGET_RESOLUTION = (1080, 1920)  # portrait (width, height); NOTE(review): not referenced below — confirm intended
OUTPUT_VIDEO_FILENAME = "final_video.mp4"  # name of the rendered output file
CAPTION_COLOR = "white"  # NOTE(review): not referenced below — subtitle color comes from parameter defaults instead
YOUR_SITE_URL = "http://localhost"  # Replace with your site URL
YOUR_SITE_NAME = "YouTube Short Creator"  # Replace with your site name

# Placeholder for Kokoro TTS
def kokoro_tts(text):
    """Synthesize *text* to speech and return the path of the audio file.

    TODO: replace with a real Kokoro TTS call; for now every input maps to
    the same fixed dummy path.
    """
    return "dummy_audio.wav"

def generate_script(topic):
    """Ask the OpenRouter API for a short-form video script about *topic*.

    Returns:
        The script as a list of strings, one entry per part (parsed from the
        model's JSON-array response).

    Raises:
        Exception: on connection, HTTP, request, or response-format failures.
            The underlying error is chained as ``__cause__`` for debugging.
    """
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": YOUR_SITE_URL,
                "X-Title": YOUR_SITE_NAME,
            },
            data=json.dumps({
                "model": OPENROUTER_MODEL,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Generate a script about {topic} divided into parts, and output it as a JSON array of strings. do not say anything esle"
                    }
                ],
            }),
            # Reasoning models routinely take longer than 10s to answer; the
            # previous timeout=10 caused spurious failures.
            timeout=60
        )
        response.raise_for_status()
        response_data = response.json()
        script_json = response_data["choices"][0]["message"]["content"].strip()
        # Models often wrap JSON in ```...``` fences despite instructions;
        # strip the fences (and an optional leading "json" tag) before parsing.
        if script_json.startswith("```"):
            script_json = script_json.strip("`").strip()
            if script_json.lower().startswith("json"):
                script_json = script_json[4:].strip()
        return json.loads(script_json)
    except requests.exceptions.ConnectionError as e:
        raise Exception(f"Failed to connect to OpenRouter API: {str(e)}. Please check your internet connection or DNS settings.") from e
    except requests.exceptions.HTTPError as e:
        raise Exception(f"OpenRouter API returned an error: {str(e)}. Please verify your API key and model.") from e
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while contacting OpenRouter API: {str(e)}") from e
    except (json.JSONDecodeError, KeyError) as e:
        raise Exception("Failed to parse API response as JSON or unexpected response format.") from e

def generate_audio(script_parts, temp_folder):
    """Synthesize every script part, join them with 0.3s pauses, and export.

    Returns the path of the combined WAV file written into *temp_folder*.
    """
    gap = AudioSegment.silent(duration=300)  # 0.3s pause between parts
    combined = AudioSegment.empty()
    for text in script_parts:
        spoken = AudioSegment.from_file(kokoro_tts(text))
        combined += spoken + gap
    combined = combined[:-300]  # drop the trailing pause after the last part
    out_path = os.path.join(temp_folder, "full_audio.wav")
    combined.export(out_path, format="wav")
    return out_path

def generate_subtitles(audio_path):
    """Transcribe *audio_path* with Whisper ("base" model) and return the
    segment list, including per-word timestamps."""
    transcription = whisper.load_model("base").transcribe(
        audio_path, word_timestamps=True
    )
    return transcription['segments']

def process_background_video(audio_duration, video_path="video.mp4"):
    """Prepare a 1080x1920 portrait background clip covering the narration.

    The source video is scaled to 1920px tall, center-cropped to 1080px wide
    when wider, and looped as many times as needed so the clip lasts at
    least *audio_duration* + 0.5 seconds.

    Args:
        audio_duration: Length of the narration audio, in seconds.
        video_path: Background video file; defaults to the previously
            hard-coded "video.mp4" so existing callers are unaffected.
    """
    background = VideoFileClip(video_path)
    # Scale to the target height first, then trim excess width symmetrically.
    background = background.resize(height=1920)
    if background.w > 1080:
        background = background.crop(x_center=background.w/2, width=1080)
    required_duration = audio_duration + 0.5  # small tail so video outlasts audio
    if background.duration < required_duration:
        # Repeat the clip enough times to cover the required duration.
        n_loops = int(required_duration / background.duration) + 1
        background = concatenate_videoclips([background] * n_loops)
    return background.set_duration(required_duration)

def create_subtitle_clips(segments, video_height=1920, font_size=24, color='white', highlight_color='yellow'):
    """Build karaoke-style subtitle clips from Whisper segments.

    Words are grouped five to a line; for each word, the whole line is
    rendered with that word highlighted and displayed for exactly the
    word's spoken interval, 200px above the bottom of the frame.
    """
    baseline_y = video_height - 200
    words = [w for seg in segments for w in seg['words']]
    clips = []
    for offset in range(0, len(words), 5):
        line = words[offset:offset + 5]
        for idx, word in enumerate(line):
            rendered = create_text_line(line, idx, font_size, color, highlight_color)
            rendered = (rendered
                        .set_start(word['start'])
                        .set_end(word['end'])
                        .set_pos(('center', baseline_y)))
            clips.append(rendered)
    return clips

def create_text_line(words, highlighted_index, font_size, color, highlight_color):
    """Render one subtitle line with the word at *highlighted_index* in
    *highlight_color* and the rest in *color*.

    Returns a CompositeVideoClip exactly wide enough for the words plus one
    space between each pair. *words* must be non-empty (guaranteed by
    create_subtitle_clips, which only passes non-empty chunks).
    """
    space_clip = TextClip(" ", fontsize=font_size, color=color)
    space_width = space_clip.w
    text_clips = []
    total_width = 0
    for i, word in enumerate(words):
        c = highlight_color if i == highlighted_index else color
        text_clip = TextClip(word['word'], fontsize=font_size, color=c)
        text_clips.append(text_clip)
        total_width += text_clip.w + (space_width if i < len(words) - 1 else 0)
    # Positions are relative to this composite's own top-left corner, so lay
    # the words out starting at x=0.  (The previous start of -total_width/2
    # pushed the left half of the line outside the composite's canvas.)
    # Centering the whole line on the video frame is the caller's job via
    # set_pos('center', ...).
    current_x = 0
    positioned_clips = []
    for clip in text_clips:
        positioned_clips.append(clip.set_pos((current_x, 0)))
        current_x += clip.w + space_width
    return CompositeVideoClip(positioned_clips, size=(total_width, text_clips[0].h))

def generate_video(topic):
    """End-to-end pipeline: topic -> script -> narration -> subtitled video.

    Returns the path of the rendered MP4. The render is written to the
    system temp directory rather than into the TemporaryDirectory: the
    previous version returned a path inside the ``with`` block, so the file
    was deleted the moment the function returned and Gradio received a path
    to a nonexistent file.
    """
    # Persistent destination that survives the working folder's cleanup.
    output_path = os.path.join(tempfile.gettempdir(), OUTPUT_VIDEO_FILENAME)
    with tempfile.TemporaryDirectory() as temp_folder:
        script_parts = generate_script(topic)
        audio_path = generate_audio(script_parts, temp_folder)
        audio_duration = AudioSegment.from_file(audio_path).duration_seconds
        segments = generate_subtitles(audio_path)
        background = process_background_video(audio_duration)
        subtitle_clips = create_subtitle_clips(segments)
        audio_clip = AudioFileClip(audio_path)
        final_video = background.set_audio(audio_clip)
        final_video = CompositeVideoClip([final_video] + subtitle_clips)
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    return output_path

# Gradio UI: a single textbox in, a rendered video out.
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Topic"),
    outputs=gr.Video(label="Generated YouTube Short"),
    title="YouTube Short Creator"
)

# Start the local Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    iface.launch()