import gradio as gr
import tempfile
import os
from moviepy.editor import *
from pydub import AudioSegment
import whisper
import json
import requests
from dotenv import load_dotenv

load_dotenv()

# Configuration
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "tngtech/deepseek-r1t-chimera:free"
TARGET_RESOLUTION = (1080, 1920)
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
CAPTION_COLOR = "white"
YOUR_SITE_URL = "http://localhost"  # Replace with your site URL
YOUR_SITE_NAME = "YouTube Short Creator"  # Replace with your site name

# Placeholder for Kokoro TTS
def kokoro_tts(text):
    # TODO: Replace with actual Kokoro TTS implementation
    # Should return path to generated audio file
    return "dummy_audio.wav"

def generate_script(topic):
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": YOUR_SITE_URL,
                "X-Title": YOUR_SITE_NAME,
            },
            data=json.dumps({
                "model": OPENROUTER_MODEL,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Generate a short narration script about {topic}, divided into parts. Output only a JSON array of strings, with no other text."
                    }
                ],
            }),
            timeout=60  # reasoning models can take well over 10 seconds to respond
        )
        response.raise_for_status()
        response_data = response.json()
        script_json = response_data["choices"][0]["message"]["content"]
        return json.loads(script_json)
    except requests.exceptions.ConnectionError as e:
        raise Exception(f"Failed to connect to the OpenRouter API: {str(e)}. Please check your internet connection or DNS settings.")
    except requests.exceptions.HTTPError as e:
        raise Exception(f"The OpenRouter API returned an error: {str(e)}. Please verify your API key and model.")
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while contacting the OpenRouter API: {str(e)}")
    except (json.JSONDecodeError, KeyError):
        raise Exception("Failed to parse the API response as JSON, or the response had an unexpected format.")

def generate_audio(script_parts, temp_folder):
    full_audio = AudioSegment.empty()
    for part in script_parts:
        audio_file = kokoro_tts(part)
        audio_segment = AudioSegment.from_file(audio_file)
        silence = AudioSegment.silent(duration=300)  # 0.3s gap
        full_audio += audio_segment + silence
    full_audio = full_audio[:-300]  # Remove last silence
    audio_path = os.path.join(temp_folder, "full_audio.wav")
    full_audio.export(audio_path, format="wav")
    return audio_path

def generate_subtitles(audio_path):
    # word_timestamps=True gives per-word timings, used for word-level highlighting
    model = whisper.load_model("base")
    result = model.transcribe(audio_path, word_timestamps=True)
    return result['segments']

def process_background_video(audio_duration):
    # Expects a background clip named "video.mp4" in the working directory
    background = VideoFileClip("video.mp4")
    target_w, target_h = TARGET_RESOLUTION
    # Scale to the target height, then center-crop to the target width (9:16)
    background = background.resize(height=target_h)
    if background.w > target_w:
        background = background.crop(x_center=background.w / 2, width=target_w)
    # Loop the clip until it covers the narration plus a small tail
    required_duration = audio_duration + 0.5
    if background.duration < required_duration:
        n_loops = int(required_duration / background.duration) + 1
        background = concatenate_videoclips([background] * n_loops)
    return background.set_duration(required_duration)

def create_subtitle_clips(segments, video_height=1920, font_size=24, color=CAPTION_COLOR, highlight_color='yellow'):
    subtitle_y = video_height - 200
    # Flatten the Whisper segments into one word list, then group into 5-word lines
    all_words = [word for segment in segments for word in segment['words']]
    chunks = [all_words[i:i + 5] for i in range(0, len(all_words), 5)]
    subtitle_clips = []
    for chunk in chunks:
        # For each word, render the whole line with that word highlighted,
        # shown only during that word's time span (karaoke-style captions)
        for i, word in enumerate(chunk):
            line_clip = create_text_line(chunk, i, font_size, color, highlight_color)
            line_clip = line_clip.set_start(word['start']).set_end(word['end']).set_pos(('center', subtitle_y))
            subtitle_clips.append(line_clip)
    return subtitle_clips

def create_text_line(words, highlighted_index, font_size, color, highlight_color):
    # Measure a space so words can be laid out side by side with natural gaps
    space_clip = TextClip(" ", fontsize=font_size, color=color)
    space_width = space_clip.w
    text_clips = []
    total_width = 0
    for i, word in enumerate(words):
        c = highlight_color if i == highlighted_index else color
        text_clip = TextClip(word['word'], fontsize=font_size, color=c)
        text_clips.append(text_clip)
        total_width += text_clip.w + (space_width if i < len(words) - 1 else 0)
    # Positions are relative to the line composite's top-left corner, so start at 0;
    # the composite itself is centered later via set_pos in create_subtitle_clips
    current_x = 0
    positioned_clips = []
    for clip in text_clips:
        positioned_clips.append(clip.set_pos((current_x, 0)))
        current_x += clip.w + space_width
    return CompositeVideoClip(positioned_clips, size=(int(total_width), text_clips[0].h))

def generate_video(topic):
    with tempfile.TemporaryDirectory() as temp_folder:
        script_parts = generate_script(topic)
        audio_path = generate_audio(script_parts, temp_folder)
        audio_duration = AudioSegment.from_file(audio_path).duration_seconds
        segments = generate_subtitles(audio_path)
        background = process_background_video(audio_duration)
        subtitle_clips = create_subtitle_clips(segments)
        audio_clip = AudioFileClip(audio_path)
        final_video = background.set_audio(audio_clip)
        final_video = CompositeVideoClip([final_video] + subtitle_clips)
        # Write the result outside the temporary folder: the folder and everything
        # in it are deleted as soon as this `with` block exits, which would hand
        # Gradio a path to a file that no longer exists.
        output_path = os.path.abspath(OUTPUT_VIDEO_FILENAME)
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    return output_path

# Gradio UI
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Topic"),
    outputs=gr.Video(label="Generated YouTube Short"),
    title="YouTube Short Creator"
)

if __name__ == "__main__":
    iface.launch()