import gradio as gr
import tempfile
import os
from moviepy.editor import (
    VideoFileClip, AudioFileClip, TextClip,
    CompositeVideoClip, concatenate_videoclips,
)
from pydub import AudioSegment
import whisper
import json
import requests
from dotenv import load_dotenv
load_dotenv()
# Configuration
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "tngtech/deepseek-r1t-chimera:free"
TARGET_RESOLUTION = (1080, 1920)
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
CAPTION_COLOR = "white"
YOUR_SITE_URL = "http://localhost" # Replace with your site URL
YOUR_SITE_NAME = "YouTube Short Creator" # Replace with your site name
# Placeholder for Kokoro TTS
def kokoro_tts(text):
    # TODO: Replace with actual Kokoro TTS implementation
    # Should return the path to a generated audio file
    return "dummy_audio.wav"
def generate_script(topic):
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": YOUR_SITE_URL,
                "X-Title": YOUR_SITE_NAME,
            },
            data=json.dumps({
                "model": OPENROUTER_MODEL,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Generate a script about {topic} divided into parts, and output it as a JSON array of strings. Do not say anything else."
                    }
                ],
            }),
            timeout=60  # LLM completions routinely take longer than 10 s
        )
        response.raise_for_status()
        response_data = response.json()
        script_json = response_data["choices"][0]["message"]["content"].strip()
        # Models sometimes wrap the JSON in a markdown code fence; strip it
        if script_json.startswith("```"):
            script_json = script_json.strip("`").strip()
            if script_json.lower().startswith("json"):
                script_json = script_json[4:]
        return json.loads(script_json)
    except requests.exceptions.ConnectionError as e:
        raise Exception(f"Failed to connect to the OpenRouter API: {e}. Check your internet connection or DNS settings.")
    except requests.exceptions.HTTPError as e:
        raise Exception(f"The OpenRouter API returned an error: {e}. Verify your API key and model name.")
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while contacting the OpenRouter API: {e}")
    except (json.JSONDecodeError, KeyError):
        raise Exception("Failed to parse the API response as JSON, or the response had an unexpected format.")
def generate_audio(script_parts, temp_folder):
    full_audio = AudioSegment.empty()
    for part in script_parts:
        audio_file = kokoro_tts(part)
        audio_segment = AudioSegment.from_file(audio_file)
        silence = AudioSegment.silent(duration=300)  # 0.3 s gap between parts
        full_audio += audio_segment + silence
    full_audio = full_audio[:-300]  # Drop the trailing silence after the last part
    audio_path = os.path.join(temp_folder, "full_audio.wav")
    full_audio.export(audio_path, format="wav")
    return audio_path
def generate_subtitles(audio_path):
    model = whisper.load_model("base")
    # word_timestamps=True is required for the per-word highlighting below
    result = model.transcribe(audio_path, word_timestamps=True)
    return result['segments']
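# Illustrative shape of the segments returned by Whisper with
# word_timestamps=True (timings here are made up; only the keys used
# below matter):
#   [{'start': 0.0, 'end': 2.4, 'text': ' Hello world',
#     'words': [{'word': ' Hello', 'start': 0.0, 'end': 0.6},
#               {'word': ' world', 'start': 0.7, 'end': 1.1}]}, ...]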
def process_background_video(audio_duration):
    # Expects a local background clip named video.mp4 next to this script
    background = VideoFileClip("video.mp4")
    # Scale to the target height, then center-crop to the target width (9:16)
    background = background.resize(height=TARGET_RESOLUTION[1])
    if background.w > TARGET_RESOLUTION[0]:
        background = background.crop(x_center=background.w / 2, width=TARGET_RESOLUTION[0])
    required_duration = audio_duration + 0.5
    if background.duration < required_duration:
        # Loop the clip enough times to cover the narration, then trim
        n_loops = int(required_duration / background.duration) + 1
        background = concatenate_videoclips([background] * n_loops)
    return background.set_duration(required_duration)
def create_subtitle_clips(segments, video_height=TARGET_RESOLUTION[1], font_size=24, color=CAPTION_COLOR, highlight_color='yellow'):
    subtitle_y = video_height - 200
    all_words = [word for segment in segments for word in segment['words']]
    # Group the words into lines of five; for each word, render the whole line
    # with that word highlighted, shown for exactly that word's duration
    chunks = [all_words[i:i + 5] for i in range(0, len(all_words), 5)]
    subtitle_clips = []
    for chunk in chunks:
        for i, word in enumerate(chunk):
            line_clip = create_text_line(chunk, i, font_size, color, highlight_color)
            line_clip = line_clip.set_start(word['start']).set_end(word['end']).set_pos(('center', subtitle_y))
            subtitle_clips.append(line_clip)
    return subtitle_clips
def create_text_line(words, highlighted_index, font_size, color, highlight_color):
    space_clip = TextClip(" ", fontsize=font_size, color=color)
    space_width = space_clip.w
    text_clips = []
    total_width = 0
    for i, word in enumerate(words):
        c = highlight_color if i == highlighted_index else color
        text_clip = TextClip(word['word'], fontsize=font_size, color=c)
        text_clips.append(text_clip)
        total_width += text_clip.w + (space_width if i < len(words) - 1 else 0)
    # Lay the words out left to right inside a composite sized to the full line.
    # Positions are relative to the composite's top-left corner, so start at 0;
    # starting at -total_width/2 (as before) would push half the line off-canvas.
    # Centering on screen is handled by the caller via set_pos(('center', y)).
    current_x = 0
    positioned_clips = []
    for clip in text_clips:
        positioned_clips.append(clip.set_pos((current_x, 0)))
        current_x += clip.w + space_width
    return CompositeVideoClip(positioned_clips, size=(total_width, text_clips[0].h))
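# Example of the layout above: for the chunk ["AI", "is", "wild"] with "is"
# highlighted, three TextClips are built (white, yellow, white) and placed at
# x = 0, x = w("AI") + space, and x = w("AI") + w("is") + 2 * space inside a
# single composite, which create_subtitle_clips then centers on screen.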
def generate_video(topic):
    with tempfile.TemporaryDirectory() as temp_folder:
        script_parts = generate_script(topic)
        audio_path = generate_audio(script_parts, temp_folder)
        audio_duration = AudioSegment.from_file(audio_path).duration_seconds
        segments = generate_subtitles(audio_path)
        background = process_background_video(audio_duration)
        subtitle_clips = create_subtitle_clips(segments)
        audio_clip = AudioFileClip(audio_path)
        final_video = background.set_audio(audio_clip)
        final_video = CompositeVideoClip([final_video] + subtitle_clips)
        # Write outside the temporary folder: the folder is deleted when this
        # `with` block exits, which would otherwise hand Gradio a dangling path
        output_path = os.path.abspath(OUTPUT_VIDEO_FILENAME)
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    return output_path
# Gradio UI
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Topic"),
    outputs=gr.Video(label="Generated YouTube Short"),
    title="YouTube Short Creator"
)
if __name__ == "__main__":
    iface.launch()