# NOTE(review): removed web-page scrape artifacts (Spaces status text, git commit
# hashes, and a line-number gutter) that were pasted above the actual source and
# would make this file syntactically invalid Python.
import gradio as gr
import tempfile
import os
from moviepy.editor import *
from pydub import AudioSegment
import whisper
import json
import requests
from dotenv import load_dotenv
load_dotenv()
# Configuration
# API key is read from the environment (populated by load_dotenv above).
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
# Free DeepSeek model served through OpenRouter.
OPENROUTER_MODEL = "tngtech/deepseek-r1t-chimera:free"
# (width, height) for a vertical 9:16 short.
# NOTE(review): not referenced in the visible code — process_background_video
# hard-codes 1080/1920; consider using this constant there.
TARGET_RESOLUTION = (1080, 1920)
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
# NOTE(review): not referenced in the visible code — create_subtitle_clips
# hard-codes color='white' in its defaults instead.
CAPTION_COLOR = "white"
YOUR_SITE_URL = "http://localhost" # Replace with your site URL
YOUR_SITE_NAME = "YouTube Short Creator" # Replace with your site name
# Placeholder for Kokoro TTS
def kokoro_tts(text):
    """Synthesize *text* to speech and return the path to the audio file.

    Placeholder implementation: ignores the input and returns a fixed
    dummy path until the real Kokoro TTS backend is wired in.
    """
    # TODO: Replace with actual Kokoro TTS implementation
    # Should return path to generated audio file
    return "dummy_audio.wav"
def generate_script(topic):
    """Ask the OpenRouter chat API for a script about *topic*.

    Returns the parsed JSON array of script-part strings.

    Raises:
        Exception: with a human-readable message for connection errors,
            HTTP errors, other request failures, or an unparseable /
            unexpectedly shaped API response.
    """
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": YOUR_SITE_URL,
                "X-Title": YOUR_SITE_NAME,
            },
            # `json=` lets requests serialize and set the body itself,
            # replacing the manual data=json.dumps(...) round-trip.
            json={
                "model": OPENROUTER_MODEL,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Generate a script about {topic} divided into parts, and output it as a JSON array of strings. do not say anything esle"
                    }
                ],
            },
            timeout=10
        )
        response.raise_for_status()
        response_data = response.json()
        script_json = response_data["choices"][0]["message"]["content"]
        # Models frequently wrap JSON in markdown code fences despite
        # instructions; strip them before parsing.
        script_json = script_json.strip()
        if script_json.startswith("```"):
            script_json = script_json.strip("`")
            if script_json.startswith("json"):
                script_json = script_json[len("json"):]
            script_json = script_json.strip()
        return json.loads(script_json)
    except requests.exceptions.ConnectionError as e:
        # `from e` preserves the original traceback for debugging.
        raise Exception(f"Failed to connect to OpenRouter API: {str(e)}. Please check your internet connection or DNS settings.") from e
    except requests.exceptions.HTTPError as e:
        raise Exception(f"OpenRouter API returned an error: {str(e)}. Please verify your API key and model.") from e
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while contacting OpenRouter API: {str(e)}") from e
    except (json.JSONDecodeError, KeyError) as e:
        raise Exception("Failed to parse API response as JSON or unexpected response format.") from e
def generate_audio(script_parts, temp_folder):
    """Synthesize every script part, join them with 0.3 s gaps, and
    export the concatenation as a WAV file in *temp_folder*.

    Returns the path to the exported audio file.
    """
    gap = AudioSegment.silent(duration=300)  # 0.3s gap between parts
    combined = AudioSegment.empty()
    for part in script_parts:
        spoken = AudioSegment.from_file(kokoro_tts(part))
        combined += spoken + gap
    # Drop the trailing gap so the audio ends on speech.
    combined = combined[:-300]
    out_path = os.path.join(temp_folder, "full_audio.wav")
    combined.export(out_path, format="wav")
    return out_path
def generate_subtitles(audio_path):
    """Transcribe *audio_path* with Whisper and return the segment list
    (each segment carries per-word timestamps)."""
    transcriber = whisper.load_model("base")
    transcription = transcriber.transcribe(audio_path, word_timestamps=True)
    return transcription['segments']
def process_background_video(audio_duration, video_path="video.mp4"):
    """Prepare the background clip: scale/crop to TARGET_RESOLUTION and
    loop it until it covers the audio plus a 0.5 s tail.

    Args:
        audio_duration: Length of the narration audio in seconds.
        video_path: Source video file (default keeps the original
            hard-coded "video.mp4" behavior).

    Returns:
        A moviepy clip trimmed to audio_duration + 0.5 seconds.
    """
    # Use the module-level TARGET_RESOLUTION instead of repeating the
    # 1080/1920 magic numbers (consistency fix).
    target_w, target_h = TARGET_RESOLUTION
    background = VideoFileClip(video_path)
    background = background.resize(height=target_h)
    if background.w > target_w:
        # Center-crop any horizontal excess after the height resize.
        background = background.crop(x_center=background.w / 2, width=target_w)
    required_duration = audio_duration + 0.5
    if background.duration < required_duration:
        # Loop the clip enough times to cover the narration.
        n_loops = int(required_duration / background.duration) + 1
        background = concatenate_videoclips([background] * n_loops)
    return background.set_duration(required_duration)
def create_subtitle_clips(segments, video_height=1920, font_size=24, color='white', highlight_color='yellow'):
    """Build karaoke-style subtitle clips from Whisper segments.

    Words are grouped five per line; for each word in a line a clip of
    the whole line is created with that word highlighted, shown for the
    word's [start, end) interval, centered 200 px above the bottom.
    """
    baseline_y = video_height - 200
    words = []
    for segment in segments:
        words.extend(segment['words'])
    lines = [words[start:start + 5] for start in range(0, len(words), 5)]
    clips = []
    for line in lines:
        for idx, word in enumerate(line):
            rendered = create_text_line(line, idx, font_size, color, highlight_color)
            rendered = (rendered
                        .set_start(word['start'])
                        .set_end(word['end'])
                        .set_pos(('center', baseline_y)))
            clips.append(rendered)
    return clips
def create_text_line(words, highlighted_index, font_size, color, highlight_color):
    """Render one subtitle line as a composite clip, with the word at
    *highlighted_index* drawn in *highlight_color* and the rest in *color*.

    Returns a CompositeVideoClip exactly as wide as the laid-out text;
    the caller positions it (e.g. centered) on the final frame.

    Raises:
        ValueError: if *words* is empty (there is nothing to render).
    """
    if not words:
        # Guard: the original indexed text_clips[0] and would crash here.
        raise ValueError("create_text_line requires at least one word")
    space_clip = TextClip(" ", fontsize=font_size, color=color)
    space_width = space_clip.w
    text_clips = []
    total_width = 0
    for i, word in enumerate(words):
        c = highlight_color if i == highlighted_index else color
        text_clip = TextClip(word['word'], fontsize=font_size, color=c)
        text_clips.append(text_clip)
        # Add a trailing space after every word except the last.
        total_width += text_clip.w + (space_width if i < len(words) - 1 else 0)
    # BUG FIX: the original started at -total_width / 2, which placed the
    # left half of the line outside the composite's (0..total_width)
    # canvas and clipped it. Positions inside a CompositeVideoClip are
    # relative to its own top-left corner, so layout starts at x = 0;
    # centering on screen is done by the caller via set_pos('center').
    current_x = 0
    positioned_clips = []
    for clip in text_clips:
        positioned_clips.append(clip.set_pos((current_x, 0)))
        current_x += clip.w + space_width
    return CompositeVideoClip(positioned_clips, size=(total_width, text_clips[0].h))
def generate_video(topic):
    """End-to-end pipeline: script -> TTS audio -> subtitles -> video.

    Returns the path to the rendered MP4 for Gradio to display.
    """
    with tempfile.TemporaryDirectory() as temp_folder:
        script_parts = generate_script(topic)
        audio_path = generate_audio(script_parts, temp_folder)
        audio_duration = AudioSegment.from_file(audio_path).duration_seconds
        segments = generate_subtitles(audio_path)
        background = process_background_video(audio_duration)
        subtitle_clips = create_subtitle_clips(segments)
        audio_clip = AudioFileClip(audio_path)
        final_video = background.set_audio(audio_clip)
        final_video = CompositeVideoClip([final_video] + subtitle_clips)
        # BUG FIX: the original wrote the output inside `temp_folder`,
        # which TemporaryDirectory deletes as soon as this function
        # returns — Gradio then received a path to a vanished file.
        # Write to a persistent temp directory instead (the intermediate
        # audio in temp_folder is still cleaned up as before).
        output_dir = tempfile.mkdtemp(prefix="yt_short_")
        output_path = os.path.join(output_dir, OUTPUT_VIDEO_FILENAME)
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
        return output_path
# Gradio UI
# Single text input (the topic) mapped straight to the rendered video.
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Topic"),
    outputs=gr.Video(label="Generated YouTube Short"),
    title="YouTube Short Creator"
)
# Only start the web server when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()