import json
import os
import tempfile

import gradio as gr
import requests
import whisper
from dotenv import load_dotenv
from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    TextClip,
    VideoFileClip,
    concatenate_videoclips,
)
from pydub import AudioSegment

load_dotenv()

# Configuration
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "tngtech/deepseek-r1t-chimera:free"
TARGET_RESOLUTION = (1080, 1920)  # width x height, portrait for Shorts
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
CAPTION_COLOR = "white"
YOUR_SITE_URL = "http://localhost"  # Replace with your site URL
YOUR_SITE_NAME = "YouTube Short Creator"  # Replace with your site name


# Placeholder for Kokoro TTS
def kokoro_tts(text):
    # TODO: Replace with actual Kokoro TTS implementation.
    # Should return the path to a generated audio file.
    return "dummy_audio.wav"
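
# A minimal sketch of what kokoro_tts could look like, assuming the `kokoro`
# package (hexgrad's Kokoro-82M pipeline) plus `soundfile` and `numpy` are
# installed; the function name, lang_code, and voice below are assumptions,
# not part of the original script. Swap it in for kokoro_tts above if it
# matches your setup.
def kokoro_tts_sketch(text, out_dir=None):
    from kokoro import KPipeline  # assumed dependency: pip install kokoro soundfile
    import numpy as np
    import soundfile as sf

    pipeline = KPipeline(lang_code="a")  # "a" = American English (assumed)
    # The pipeline yields (graphemes, phonemes, audio) per synthesized chunk.
    chunks = [np.asarray(audio) for _, _, audio in pipeline(text, voice="af_heart")]
    out_path = os.path.join(out_dir or tempfile.gettempdir(),
                            f"tts_{abs(hash(text))}.wav")
    sf.write(out_path, np.concatenate(chunks), 24000)  # Kokoro outputs 24 kHz audio
    return out_path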

def generate_script(topic):
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": YOUR_SITE_URL,
                "X-Title": YOUR_SITE_NAME,
            },
            data=json.dumps({
                "model": OPENROUTER_MODEL,
                "messages": [
                    {
                        "role": "user",
                        "content": (
                            f"Generate a script about {topic} divided into parts, "
                            "and output it as a JSON array of strings. "
                            "Do not say anything else."
                        ),
                    }
                ],
            }),
            timeout=60,  # LLM completions can easily take longer than 10 seconds
        )
        response.raise_for_status()
        response_data = response.json()
        script_json = response_data["choices"][0]["message"]["content"]
        return json.loads(script_json)
    except requests.exceptions.ConnectionError as e:
        raise Exception(
            f"Failed to connect to OpenRouter API: {e}. "
            "Please check your internet connection or DNS settings."
        )
    except requests.exceptions.HTTPError as e:
        raise Exception(
            f"OpenRouter API returned an error: {e}. "
            "Please verify your API key and model."
        )
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while contacting OpenRouter API: {e}")
    except (json.JSONDecodeError, KeyError):
        raise Exception("Failed to parse API response as JSON or unexpected response format.")


def generate_audio(script_parts, temp_folder):
    full_audio = AudioSegment.empty()
    for part in script_parts:
        audio_file = kokoro_tts(part)
        audio_segment = AudioSegment.from_file(audio_file)
        silence = AudioSegment.silent(duration=300)  # 0.3 s gap between parts
        full_audio += audio_segment + silence
    full_audio = full_audio[:-300]  # Trim the trailing silence
    audio_path = os.path.join(temp_folder, "full_audio.wav")
    full_audio.export(audio_path, format="wav")
    return audio_path


def generate_subtitles(audio_path):
    model = whisper.load_model("base")
    result = model.transcribe(audio_path, word_timestamps=True)
    return result["segments"]


def process_background_video(audio_duration):
    target_w, target_h = TARGET_RESOLUTION
    background = VideoFileClip("video.mp4")
    # Scale to the target height, then center-crop to the target width.
    background = background.resize(height=target_h)
    if background.w > target_w:
        background = background.crop(x_center=background.w / 2, width=target_w)
    required_duration = audio_duration + 0.5
    if background.duration < required_duration:
        n_loops = int(required_duration / background.duration) + 1
        background = concatenate_videoclips([background] * n_loops)
    return background.set_duration(required_duration)


def create_subtitle_clips(segments, video_height=TARGET_RESOLUTION[1], font_size=24,
                          color=CAPTION_COLOR, highlight_color="yellow"):
    subtitle_y = video_height - 200
    all_words = [word for segment in segments for word in segment["words"]]
    # Show five words per line, re-rendering the line once per word so the
    # currently spoken word is highlighted.
    chunks = [all_words[i:i + 5] for i in range(0, len(all_words), 5)]
    subtitle_clips = []
    for chunk in chunks:
        for i, word in enumerate(chunk):
            line_clip = create_text_line(chunk, i, font_size, color, highlight_color)
            line_clip = (line_clip.set_start(word["start"])
                                  .set_end(word["end"])
                                  .set_pos(("center", subtitle_y)))
            subtitle_clips.append(line_clip)
    return subtitle_clips


def create_text_line(words, highlighted_index, font_size, color, highlight_color):
    space_clip = TextClip(" ", fontsize=font_size, color=color)
    space_width = space_clip.w
    text_clips = []
    total_width = 0
    for i, word in enumerate(words):
        c = highlight_color if i == highlighted_index else color
        text_clip = TextClip(word["word"], fontsize=font_size, color=c)
        text_clips.append(text_clip)
        total_width += text_clip.w + (space_width if i < len(words) - 1 else 0)
    # Lay the words out left to right inside a composite sized to the line.
    # Coordinates are relative to the composite, so start at x = 0: starting
    # at -total_width / 2 would push half the line outside the frame.
    current_x = 0
    positioned_clips = []
    for clip in text_clips:
        positioned_clips.append(clip.set_pos((current_x, 0)))
        current_x += clip.w + space_width
    return CompositeVideoClip(positioned_clips, size=(total_width, text_clips[0].h))


def generate_video(topic):
    with tempfile.TemporaryDirectory() as temp_folder:
        script_parts = generate_script(topic)
        audio_path = generate_audio(script_parts, temp_folder)
        audio_duration = AudioSegment.from_file(audio_path).duration_seconds
        segments = generate_subtitles(audio_path)
        background = process_background_video(audio_duration)
        subtitle_clips = create_subtitle_clips(segments)
        audio_clip = AudioFileClip(audio_path)
        final_video = background.set_audio(audio_clip)
        final_video = CompositeVideoClip([final_video] + subtitle_clips)
        # Write outside the temporary folder: it is deleted when this block
        # exits, so a path inside it would be gone before Gradio could serve it.
        output_path = os.path.abspath(OUTPUT_VIDEO_FILENAME)
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
        return output_path


# Gradio UI
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Topic"),
    outputs=gr.Video(label="Generated YouTube Short"),
    title="YouTube Short Creator",
)

if __name__ == "__main__":
    iface.launch()
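
# Runtime notes:
#   - The MoviePy calls above (resize/crop/set_duration) follow the 1.x API;
#     MoviePy 2.x renamed them (resized/cropped/with_duration).
#   - TextClip rendering requires ImageMagick on the PATH.
#   - A background clip named "video.mp4" must sit next to this script.
#   - OPENROUTER_API_KEY must be set in the environment or a .env file.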