import gradio as gr
import tempfile
import os
from moviepy.editor import (
    VideoFileClip, AudioFileClip, TextClip,
    CompositeVideoClip, concatenate_videoclips,
)
from pydub import AudioSegment
import whisper
import json
import requests
from dotenv import load_dotenv
load_dotenv()
# Configuration
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "tngtech/deepseek-r1t-chimera:free"
TARGET_RESOLUTION = (1080, 1920)
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
CAPTION_COLOR = "white"
YOUR_SITE_URL = "http://localhost" # Replace with your site URL
YOUR_SITE_NAME = "YouTube Short Creator" # Replace with your site name
# Placeholder for Kokoro TTS
def kokoro_tts(text):
    # TODO: Replace with actual Kokoro TTS implementation
    # Should return the path to a generated audio file
    return "dummy_audio.wav"
def generate_script(topic):
    try:
        response = requests.post(
            url="https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
                "HTTP-Referer": YOUR_SITE_URL,
                "X-Title": YOUR_SITE_NAME,
            },
            data=json.dumps({
                "model": OPENROUTER_MODEL,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Generate a script about {topic} divided into parts, and output it as a JSON array of strings. Do not say anything else."
                    }
                ],
            }),
            timeout=60  # LLM completions routinely take longer than 10 s
        )
        response.raise_for_status()
        response_data = response.json()
        script_json = response_data["choices"][0]["message"]["content"].strip()
        # Models sometimes wrap the JSON in a markdown code fence; strip it
        if script_json.startswith("```"):
            script_json = script_json.strip("`").strip()
            if script_json.lower().startswith("json"):
                script_json = script_json[4:]
        return json.loads(script_json)
    except requests.exceptions.ConnectionError as e:
        raise Exception(f"Failed to connect to the OpenRouter API: {e}. Check your internet connection or DNS settings.")
    except requests.exceptions.HTTPError as e:
        raise Exception(f"The OpenRouter API returned an error: {e}. Verify your API key and model name.")
    except requests.exceptions.RequestException as e:
        raise Exception(f"An error occurred while contacting the OpenRouter API: {e}")
    except (json.JSONDecodeError, KeyError):
        raise Exception("Failed to parse the API response as JSON, or the response had an unexpected format.")
def generate_audio(script_parts, temp_folder):
    full_audio = AudioSegment.empty()
    for part in script_parts:
        audio_file = kokoro_tts(part)
        audio_segment = AudioSegment.from_file(audio_file)
        silence = AudioSegment.silent(duration=300)  # 0.3 s gap between parts
        full_audio += audio_segment + silence
    full_audio = full_audio[:-300]  # Drop the trailing silence after the last part
    audio_path = os.path.join(temp_folder, "full_audio.wav")
    full_audio.export(audio_path, format="wav")
    return audio_path
def generate_subtitles(audio_path):
    model = whisper.load_model("base")
    # word_timestamps=True is required for the per-word highlighting below
    result = model.transcribe(audio_path, word_timestamps=True)
    return result['segments']
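# Illustrative shape of the segments returned by Whisper with
# word_timestamps=True (timings here are made up; only the keys used
# below matter):
#   [{'start': 0.0, 'end': 2.4, 'text': ' Hello world',
#     'words': [{'word': ' Hello', 'start': 0.0, 'end': 0.6},
#               {'word': ' world', 'start': 0.7, 'end': 1.1}]}, ...]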
def process_background_video(audio_duration):
    # Expects a local background clip named video.mp4 next to this script
    background = VideoFileClip("video.mp4")
    # Scale to the target height, then center-crop to the target width (9:16)
    background = background.resize(height=TARGET_RESOLUTION[1])
    if background.w > TARGET_RESOLUTION[0]:
        background = background.crop(x_center=background.w / 2, width=TARGET_RESOLUTION[0])
    required_duration = audio_duration + 0.5
    if background.duration < required_duration:
        # Loop the clip enough times to cover the narration, then trim
        n_loops = int(required_duration / background.duration) + 1
        background = concatenate_videoclips([background] * n_loops)
    return background.set_duration(required_duration)
def create_subtitle_clips(segments, video_height=TARGET_RESOLUTION[1], font_size=24, color=CAPTION_COLOR, highlight_color='yellow'):
    subtitle_y = video_height - 200
    all_words = [word for segment in segments for word in segment['words']]
    # Group the words into lines of five; for each word, render the whole line
    # with that word highlighted, shown for exactly that word's duration
    chunks = [all_words[i:i + 5] for i in range(0, len(all_words), 5)]
    subtitle_clips = []
    for chunk in chunks:
        for i, word in enumerate(chunk):
            line_clip = create_text_line(chunk, i, font_size, color, highlight_color)
            line_clip = line_clip.set_start(word['start']).set_end(word['end']).set_pos(('center', subtitle_y))
            subtitle_clips.append(line_clip)
    return subtitle_clips
def create_text_line(words, highlighted_index, font_size, color, highlight_color):
    space_clip = TextClip(" ", fontsize=font_size, color=color)
    space_width = space_clip.w
    text_clips = []
    total_width = 0
    for i, word in enumerate(words):
        c = highlight_color if i == highlighted_index else color
        text_clip = TextClip(word['word'], fontsize=font_size, color=c)
        text_clips.append(text_clip)
        total_width += text_clip.w + (space_width if i < len(words) - 1 else 0)
    # Lay the words out left to right inside a composite sized to the full line.
    # Positions are relative to the composite's top-left corner, so start at 0;
    # starting at -total_width/2 (as before) would push half the line off-canvas.
    # Centering on screen is handled by the caller via set_pos(('center', y)).
    current_x = 0
    positioned_clips = []
    for clip in text_clips:
        positioned_clips.append(clip.set_pos((current_x, 0)))
        current_x += clip.w + space_width
    return CompositeVideoClip(positioned_clips, size=(total_width, text_clips[0].h))
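# Example of the layout above: for the chunk ["AI", "is", "wild"] with "is"
# highlighted, three TextClips are built (white, yellow, white) and placed at
# x = 0, x = w("AI") + space, and x = w("AI") + w("is") + 2 * space inside a
# single composite, which create_subtitle_clips then centers on screen.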
def generate_video(topic):
    with tempfile.TemporaryDirectory() as temp_folder:
        script_parts = generate_script(topic)
        audio_path = generate_audio(script_parts, temp_folder)
        audio_duration = AudioSegment.from_file(audio_path).duration_seconds
        segments = generate_subtitles(audio_path)
        background = process_background_video(audio_duration)
        subtitle_clips = create_subtitle_clips(segments)
        audio_clip = AudioFileClip(audio_path)
        final_video = background.set_audio(audio_clip)
        final_video = CompositeVideoClip([final_video] + subtitle_clips)
        # Write outside the temporary folder: the folder is deleted when this
        # `with` block exits, which would otherwise hand Gradio a dangling path
        output_path = os.path.abspath(OUTPUT_VIDEO_FILENAME)
        final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
    return output_path
# Gradio UI
iface = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Topic"),
    outputs=gr.Video(label="Generated YouTube Short"),
    title="YouTube Short Creator"
)
if __name__ == "__main__":
    iface.launch()