Spaces:
Running
Running
File size: 6,883 Bytes
fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 f96caac 6621c82 f96caac 6621c82 f96caac fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 fd13285 6621c82 f96caac 6621c82 fd13285 6621c82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
import os
import datetime
import hashlib
import requests
import numpy as np
import gradio as gr
import whisper
import srt
import torch
# Display name -> two-letter language code. The display names populate the
# "Audio Language" dropdown and the code is passed to Whisper as `language`.
# Entries are ordered alphabetically by code.
# NOTE(review): confirm every code here is accepted by the installed Whisper
# version (e.g. "ga" and "or" look like they may not be) -- TODO verify.
LANGUAGE_OPTIONS = {
    "Afrikaans": "af",
    "Arabic": "ar",
    "Azerbaijani": "az",
    "Belarusian": "be",
    "Bulgarian": "bg",
    "Bengali": "bn",
    "Catalan": "ca",
    "Czech": "cs",
    "Welsh": "cy",
    "Danish": "da",
    "German": "de",
    "Greek": "el",
    "English": "en",
    "Spanish": "es",
    "Estonian": "et",
    "Persian": "fa",
    "Finnish": "fi",
    "French": "fr",
    "Irish": "ga",
    "Galician": "gl",
    "Gujarati": "gu",
    "Hebrew": "he",
    "Hindi": "hi",
    "Croatian": "hr",
    "Hungarian": "hu",
    "Armenian": "hy",
    "Indonesian": "id",
    "Icelandic": "is",
    "Italian": "it",
    "Japanese": "ja",
    "Georgian": "ka",
    "Kazakh": "kk",
    "Khmer": "km",
    "Kannada": "kn",
    "Korean": "ko",
    "Lithuanian": "lt",
    "Latvian": "lv",
    "Macedonian": "mk",
    "Malayalam": "ml",
    "Mongolian": "mn",
    "Marathi": "mr",
    "Malay": "ms",
    "Maltese": "mt",
    "Nepali": "ne",
    "Dutch": "nl",
    "Norwegian": "no",
    "Odia": "or",
    "Punjabi": "pa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Romanian": "ro",
    "Russian": "ru",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Albanian": "sq",
    "Serbian": "sr",
    "Swedish": "sv",
    "Swahili": "sw",
    "Tamil": "ta",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Vietnamese": "vi",
    "Chinese": "zh",
}
# Whisper models already loaded by this process, keyed by model size name.
# Loading a model reads its weights from disk (or downloads them), so the
# result is kept and reused instead of being rebuilt on every request.
_WHISPER_MODEL_CACHE = {}


def _get_whisper_model(model_size):
    """Return the CPU-resident Whisper model for ``model_size``, loading it once."""
    model = _WHISPER_MODEL_CACHE.get(model_size)
    if model is None:
        model = whisper.load_model(model_size)
        model.to("cpu")
        _WHISPER_MODEL_CACHE[model_size] = model
    return model


def transcribe_audio(audio_file_path, model_size='base', language="en"):
    """Transcribe an audio file with Whisper and build SRT subtitles for it.

    Args:
        audio_file_path: Path of the audio file handed to ``model.transcribe``.
        model_size: Whisper model name passed to ``whisper.load_model``.
        language: Language code forwarded to ``model.transcribe``.

    Returns:
        A ``(transcription, srt_text, segments)`` tuple where ``transcription``
        is ``result["text"]``, ``segments`` is ``result["segments"]`` and
        ``srt_text`` is the SRT rendering of those segments.

    Raises:
        Whatever ``whisper.load_model`` or ``model.transcribe`` raise; only
        the SRT formatting step below has a fallback.
    """
    # NOTE(review): the cached model object is shared between calls; if the
    # app is configured to run several transcriptions concurrently, confirm
    # that sharing one model instance across threads is acceptable.
    model = _get_whisper_model(model_size)
    result = model.transcribe(audio_file_path, language=language)
    transcription = result["text"]
    segments = result["segments"]
    try:
        # Best-effort: use a formatter shipped with Whisper when this install
        # exposes one under that name; any failure (including the import
        # itself) falls through to the local SRT builder.
        from whisper.utils import format_srt
        srt_text = format_srt(segments)
    except Exception:
        srt_text = generate_srt(segments)
    return transcription, srt_text, segments
def generate_srt(segments):
    """Render transcription segments as SRT text via the ``srt`` package.

    Args:
        segments: Iterable of mappings; each one is read for its ``"start"``
            and ``"end"`` values (used as seconds) and its ``"text"``.

    Returns:
        The string produced by ``srt.compose`` for one subtitle per segment,
        numbered from 1 in input order.
    """
    # Function-scope imports are kept so the helper stays self-contained.
    import datetime
    import srt

    cues = [
        srt.Subtitle(
            index=number,
            start=datetime.timedelta(seconds=segment["start"]),
            end=datetime.timedelta(seconds=segment["end"]),
            content=segment["text"],
        )
        for number, segment in enumerate(segments, start=1)
    ]
    return srt.compose(cues)
def prepare_chapter_prompt(srt_text):
    """Build the single copy-paste prompt asking a chat model for chapters.

    The prompt is the instruction text, a blank line, and then the request
    text, which embeds ``srt_text`` inside a fenced block. The instructions
    are written in English but tell the model to answer in the transcript's
    own language, one chapter per line as ``mm:ss Chapter Title``.

    Args:
        srt_text: The SRT transcript to embed in the prompt.

    Returns:
        The complete prompt as one string.
    """
    instructions = "".join([
        "You are a highly skilled video content segmentation and optimization expert. ",
        "Your task is to analyze a transcript of a YouTube video provided in SRT format and produce engaging and concise chapter headers. ",
        "Each chapter header must be on its own line in the exact format: 'mm:ss Chapter Title'.\n\n",
        "- 'mm:ss' represents the starting time of the chapter (minutes and seconds).\n",
        "- 'Chapter Title' must be a catchy, audience-friendly title that summarizes the key idea or transition at that point in the video.\n\n",
        "IMPORTANT: Although these instructions are in English, please ensure that your output is in the same language as the provided SRT transcript.",
    ])
    request = "".join([
        "Below is the transcript of a YouTube video in SRT format:\n\n",
        "```\n",
        f"{srt_text}\n",
        "```\n\n",
        "Please generate only the chapter breakdown using the guidelines above. ",
        "Each chapter header should be formatted as:\n",
        "mm:ss Chapter Title",
    ])
    return "\n\n".join((instructions, request))
def format_prompt_html(prompt):
    """Wrap the prompt in an HTML snippet with a copy-to-clipboard button.

    The snippet contains a read-only textarea styled with Gradio's CSS colour
    variables, a blue 'Copy Prompt' button, and a 'Prompt Copied!' message
    that the button's click handler shows for two seconds.

    Args:
        prompt: Plain-text prompt to display; it may contain arbitrary
            characters because it embeds transcript text.

    Returns:
        The HTML markup as a string.
    """
    # Function-scope import: stdlib only, keeps this block self-contained.
    import html

    # The prompt is text, not markup. Without escaping, a transcript that
    # contains "</textarea>", "<" or "&" would end the textarea early or be
    # interpreted as HTML. Textarea content has its character references
    # decoded by the browser, so the displayed and copied text is unchanged.
    safe_prompt = html.escape(prompt, quote=False)
    html_content = f"""
<div style="display: flex; flex-direction: column; gap: 10px; margin-top: 10px;">
<textarea id="prompt_text" rows="10"
style="width: 100%; resize: vertical;
background-color: var(--block-background-fill);
color: var(--block-text-color);
border: 1px solid var(--block-border-color);
border-radius: 4px;"
readonly>{safe_prompt}</textarea>
<button
style="width: 150px; padding: 8px;
background-color: #007bff;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;"
onclick="
navigator.clipboard.writeText(document.getElementById('prompt_text').value);
const copiedMsg = document.getElementById('copied_msg');
copiedMsg.style.display = 'inline';
setTimeout(() => copiedMsg.style.display = 'none', 2000);
">
Copy Prompt
</button>
<span id="copied_msg" style="display: none; color: var(--primary-text-color); font-weight: bold;">Prompt Copied!</span>
</div>
"""
    return html_content
def process_audio(audio, language_name):
    """Gradio callback: transcribe the upload and prepare the chapter prompt.

    Args:
        audio: File path of the uploaded audio, or a falsy value (``None`` or
            an empty string) when nothing was uploaded.
        language_name: Display name looked up in ``LANGUAGE_OPTIONS``; names
            not found there fall back to ``"en"``.

    Returns:
        A 3-tuple matching the interface outputs:
        ``(transcription, srt_text, prompt_html)``. On failure the first
        element is an ``"Error during transcription: ..."`` message and the
        other two are empty strings.
    """
    # Nothing to transcribe: answer immediately with a clear message rather
    # than loading the model only to fail on a missing path.
    if not audio:
        return "Error during transcription: no audio file was provided.", "", ""

    lang_code = LANGUAGE_OPTIONS.get(language_name, "en")
    try:
        # The raw segments are not needed here, only the text and the SRT.
        transcription, srt_text, _segments = transcribe_audio(
            audio, model_size='base', language=lang_code
        )
    except Exception as e:
        # UI boundary: report the failure in the transcription box instead of
        # letting the exception propagate out of the callback.
        return f"Error during transcription: {e}", "", ""

    chapter_prompt = prepare_chapter_prompt(srt_text)
    prompt_html = format_prompt_html(chapter_prompt)
    return transcription, srt_text, prompt_html
# Text shown under the title of the web page.
_APP_DESCRIPTION = (
    "Upload an audio file (e.g., MP3) of your YouTube video and select the audio language. "
    "The app will transcribe the audio using Whisper, generate subtitles in SRT format, "
    "and prepare a single, complete prompt that instructs ChatGPT -> o1 model to generate a chapter breakdown in the format 'mm:ss Chapter Title'.\n\n"
    "Click the 'Copy Prompt' button to copy the entire prompt, and a brief 'Prompt Copied!' message will appear."
)

# The Gradio UI: two inputs (audio file path, language name) feed
# `process_audio`, whose three return values fill the three outputs in order.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),
        gr.Dropdown(
            choices=list(LANGUAGE_OPTIONS),
            label="Audio Language",
            value="English",
        ),
    ],
    outputs=[
        gr.Textbox(label="Full Transcription", lines=10),
        gr.Textbox(label="SRT File Content", lines=10),
        gr.HTML(label="Prepared Chapter Prompt (Copy & Paste into ChatGPT)"),
    ],
    title="Video Chapter Splitter from Audio (MP3)",
    description=_APP_DESCRIPTION,
)
# Launch the Gradio app only when this file is run as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    iface.launch()