|
import os |
|
import gradio as gr |
|
import requests |
|
import json |
|
from moviepy import VideoFileClip |
|
import uuid |
|
|
|
# Default ElevenLabs API key, read once at import time from the environment.
# ``os.environ.get`` yields None when the variable is not set.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
|
|
|
def extract_audio(video_path, output_format="mp3"):
    """Extract the audio track of a video into a new file in the working directory.

    Args:
        video_path: Path of the video file to read. A falsy value (``None``
            or ``""``) is reported as "No video provided".
        output_format: Extension used for the generated audio file name
            (for example ``"mp3"`` or ``"wav"``).

    Returns:
        An ``(audio_path, message)`` tuple. ``audio_path`` is the generated
        file name on success and ``None`` on failure; ``message`` is a
        human-readable status string. Failures are reported through the
        message rather than raised.
    """
    if not video_path:
        return None, "No video provided"

    # The random suffix gives every extraction its own output file name.
    output_path = f"extracted_audio_{uuid.uuid4().hex[:8]}.{output_format}"

    try:
        video = VideoFileClip(video_path)
        try:
            # MoviePy leaves ``audio`` as None for clips without an audio
            # track; report that explicitly instead of an AttributeError.
            if video.audio is None:
                return None, "Error extracting audio: video has no audio track"
            video.audio.write_audiofile(output_path, logger=None)
        finally:
            # Release the clip even when writing the audio fails.
            video.close()
    except Exception as e:
        # Boundary handler: every failure is turned into a status message.
        # Drop any partially written output so failed runs leave no debris.
        if os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Error extracting audio: {e}"

    return output_path, "Audio extracted successfully"
|
|
|
def save_transcription(transcription):
    """Write the text of a transcription result to a new ``.txt`` file.

    Args:
        transcription: Mapping produced by the transcription step. When it
            contains an ``"error"`` key, nothing is written and that error
            is passed through as the status message.

    Returns:
        A ``(file_name, message)`` tuple: the generated file name and a
        success message, or ``None`` and an error message.
    """
    # An upstream failure is forwarded unchanged; no file is created.
    if "error" in transcription:
        return None, transcription["error"]

    target_name = "transcription_" + uuid.uuid4().hex[:8] + ".txt"

    try:
        with open(target_name, "w", encoding="utf-8") as handle:
            # Fall back to a placeholder when the result carries no text.
            handle.write(transcription.get("text", "No text found"))
    except Exception as exc:
        return None, "Error saving transcription: " + str(exc)

    return target_name, "Transcription saved as text file"
|
|
|
def process_video_file(video_file, output_format, api_key, model_id):
    """Extract audio from an uploaded video and transcribe it.

    Args:
        video_file: Path of the uploaded video, or ``None`` when nothing
            was uploaded.
        output_format: Extension for the extracted audio file.
        api_key: API key forwarded to ``transcribe_audio``.
        model_id: Transcription model forwarded to ``transcribe_audio``.

    Returns:
        A 4-tuple ``(audio_path, audio_status, transcript_file,
        transcript_status)``. The path entries are ``None`` for any stage
        that did not produce a file.
    """
    if video_file is None:
        return None, "Please upload a video file", None, "No video provided"

    audio_path, audio_status = extract_audio(video_file, output_format)

    # Transcription only makes sense when the audio file really exists.
    audio_ready = bool(audio_path) and os.path.exists(audio_path)
    if not audio_ready:
        return None, audio_status, None, "Audio extraction failed, cannot transcribe"

    result = transcribe_audio(audio_path, api_key, model_id)
    transcript_path, transcript_status = save_transcription(result)
    return audio_path, audio_status, transcript_path, transcript_status
|
|
|
def process_video_url(video_url, output_format, api_key, model_id):
    """Download a video from a URL, extract its audio and transcribe it.

    NOTE(review): ``download_video_from_url`` is not defined in the part of
    the file visible here; confirm it is available at runtime. Its result is
    unpacked as ``(video_path, error)``, with a truthy ``error`` meaning the
    download failed.

    Args:
        video_url: URL of the video. ``None``, empty and whitespace-only
            values are all reported as a missing URL.
        output_format: Extension for the extracted audio file.
        api_key: API key forwarded to ``transcribe_audio``.
        model_id: Transcription model forwarded to ``transcribe_audio``.

    Returns:
        A 4-tuple ``(audio_path, audio_status, transcript_file,
        transcript_status)``. The path entries are ``None`` for any stage
        that did not produce a file.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not video_url or not video_url.strip():
        return None, "Please enter a video URL", None, "No URL provided"

    video_path, error = download_video_from_url(video_url)
    if error:
        return None, error, None, "Video download failed, cannot transcribe"

    audio_path, message = extract_audio(video_path, output_format)

    # The downloaded video is no longer needed once extraction was attempted.
    if video_path and os.path.exists(video_path):
        try:
            os.remove(video_path)
        except OSError:
            # Best-effort cleanup: a leftover file must not fail the request.
            pass

    if not (audio_path and os.path.exists(audio_path)):
        return None, message, None, "Audio extraction failed, cannot transcribe"

    transcription = transcribe_audio(audio_path, api_key, model_id)
    transcript_file, transcript_message = save_transcription(transcription)
    return audio_path, message, transcript_file, transcript_message
|
|
|
def transcribe_audio(audio_file, api_key, model_id="scribe_v1", *, timeout=(10, 600)):
    """Upload an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_file: Path of the audio file to upload.
        api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        model_id: Identifier of the transcription model to request.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled connection cannot block forever.

    Returns:
        The decoded JSON response on success. On any failure, a dict with a
        single ``"error"`` key describing the problem; nothing is raised
        for a missing key, an unreadable file, a failed request or an
        undecodable response.
    """
    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    # Open the file separately: RequestException derives from OSError, so a
    # shared handler could not tell a local read failure from an API failure.
    try:
        audio = open(audio_file, "rb")
    except OSError as e:
        return {"error": f"Could not read audio file: {e}"}

    with audio:
        files = {
            "file": audio,
            # A (None, value) tuple is sent as a plain form field, not a file.
            "model_id": (None, model_id),
        }
        try:
            response = requests.post(
                url, headers=headers, files=files, timeout=timeout
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return {"error": f"API request failed: {e}"}

    # JSON decoding errors (json's and requests' own) are ValueError
    # subclasses; handling them here keeps this message reachable.
    try:
        return response.json()
    except ValueError:
        return {"error": "Failed to parse API response"}
|
|
|
# Gradio interface: shared API-key and model controls, then a single tab that
# takes an uploaded video and shows the extracted audio and the transcript.
with gr.Blocks(title="Video to Audio to Transcription") as app:
    gr.Markdown("# Video => Audio => Transcription")

    # NOTE(review): the textbox is pre-filled with the key read from the
    # environment; confirm that exposing it in the UI is intended.
    api_key = gr.Textbox(
        label="ElevenLabs API Key",
        placeholder="Enter your ElevenLabs API key",
        type="password",
        value=ELEVENLABS_API_KEY,
    )
    model_id = gr.Dropdown(
        label="Transcription Model",
        choices=["scribe_v1"],
        value="scribe_v1",
    )

    with gr.Tabs():
        with gr.TabItem("Upload Video"):
            with gr.Row():
                # Input side: the video, the audio format and the trigger.
                with gr.Column():
                    video_input = gr.Video(label="Upload Video")
                    format_choice_file = gr.Radio(
                        ["mp3", "wav"], value="mp3", label="Output Format"
                    )
                    extract_button_file = gr.Button("Extract Audio & Transcribe")

                # Output side: one file and one status box per stage.
                with gr.Column():
                    audio_output_file = gr.Audio(
                        label="Extracted Audio", type="filepath"
                    )
                    status_output_file = gr.Textbox(label="Audio Extraction Status")
                    transcript_file_output = gr.File(label="Transcription Text File")
                    transcript_status_output = gr.Textbox(label="Transcription Status")

            # The four outputs match the 4-tuple returned by process_video_file.
            extract_button_file.click(
                fn=process_video_file,
                inputs=[video_input, format_choice_file, api_key, model_id],
                outputs=[
                    audio_output_file,
                    status_output_file,
                    transcript_file_output,
                    transcript_status_output,
                ],
            )


# Start the web app only when the file is run as a script.
if __name__ == "__main__":
    app.launch()