Spaces:
Running
Running
File size: 3,435 Bytes
4cc0ea8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import os
import requests
import uuid
import subprocess
import time
def extract_audio_from_video(video_path, output_format="mp3"):
    """Extract the audio track of a video into a standalone file with ffmpeg.

    The ffmpeg invocation drops the video stream (``-vn``), downmixes to one
    channel (``-ac 1``), resamples to 12 kHz (``-ar 12000``) and requests
    quality level 9 (``-q:a 9``).

    Args:
        video_path: Path of the input video. A falsy value (``None``, ``""``)
            short-circuits and no ffmpeg process is started.
        output_format: Extension of the output file. Common container names
            are mapped to a matching ffmpeg encoder; any other value is
            passed to ffmpeg unchanged as the encoder name.

    Returns:
        The path of the created file, named ``audio_<6 hex chars>.<format>``
        relative to the current working directory, or ``None`` when
        ``video_path`` is falsy.

    Raises:
        RuntimeError: If ffmpeg cannot be started, exits with a non-zero
            status, or produces no output file. The message always starts
            with ``"Error extracting audio: "``.
    """
    if not video_path:
        return None

    # File extensions that are not themselves valid ffmpeg encoder names.
    encoders = {
        "mp3": "libmp3lame",
        "wav": "pcm_s16le",
        "ogg": "libvorbis",
        "m4a": "aac",
    }
    encoder = encoders.get(output_format, output_format)
    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"
    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vn",
        "-c:a", encoder,
        "-q:a", "9",
        "-ac", "1",
        "-ar", "12000",
        "-y", output_path,
    ]

    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as exc:
        # Typically: ffmpeg is not installed or the path is not usable.
        raise RuntimeError(f"Error extracting audio: {exc}") from exc

    produced = os.path.exists(output_path)
    if completed.returncode == 0 and produced:
        return output_path

    # ffmpeg may leave a truncated file behind when it fails part-way;
    # remove it so a failed run is never mistaken for a usable result.
    if produced:
        try:
            os.remove(output_path)
        except OSError:
            pass  # best-effort cleanup; the extraction error matters more

    message = "Error extracting audio: Audio extraction failed"
    stderr_text = (completed.stderr or b"").decode("utf-8", errors="replace").strip()
    if stderr_text:
        # The last stderr line is normally ffmpeg's actual error summary.
        message = f"{message} ({stderr_text.splitlines()[-1].strip()})"
    raise RuntimeError(message)
def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    The transcript text is also written to a new UTF-8 file named
    ``transcript_<6 hex chars>.txt`` in the current working directory.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` request header.
        model_id: Value sent as the ``model_id`` form field.

    Returns:
        A 3-tuple ``(transcript_text, transcript_file, status_message)``.
        ``transcript_text`` is the ``"text"`` field of the JSON response
        (an empty string when absent or null).

    Raises:
        ValueError: If ``api_key`` is falsy (message ``"API key required"``).
        RuntimeError: For every other failure (missing audio file, network
            error, non-200 response, unparsable response, unwritable
            transcript). The message starts with ``"Transcription failed: "``.
    """
    if not api_key:
        raise ValueError("API key required")
    # Fail early with a readable message instead of letting open() raise a
    # TypeError/FileNotFoundError that hides what actually went wrong.
    if not audio_path or not os.path.isfile(audio_path):
        raise RuntimeError(f"Transcription failed: audio file not found: {audio_path!r}")

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}
    try:
        with open(audio_path, "rb") as audio_file:
            response = requests.post(
                url,
                headers=headers,
                # A (None, value) tuple makes requests send a plain form
                # field in the multipart body rather than a file part.
                files={"file": audio_file, "model_id": (None, model_id)},
                timeout=120,
            )
    except (OSError, requests.RequestException) as exc:
        raise RuntimeError(f"Transcription failed: {exc}") from exc

    if response.status_code != 200:
        # Keep a short excerpt of the body: it usually carries the API's
        # own explanation (bad key, quota, unsupported file, ...).
        excerpt = (response.text or "").strip()[:300]
        detail = f" - {excerpt}" if excerpt else ""
        raise RuntimeError(
            f"Transcription failed: API error: {response.status_code}{detail}"
        )

    try:
        result = response.json()
    except ValueError as exc:
        raise RuntimeError(f"Transcription failed: invalid JSON response: {exc}") from exc
    if not isinstance(result, dict):
        raise RuntimeError("Transcription failed: unexpected response format")

    # "text" may be missing or null; normalise both to an empty string.
    transcript_text = result.get("text") or ""

    # Save transcript to file
    transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
    try:
        with open(transcript_file, "w", encoding="utf-8") as out:
            out.write(transcript_text)
    except OSError as exc:
        raise RuntimeError(f"Transcription failed: {exc}") from exc

    return transcript_text, transcript_file, "Transcription completed successfully"
def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, gemini_api_key, language, content_type):
    """Run the video pipeline: extract audio, transcribe it, analyze the text.

    Never raises: every failure is reported through the returned tuple.

    Args:
        video_path: Path of the input video.
        audio_format: Passed to ``extract_audio_from_video`` as the output
            format.
        elevenlabs_api_key: Passed to ``transcribe_audio`` as the API key.
        model_id: Passed to ``transcribe_audio`` as the model id.
        gemini_api_key: Passed to ``analyze_document``.
        language: Passed to ``analyze_document``.
        content_type: Passed to ``analyze_document``.

    Returns:
        A 7-tuple ``(audio_path, status, transcript_path,
        transcription_status, formatted_output, txt_path, json_path)``.

        * Success: all fields populated.
        * Empty transcription: ``audio_path`` and ``transcription_status``
          are set, ``status`` explains the failure, the rest are ``None``.
        * Any error (including a missing ``video_path``): ``status``,
          ``transcription_status`` and ``formatted_output`` all hold the same
          ``"Error processing video: ..."`` message; the rest are ``None``.
    """
    # Without this guard extract_audio_from_video() returns None for an empty
    # path and the failure only surfaces later as an unrelated-looking
    # transcription error.
    if not video_path:
        error_message = "Error processing video: No video file provided"
        return None, error_message, None, error_message, error_message, None, None

    try:
        print("Starting video processing...")
        start = time.time()

        audio_path = extract_audio_from_video(video_path, audio_format)
        print(f"Audio extracted in {time.time() - start:.2f}s. Transcribing...")

        transcription, transcript_path, transcription_status = transcribe_audio(
            audio_path,
            elevenlabs_api_key,
            model_id,
        )
        if not transcription:
            return audio_path, "Audio extracted, but transcription failed", None, transcription_status, None, None, None
        print(f"Transcription completed in {time.time() - start:.2f}s. Analyzing content...")

        # Generate summary or quiz from transcription
        formatted_output, json_path, txt_path = analyze_document(
            transcription,
            gemini_api_key,
            language,
            content_type,
        )
        print(f"Total processing time: {time.time() - start:.2f}s")

        # Note the order: txt_path is returned before json_path, the reverse
        # of the order in which analyze_document() yields them.
        return audio_path, "Processing completed successfully", transcript_path, transcription_status, formatted_output, txt_path, json_path
    except Exception as e:
        # Pipeline boundary: convert any failure into the error-shaped tuple
        # so the caller always receives seven values.
        error_message = f"Error processing video: {str(e)}"
        return None, error_message, None, error_message, error_message, None, None