SimpleLearn_2 / src /video_processing.py
MrSimple01's picture
Upload 8 files
4cc0ea8 verified
raw
history blame
3.44 kB
import os
import requests
import uuid
import subprocess
import time
def extract_audio_from_video(video_path, output_format="mp3"):
    """Extract a mono, low-bitrate audio track from a video with ffmpeg.

    Args:
        video_path: Path of the input video. A falsy value (``None`` or
            ``""``) short-circuits and ``None`` is returned.
        output_format: Extension of the generated file. ``"mp3"`` is encoded
            with ``libmp3lame``; any other value is handed to ffmpeg verbatim
            as the audio codec name.

    Returns:
        The path of the created file (``audio_<6 hex chars>.<format>``,
        relative to the current working directory), or ``None`` when no
        video path was given.

    Raises:
        Exception: If ffmpeg cannot be started, exits with a non-zero status,
            or leaves no output file behind. The message always starts with
            ``"Error extracting audio: "``.
    """
    if not video_path:
        return None

    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"
    audio_codec = "libmp3lame" if output_format == "mp3" else output_format
    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vn",              # drop the video stream
        "-c:a", audio_codec,
        "-q:a", "9",        # quality level 9 (smallest output for libmp3lame)
        "-ac", "1",         # mono
        "-ar", "12000",     # 12 kHz sample rate
        "-y", output_path,  # overwrite without prompting
    ]

    try:
        completed = subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=False,
        )

        if completed.returncode != 0:
            # ffmpeg can leave a truncated file behind when it aborts; never
            # hand that to the caller as if the extraction had succeeded.
            if os.path.exists(output_path):
                try:
                    os.remove(output_path)
                except OSError:
                    pass  # best-effort cleanup; the real error is raised below
            diagnostics = completed.stderr.decode("utf-8", errors="replace").strip()
            last_line = diagnostics.splitlines()[-1] if diagnostics else "no diagnostic output"
            raise Exception(
                f"Audio extraction failed (ffmpeg exit code {completed.returncode}): {last_line}"
            )

        if not os.path.exists(output_path):
            raise Exception("Audio extraction failed")

        return output_path
    except Exception as e:
        # Keep the historical message prefix, but preserve the original cause.
        raise Exception(f"Error extracting audio: {str(e)}") from e
def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    The transcript text is also written to a new UTF-8 file named
    ``transcript_<6 hex chars>.txt`` in the current working directory.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        model_id: Value of the ``model_id`` form field sent with the upload.

    Returns:
        A tuple ``(transcript_text, transcript_file, status_message)``.
        ``transcript_text`` is ``""`` when the response carries no text.

    Raises:
        Exception: ``"API key required"`` when ``api_key`` is falsy; otherwise
            any failure (missing file, network error, non-200 response,
            invalid JSON) is re-raised with the prefix
            ``"Transcription failed: "``.
    """
    if not api_key:
        raise Exception("API key required")

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    try:
        if not audio_path:
            # Fail with a readable message instead of the TypeError that
            # open(None) would produce.
            raise ValueError("No audio file provided")

        with open(audio_path, "rb") as audio_file:
            response = requests.post(
                url,
                headers=headers,
                files={"file": audio_file, "model_id": (None, model_id)},
                timeout=120,
            )

        if response.status_code != 200:
            # Include a bounded excerpt of the body so the failure reason
            # reported by the API is not lost.
            detail = (response.text or "").strip()[:200]
            message = f"API error: {response.status_code}"
            if detail:
                message = f"{message} - {detail}"
            raise Exception(message)

        # Guard against a JSON null "text" value, which would break the write.
        transcript_text = response.json().get("text", "") or ""

        # Save transcript to file
        transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
        with open(transcript_file, "w", encoding="utf-8") as out:
            out.write(transcript_text)

        return transcript_text, transcript_file, "Transcription completed successfully"
    except Exception as e:
        raise Exception(f"Transcription failed: {str(e)}") from e
def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, gemini_api_key, language, content_type):
    """Run the video pipeline: extract audio, transcribe it, analyze the text.

    Errors never propagate; every outcome is reported through the returned
    7-tuple so a caller can display it directly.

    Args:
        video_path: Path of the input video.
        audio_format: Output format passed to ``extract_audio_from_video``.
        elevenlabs_api_key: API key passed to ``transcribe_audio``.
        model_id: Transcription model id passed to ``transcribe_audio``.
        gemini_api_key: API key passed to ``analyze_document``.
        language: Passed through to ``analyze_document``.
        content_type: Passed through to ``analyze_document``.

    Returns:
        A tuple ``(audio_path, status, transcript_path, transcription_status,
        formatted_output, txt_path, json_path)``.

        * Success: every element is populated.
        * Empty transcript: ``audio_path`` and ``transcription_status`` are
          set, ``status`` explains the failure, the rest are ``None``.
        * Any exception or missing video: positions 1, 3 and 4 carry the same
          ``"Error processing video: ..."`` message, the rest are ``None``.
    """
    if not video_path:
        # Without this guard the pipeline continues with audio_path=None and
        # ends in an opaque "expected str, bytes or os.PathLike" message.
        error_message = "Error processing video: No video file provided"
        return None, error_message, None, error_message, error_message, None, None

    try:
        print("Starting video processing...")
        start = time.time()

        audio_path = extract_audio_from_video(video_path, audio_format)
        print(f"Audio extracted in {time.time() - start:.2f}s. Transcribing...")

        transcription, transcript_path, transcription_status = transcribe_audio(
            audio_path,
            elevenlabs_api_key,
            model_id,
        )
        if not transcription:
            return audio_path, "Audio extracted, but transcription failed", None, transcription_status, None, None, None

        print(f"Transcription completed in {time.time() - start:.2f}s. Analyzing content...")

        # Generate summary or quiz from transcription
        # NOTE(review): analyze_document is not defined or imported in the
        # visible part of this file; if it is missing, the NameError raised
        # here is reported through the error tuple below. Confirm the import.
        formatted_output, json_path, txt_path = analyze_document(
            transcription,
            gemini_api_key,
            language,
            content_type,
        )

        print(f"Total processing time: {time.time() - start:.2f}s")
        return audio_path, "Processing completed successfully", transcript_path, transcription_status, formatted_output, txt_path, json_path
    except Exception as e:
        error_message = f"Error processing video: {str(e)}"
        return None, error_message, None, error_message, error_message, None, None