"""Extract audio from a video with ffmpeg, transcribe it, and analyze the text.

The transcription step posts the audio to the ElevenLabs speech-to-text
endpoint; the analysis step is delegated to ``analyze_document``.
"""

import os
import subprocess
import time
import uuid

import requests


def _remove_quietly(path):
    """Delete *path* if it exists, ignoring any filesystem error."""
    try:
        os.remove(path)
    except OSError:
        pass


def extract_audio_from_video(video_path, output_format="mp3"):
    """Extract the audio track of *video_path* into a new file via ffmpeg.

    Args:
        video_path: Path of the input video. A falsy value short-circuits.
        output_format: File extension of the output; also selects the codec.

    Returns:
        The path of the created audio file (``audio_<6 hex chars>.<format>``
        in the current working directory), or ``None`` when *video_path* is
        falsy.

    Raises:
        Exception: If ffmpeg cannot be started, exits with a non-zero status,
            or does not produce the output file. The message is prefixed
            with ``"Error extracting audio: "``.
    """
    if not video_path:
        return None

    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"

    # NOTE(review): for anything other than mp3 the format name is passed
    # straight through as the ffmpeg codec name, which only works when the
    # two coincide - confirm which formats callers actually use.
    codec = "libmp3lame" if output_format == "mp3" else output_format
    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vn",            # drop the video stream
        "-c:a", codec,
        "-q:a", "9",      # codec-specific audio quality setting
        "-ac", "1",       # single audio channel
        "-ar", "12000",   # 12 kHz sample rate
        "-y",             # overwrite the output without prompting
        output_path,
    ]

    try:
        result = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # A failed run can still leave a truncated file behind, so the exit
        # status has to be checked in addition to the file's existence.
        if result.returncode != 0:
            stderr_text = result.stderr.decode("utf-8", errors="replace").strip()
            last_line = stderr_text.splitlines()[-1] if stderr_text else ""
            raise Exception(
                f"Audio extraction failed "
                f"(ffmpeg exit code {result.returncode}): {last_line}"
            )
        if not os.path.exists(output_path):
            raise Exception("Audio extraction failed")
        return output_path
    except Exception as e:
        # Do not leave a partial output file lying around after a failure.
        _remove_quietly(output_path)
        raise Exception(f"Error extracting audio: {str(e)}") from e


def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Transcribe an audio file with the ElevenLabs speech-to-text endpoint.

    The transcript text is also written to ``transcript_<6 hex chars>.txt``
    in the current working directory.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` header; must be non-empty.
        model_id: Value sent as the ``model_id`` form field.

    Returns:
        A ``(transcript_text, transcript_file, status_message)`` tuple.

    Raises:
        Exception: ``"API key required"`` when *api_key* is falsy; otherwise
            any failure (file access, network, non-200 response, invalid
            JSON) is re-raised with a ``"Transcription failed: "`` prefix.
    """
    if not api_key:
        raise Exception("API key required")

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    try:
        with open(audio_path, "rb") as file:
            response = requests.post(
                url,
                headers=headers,
                # The (None, value) tuple sends model_id as a plain form
                # field rather than as a file part.
                files={"file": file, "model_id": (None, model_id)},
                timeout=120,
            )

        if response.status_code != 200:
            raise Exception(f"API error: {response.status_code}")

        result = response.json()
        # "text" may be absent or null; normalise both to an empty string so
        # the write below cannot fail on None.
        transcript_text = result.get("text") or ""

        # Save transcript to file
        transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(transcript_text)

        return transcript_text, transcript_file, "Transcription completed successfully"
    except Exception as e:
        raise Exception(f"Transcription failed: {str(e)}") from e


def process_video_file(
    video_path,
    audio_format,
    elevenlabs_api_key,
    model_id,
    gemini_api_key,
    language,
    content_type,
):
    """Run the extract -> transcribe -> analyze pipeline for one video.

    Progress and timings are printed to stdout. This function never raises:
    every failure is reported through the returned tuple instead.

    Args:
        video_path: Path of the input video.
        audio_format: Output format passed to ``extract_audio_from_video``.
        elevenlabs_api_key: API key passed to ``transcribe_audio``.
        model_id: Model id passed to ``transcribe_audio``.
        gemini_api_key: Passed through to ``analyze_document``.
        language: Passed through to ``analyze_document``.
        content_type: Passed through to ``analyze_document``.

    Returns:
        A 7-tuple ``(audio_path, status, transcript_path,
        transcription_status, formatted_output, txt_path, json_path)``.
        On an empty transcript only ``audio_path``, ``status`` and
        ``transcription_status`` are set. On any exception ``audio_path`` is
        ``None`` and the status, transcription status and formatted output
        all carry the same ``"Error processing video: ..."`` message.
    """
    try:
        print("Starting video processing...")
        # perf_counter is monotonic, so the elapsed times stay correct even
        # if the system clock is adjusted mid-run.
        start = time.perf_counter()

        audio_path = extract_audio_from_video(video_path, audio_format)
        if audio_path is None:
            # extract_audio_from_video returns None only for a falsy path;
            # fail with a clear message instead of passing None to open().
            raise Exception("No video file provided")
        print(f"Audio extracted in {time.perf_counter() - start:.2f}s. Transcribing...")

        transcription, transcript_path, transcription_status = transcribe_audio(
            audio_path, elevenlabs_api_key, model_id
        )
        if not transcription:
            return (
                audio_path,
                "Audio extracted, but transcription failed",
                None,
                transcription_status,
                None,
                None,
                None,
            )
        print(
            f"Transcription completed in {time.perf_counter() - start:.2f}s. "
            f"Analyzing content..."
        )

        # Generate summary or quiz from transcription
        formatted_output, json_path, txt_path = analyze_document(
            transcription, gemini_api_key, language, content_type
        )
        print(f"Total processing time: {time.perf_counter() - start:.2f}s")

        # Note: the result tuple lists txt_path before json_path, the reverse
        # of the order analyze_document returns them in.
        return (
            audio_path,
            "Processing completed successfully",
            transcript_path,
            transcription_status,
            formatted_output,
            txt_path,
            json_path,
        )
    except Exception as e:
        error_message = f"Error processing video: {str(e)}"
        return None, error_message, None, error_message, error_message, None, None