File size: 3,435 Bytes
4cc0ea8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import requests
import uuid
import subprocess
import time

def extract_audio_from_video(video_path, output_format="mp3"):
    """Extract the audio track of a video into a new file by running ffmpeg.

    The audio is written to ``audio_<6 hex chars>.<output_format>`` relative
    to the current working directory, with video disabled (``-vn``), one
    audio channel (``-ac 1``) and a 12000 Hz sample rate (``-ar 12000``).

    Args:
        video_path: Path of the input video. A falsy value (``None``, ``""``)
            short-circuits and nothing is run.
        output_format: Extension used for the output file. ``"mp3"`` is
            encoded explicitly with ``libmp3lame``; for any other value no
            ``-c:a`` option is passed, so ffmpeg selects the encoder from the
            output file name.

    Returns:
        The path of the created audio file, or ``None`` when ``video_path``
        is falsy.

    Raises:
        Exception: If ffmpeg cannot be started, exits with a non-zero status,
            or does not produce the output file. The message always starts
            with ``"Error extracting audio: "``.
    """
    if not video_path:
        return None

    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"

    cmd = ["ffmpeg", "-i", video_path, "-vn"]
    if output_format == "mp3":
        cmd += ["-c:a", "libmp3lame"]
    # For other formats the extension is a container name, not an encoder
    # name (e.g. "wav"), so the codec choice is left to ffmpeg.
    cmd += ["-q:a", "9", "-ac", "1", "-ar", "12000", "-y", output_path]

    try:
        completed = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except Exception as e:
        # Typically ffmpeg is missing from PATH or the path argument is invalid.
        raise Exception(f"Error extracting audio: {e}") from e

    if completed.returncode == 0 and os.path.exists(output_path):
        return output_path

    # Best-effort cleanup: a failed run may leave a truncated output file,
    # and a cleanup problem must not mask the real error reported below.
    try:
        os.remove(output_path)
    except OSError:
        pass

    stderr_lines = (
        completed.stderr.decode("utf-8", errors="replace").strip().splitlines()
    )
    # The last stderr line is normally ffmpeg's own summary of the failure.
    detail = stderr_lines[-1].strip() if stderr_lines else "no ffmpeg output"
    raise Exception(
        "Error extracting audio: Audio extraction failed "
        f"(ffmpeg exit code {completed.returncode}): {detail}"
    )

def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    The file is uploaded as multipart form data together with ``model_id``.
    On an HTTP 200 response the ``"text"`` field of the JSON body is written
    to ``transcript_<6 hex chars>.txt`` (UTF-8) in the current working
    directory.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` request header.
        model_id: Value sent as the ``model_id`` form field.

    Returns:
        A 3-tuple ``(transcript_text, transcript_file, status_message)``.

    Raises:
        Exception: ``"API key required"`` when ``api_key`` is falsy; otherwise
            a message starting with ``"Transcription failed: "`` when no audio
            path is given, the file cannot be read, the request fails, or the
            API answers with a status other than 200.
    """
    if not api_key:
        raise Exception("API key required")
    if not audio_path:
        # Without this guard open() fails with an opaque TypeError for None.
        raise Exception("Transcription failed: No audio file provided")

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    try:
        with open(audio_path, "rb") as audio_file:
            response = requests.post(
                url,
                headers=headers,
                # (None, value) sends model_id as a plain form field, not a file.
                files={"file": audio_file, "model_id": (None, model_id)},
                timeout=120,
            )

        if response.status_code != 200:
            # Include a bounded excerpt of the body so the cause is visible.
            detail = response.text.strip()[:200]
            suffix = f" - {detail}" if detail else ""
            raise Exception(f"API error: {response.status_code}{suffix}")

        result = response.json()
        transcript_text = result.get("text", "")

        # Save transcript to file
        transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
        with open(transcript_file, "w", encoding="utf-8") as f:
            f.write(transcript_text)

        return transcript_text, transcript_file, "Transcription completed successfully"
    except Exception as e:
        # Keep the generic Exception type callers already catch, but chain the
        # cause so the original traceback is preserved.
        raise Exception(f"Transcription failed: {e}") from e

def process_video_file(video_path, audio_format, elevenlabs_api_key, model_id, gemini_api_key, language, content_type):
    """Run the video pipeline: extract audio, transcribe it, analyze the text.

    Calls ``extract_audio_from_video``, ``transcribe_audio`` and
    ``analyze_document`` in that order and prints timing information along
    the way. This function never raises; every failure is reported through
    the returned tuple.

    Args:
        video_path: Path of the input video.
        audio_format: Passed to ``extract_audio_from_video`` as output format.
        elevenlabs_api_key: Passed to ``transcribe_audio`` as API key.
        model_id: Passed to ``transcribe_audio`` as model id.
        gemini_api_key: Passed to ``analyze_document``.
        language: Passed to ``analyze_document``.
        content_type: Passed to ``analyze_document``.

    Returns:
        A 7-tuple ``(audio_path, status_message, transcript_path,
        transcription_status, formatted_output, txt_path, json_path)``.
        On error the first element is ``None``, the status, transcription
        status and formatted output all carry the same error message, and
        the remaining elements are ``None``.
    """
    if not video_path:
        # extract_audio_from_video returns None for a falsy path; passing that
        # on to transcription would only produce a misleading error message.
        error_message = "Error processing video: No video file provided"
        return None, error_message, None, error_message, error_message, None, None

    try:
        print("Starting video processing...")
        start = time.time()

        audio_path = extract_audio_from_video(video_path, audio_format)
        print(f"Audio extracted in {time.time() - start:.2f}s. Transcribing...")

        transcription, transcript_path, transcription_status = transcribe_audio(
            audio_path,
            elevenlabs_api_key,
            model_id
        )

        if not transcription:
            # An empty transcript is treated as a failure: the audio path is
            # still returned, but no transcript path or analysis output.
            return audio_path, "Audio extracted, but transcription failed", None, transcription_status, None, None, None

        print(f"Transcription completed in {time.time() - start:.2f}s. Analyzing content...")

        # Generate summary or quiz from transcription
        formatted_output, json_path, txt_path = analyze_document(
            transcription,
            gemini_api_key,
            language,
            content_type
        )

        print(f"Total processing time: {time.time() - start:.2f}s")
        # Note the order: txt_path comes before json_path in the result,
        # the reverse of how analyze_document's values are unpacked above.
        return audio_path, "Processing completed successfully", transcript_path, transcription_status, formatted_output, txt_path, json_path
    except Exception as e:
        error_message = f"Error processing video: {str(e)}"
        return None, error_message, None, error_message, error_message, None, None