Spaces:

MrSimple01
/

SimpleLearn_2

Running

App Files Files Community

SimpleLearn_2 / src /video_processing.py

MrSimple01

Upload 8 files

4cc0ea8 verified 12 days ago

raw

history blame

3.44 kB

	import os
	import requests
	import uuid
	import subprocess
	import time

	def extract_audio_from_video(video_path, output_format="mp3"):
	    """Extract the audio track of a video into a compact mono file via ffmpeg.

	    Args:
	        video_path: Path of the input video. A falsy value (``None``, ``""``)
	            short-circuits and nothing is run.
	        output_format: Extension of the output file (default ``"mp3"``).

	    Returns:
	        The relative path ``audio_<6 hex chars>.<output_format>`` of the
	        generated file, or ``None`` when ``video_path`` is falsy.

	    Raises:
	        Exception: If ffmpeg cannot be started, exits with a non-zero status,
	            or produces no output file. The message starts with
	            ``"Error extracting audio: "`` and the original error is chained.
	    """
	    if not video_path:
	        return None

	    output_path = f"audio_{uuid.uuid4().hex[:6]}.{output_format}"

	    cmd = ["ffmpeg", "-i", video_path, "-vn"]
	    if output_format == "mp3":
	        cmd += ["-c:a", "libmp3lame"]
	    # For any other format, let ffmpeg pick the default encoder from the file
	    # extension: a container name such as "wav" or "m4a" is not a valid
	    # encoder name for -c:a and would make ffmpeg abort.
	    cmd += [
	        "-q:a", "9",      # lowest VBR quality: keeps the upload small
	        "-ac", "1",       # mono
	        "-ar", "12000",   # 12 kHz sample rate
	        "-y", output_path,
	    ]

	    try:
	        completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	        if completed.returncode == 0 and os.path.exists(output_path):
	            return output_path

	        # Do not leave a partial or empty file behind after a failed run.
	        if os.path.exists(output_path):
	            try:
	                os.remove(output_path)
	            except OSError:
	                pass  # best-effort cleanup; the real failure is reported below

	        # ffmpeg prints its diagnosis on stderr; the last line is the reason.
	        stderr_lines = completed.stderr.decode("utf-8", errors="replace").strip().splitlines()
	        detail = f": {stderr_lines[-1]}" if stderr_lines else ""
	        raise Exception(f"Audio extraction failed{detail}")
	    except Exception as e:
	        raise Exception(f"Error extracting audio: {str(e)}") from e

	def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
	    """Transcribe an audio file with the ElevenLabs speech-to-text endpoint.

	    The transcript text is also written to a new ``transcript_<6 hex chars>.txt``
	    file (UTF-8) at a relative path.

	    Args:
	        audio_path: Path of the audio file to upload.
	        api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
	        model_id: Value of the ``model_id`` form field (default ``"scribe_v1"``).

	    Returns:
	        A tuple ``(transcript_text, transcript_file, status_message)``.

	    Raises:
	        Exception: ``"API key required"`` when ``api_key`` is falsy; otherwise
	            any failure (unreadable file, network error, non-200 response,
	            invalid JSON) is re-raised with a ``"Transcription failed: "``
	            prefix and the original error chained as ``__cause__``.
	    """
	    if not api_key:
	        raise Exception("API key required")

	    url = "https://api.elevenlabs.io/v1/speech-to-text"
	    headers = {"xi-api-key": api_key}

	    try:
	        with open(audio_path, "rb") as audio_file:
	            response = requests.post(
	                url,
	                headers=headers,
	                # (None, value) sends model_id as a plain multipart form field.
	                files={"file": audio_file, "model_id": (None, model_id)},
	                timeout=120,
	            )

	        if response.status_code != 200:
	            # Include a short excerpt of the body so the failure is diagnosable.
	            detail = (response.text or "")[:200].strip()
	            suffix = f" - {detail}" if detail else ""
	            raise Exception(f"API error: {response.status_code}{suffix}")

	        # Fall back to "" when the field is absent or null.
	        transcript_text = response.json().get("text", "") or ""

	        # Save transcript to file
	        transcript_file = f"transcript_{uuid.uuid4().hex[:6]}.txt"
	        with open(transcript_file, "w", encoding="utf-8") as f:
	            f.write(transcript_text)

	        return transcript_text, transcript_file, "Transcription completed successfully"
	    except Exception as e:
	        raise Exception(f"Transcription failed: {str(e)}") from e

	def process_video_file(
	    video_path,
	    audio_format,
	    elevenlabs_api_key,
	    model_id,
	    gemini_api_key,
	    language,
	    content_type,
	):
	    """Run the video pipeline: extract audio, transcribe it, analyze the transcript.

	    Returns:
	        A 7-tuple ``(audio_path, status, transcript_path, transcription_status,
	        formatted_output, txt_path, json_path)``.

	        * Empty transcription: the audio path, a failure status and the
	          transcription status are filled in; the remaining slots are ``None``.
	        * Any exception: it is caught, never propagated; the error message
	          fills the status, transcription-status and formatted-output slots
	          and every path slot is ``None``.
	    """
	    # NOTE(review): analyze_document is neither defined nor imported in the
	    # visible part of this file -- confirm it is in scope at runtime.
	    try:
	        print("Starting video processing...")
	        began = time.time()

	        audio_path = extract_audio_from_video(video_path, audio_format)
	        print(f"Audio extracted in {time.time() - began:.2f}s. Transcribing...")

	        transcribed = transcribe_audio(audio_path, elevenlabs_api_key, model_id)
	        transcription, transcript_path, transcription_status = transcribed

	        if transcription:
	            # Elapsed time is measured from the start of the whole pipeline.
	            print(f"Transcription completed in {time.time() - began:.2f}s. Analyzing content...")

	            # Generate summary or quiz from transcription
	            analysis = analyze_document(transcription, gemini_api_key, language, content_type)
	            formatted_output, json_path, txt_path = analysis

	            print(f"Total processing time: {time.time() - began:.2f}s")
	            return (
	                audio_path,
	                "Processing completed successfully",
	                transcript_path,
	                transcription_status,
	                formatted_output,
	                txt_path,
	                json_path,
	            )

	        # Nothing was transcribed: report the audio but skip the analysis.
	        return (
	            audio_path,
	            "Audio extracted, but transcription failed",
	            None,
	            transcription_status,
	            None,
	            None,
	            None,
	        )
	    except Exception as exc:
	        failure = f"Error processing video: {str(exc)}"
	        return None, failure, None, failure, failure, None, None