"""Gradio app: transcribe speech from video or audio, summarize it, and answer questions.

Three Hugging Face pipelines are loaded once at import time (speech
recognition, summarization, extractive question answering) and wired to a
three-tab Gradio interface.
"""

import os
import tempfile

import gradio as gr
from moviepy.editor import VideoFileClip
from transformers import pipeline

# Load models
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

# Most recent transcript, used as the context for question answering.
# NOTE: this is module-level state, so it is shared by every caller of the
# handlers below; the latest transcription always wins.
stored_transcript = ""

# Transcripts with fewer words than this are not summarized.
_MIN_WORDS_TO_SUMMARIZE = 50

# Upper bound for the requested summary length. Without a cap the bounds grow
# linearly with the transcript and can exceed what the summarization model can
# generate (facebook/bart-large-cnn is configured with 1024 positions).
_MAX_SUMMARY_TOKENS = 512

# Custom theme. Kept as adjacent literals so the text stays exactly as it was.
_CUSTOM_CSS = (
    " body { background-color: black !important; }"
    " .gradio-container { color: #FFFF33 !important; }"
    " button { background-color: #FFFF33 !important; color: black !important; border: none !important; }"
    " input, textarea, .gr-textbox, .gr-video, .gr-audio"
    " { background-color: #111 !important; color: #FFFF33 !important; border-color: #FFFF33 !important; } "
)


def _summary_lengths(word_count):
    """Return ``(min_length, max_length)`` for a transcript of *word_count* words.

    The bounds are 15% and 30% of the word count with floors of 20 and 50,
    capped so that very long transcripts cannot request an oversized summary.
    """
    max_len = min(_MAX_SUMMARY_TOKENS, max(50, int(word_count * 0.3)))
    # Keep the lower bound at no more than half the upper one so the cap on
    # max_len can never leave min_length >= max_length.
    min_len = min(max_len // 2, max(20, int(word_count * 0.15)))
    return min_len, max_len


def _transcribe(audio_path):
    """Run speech recognition on *audio_path* and return the joined chunk text."""
    result = asr(audio_path, return_timestamps=True)
    # Strip each piece before joining so chunk-level leading/trailing
    # whitespace does not turn into doubled spaces in the transcript.
    pieces = (chunk["text"].strip() for chunk in result["chunks"])
    return " ".join(piece for piece in pieces if piece)


def _summarize(text):
    """Return a summary of *text*, or a notice when it is too short to summarize."""
    word_count = len(text.split())
    if word_count < _MIN_WORDS_TO_SUMMARIZE:
        return "Text too short to summarize."

    min_len, max_len = _summary_lengths(word_count)
    summary = summarizer(
        text,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        # Long transcripts are cut to the model's input limit instead of
        # failing inside the model.
        truncation=True,
    )
    return summary[0]["summary_text"]


def _transcribe_and_summarize(audio_path):
    """Transcribe *audio_path*, remember the transcript, and summarize it.

    Returns:
        A ``(transcript, summary)`` tuple of strings.
    """
    global stored_transcript
    transcript = _transcribe(audio_path)
    stored_transcript = transcript
    return transcript, _summarize(transcript)


def transcribe_from_video(video_file):
    """Extract the audio track of a video, then transcribe and summarize it.

    Args:
        video_file: Path to the uploaded video, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(transcript, summary)`` tuple. On failure the first element is an
        ``"Error: ..."`` message and the second is an empty string.
    """
    if video_file is None:
        return "Error: No video file provided.", ""

    audio_path = None
    try:
        video = VideoFileClip(video_file)
        try:
            if video.audio is None:
                return "Error: The video has no audio track.", ""
            # A unique temp file avoids collisions between overlapping
            # requests that a fixed file name would cause.
            fd, audio_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            video.audio.write_audiofile(audio_path, codec='pcm_s16le')
        finally:
            # Release the clip's reader resources whether or not export worked.
            video.close()
        return _transcribe_and_summarize(audio_path)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        return f"Error: {str(e)}", ""
    finally:
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass


def transcribe_from_audio(audio_file):
    """Transcribe and summarize a recorded or uploaded audio file.

    Args:
        audio_file: Path to the audio file, or ``None`` when nothing was
            recorded.

    Returns:
        A ``(transcript, summary)`` tuple. On failure the first element is an
        ``"Error: ..."`` message and the second is an empty string.
    """
    if audio_file is None:
        return "Error: No audio recorded.", ""
    try:
        return _transcribe_and_summarize(audio_file)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of raising.
        return f"Error: {str(e)}", ""


def answer_question(question):
    """Answer *question* using the most recent transcript as context.

    Args:
        question: The user's question text.

    Returns:
        The extracted answer, or a short message when there is no transcript,
        no question, or the question-answering pipeline fails.
    """
    if not stored_transcript:
        return "Please transcribe a video or record audio first."
    if not question or not question.strip():
        return "Please enter a question."
    try:
        result = qa_pipeline(question=question, context=stored_transcript)
    except Exception as e:
        # Same error-reporting style as the transcription handlers.
        return f"Error: {str(e)}"
    return result["answer"]


# UI
with gr.Blocks(css=_CUSTOM_CSS) as iface:
    # The original literal ran across physical lines inside ordinary quotes,
    # which is not valid Python; escaped newlines keep the same text.
    gr.HTML(
        "\n"
        "Upload a video or record speech to get transcript, summary, and ask questions.\n"
    )

    with gr.Tab("🎥 Video Upload"):
        video_input = gr.Video(label="Upload Video (.mp4)", interactive=True)
        transcribe_btn = gr.Button("🚀 Transcribe from Video")
        transcribed_text_v = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_v = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        transcribe_btn.click(
            fn=transcribe_from_video,
            inputs=video_input,
            outputs=[transcribed_text_v, summarized_text_v],
        )

    with gr.Tab("🎙️ Record Speech"):
        audio_input = gr.Audio(type="filepath", label="Record Audio")
        record_btn = gr.Button("🎧 Transcribe from Audio")
        transcribed_text_a = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_a = gr.Textbox(label="Summarized Text", lines=8, interactive=False)
        record_btn.click(
            fn=transcribe_from_audio,
            inputs=audio_input,
            outputs=[transcribed_text_a, summarized_text_a],
        )

    with gr.Tab("❓ Ask Questions"):
        question_input = gr.Textbox(
            label="Ask a question about the transcript",
            placeholder="E.g., What was the main topic?",
        )
        ask_btn = gr.Button("🔍 Get Answer")
        answer_output = gr.Textbox(label="Answer", interactive=False)
        ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)

# Launch
port = int(os.environ.get('PORT1', 7860))
# launch() returns (app, local_url, share_url); prefer the public share link
# and fall back to the local address when no share link was created.
# NOTE(review): when run as a plain script launch() appears to block until the
# server stops, so this print may only run at shutdown - confirm.
_, local_url, share_url = iface.launch(share=True, server_port=port)
url = share_url or local_url
print(f"Interface is live at: {url}")