|
import gradio as gr |
|
import os |
|
from moviepy.editor import VideoFileClip |
|
from transformers import pipeline |
|
|
|
|
|
# Hugging Face pipelines are constructed once, at import time, and reused by
# every request handler below.
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-cased-distilled-squad",
)

# Most recent transcript; written by the transcription handlers and read by
# the question-answering handler. It is module-level state, so it is shared
# by everything running in this process.
stored_transcript = ""
|
|
|
# Upper bound for the summary length targets handed to the summarizer.
# NOTE(review): assumes facebook/bart-large-cnn reads at most ~1024 input
# tokens, so longer summary targets would be meaningless — confirm.
_SUMMARY_LENGTH_CAP = 512


def _transcribe(audio_path):
    """Run speech recognition on *audio_path* and return the joined chunk text."""
    result = asr(audio_path, return_timestamps=True)
    # Strip each chunk so that joining does not produce doubled or leading
    # spaces when a chunk's text carries its own surrounding whitespace.
    pieces = (chunk["text"].strip() for chunk in result["chunks"])
    return " ".join(piece for piece in pieces if piece)


def _summarize_transcript(transcribed_text):
    """Return a summary of *transcribed_text*, or a notice if it is under 50 words."""
    word_count = len(transcribed_text.split())
    if word_count < 50:
        return "Text too short to summarize."

    # Target roughly 15%-30% of the transcript length, with fixed floors and
    # a cap so that very long transcripts do not request oversized summaries.
    max_summary_length = min(max(50, int(word_count * 0.3)), _SUMMARY_LENGTH_CAP)
    min_summary_length = min(max(20, int(word_count * 0.15)), _SUMMARY_LENGTH_CAP // 2)

    summary = summarizer(
        transcribed_text,
        max_length=max_summary_length,
        min_length=min_summary_length,
        do_sample=False,
        # Truncate over-long input instead of letting the model fail on it.
        truncation=True,
    )
    return summary[0]["summary_text"]


def _transcribe_and_summarize(audio_path):
    """Transcribe *audio_path*, remember the transcript, and summarize it."""
    global stored_transcript
    transcribed_text = _transcribe(audio_path)
    # Stored before summarizing so the transcript stays available for
    # question answering even if summarization fails.
    stored_transcript = transcribed_text
    return transcribed_text, _summarize_transcript(transcribed_text)


def transcribe_from_video(video_file):
    """Extract the audio track of a video, then transcribe and summarize it.

    Args:
        video_file: Path of the uploaded video, or None when nothing was
            uploaded.

    Returns:
        A ``(transcript, summary)`` tuple of strings. On failure the first
        element is an ``"Error: ..."`` message and the second is empty.
    """
    if video_file is None:
        return "Error: No video file provided.", ""

    import tempfile  # local import: only this handler needs it

    video = None
    audio_path = None
    try:
        video = VideoFileClip(video_file)
        if video.audio is None:
            return "Error: The video has no audio track.", ""

        # A unique temporary file avoids clashes between overlapping requests
        # that a fixed file name in the working directory would cause.
        handle, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(handle)
        video.audio.write_audiofile(audio_path, codec="pcm_s16le")

        return _transcribe_and_summarize(audio_path)
    except Exception as e:
        # Handler boundary: report the failure in the UI instead of raising.
        return f"Error: {str(e)}", ""
    finally:
        if video is not None:
            video.close()
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is not fatal.
                pass


def transcribe_from_audio(audio_file):
    """Transcribe and summarize a recorded or uploaded audio file.

    Args:
        audio_file: Path of the audio file, or None when nothing was
            recorded.

    Returns:
        A ``(transcript, summary)`` tuple of strings. On failure the first
        element is an ``"Error: ..."`` message and the second is empty.
    """
    if audio_file is None:
        return "Error: No audio recorded.", ""

    try:
        return _transcribe_and_summarize(audio_file)
    except Exception as e:
        # Handler boundary: report the failure in the UI instead of raising.
        return f"Error: {str(e)}", ""
|
|
|
|
|
|
|
def answer_question(question):
    """Answer *question* using the most recently stored transcript.

    Args:
        question: The question text from the UI; may be empty or None.

    Returns:
        The answer string from the question-answering pipeline, or a
        message explaining why no answer could be produced.
    """
    # Reject blank questions up front rather than passing them to the model.
    if question is None or not question.strip():
        return "Please enter a question."

    if not stored_transcript:
        return "Please transcribe a video or record audio first."

    try:
        result = qa_pipeline(question=question, context=stored_transcript)
    except Exception as e:
        # Handler boundary: report failures the same way the transcription
        # handlers do instead of raising into the UI.
        return f"Error: {str(e)}"
    return result["answer"]
|
|
|
|
|
# Gradio UI: three tabs (video upload, audio recording, question answering)
# with a black/yellow theme applied through custom CSS.
# NOTE(review): the emoji in the labels below were restored from mis-decoded
# UTF-8; the two button icons "📝" and "🔍" are a best guess — confirm.
with gr.Blocks(css="""
body { background-color: black !important; }
.gradio-container { color: #FFFF33 !important; }
button { background-color: #FFFF33 !important; color: black !important; border: none !important; }
input, textarea, .gr-textbox, .gr-video, .gr-audio { background-color: #111 !important; color: #FFFF33 !important; border-color: #FFFF33 !important; }
""") as iface:
    gr.HTML("<h1 style='color:#FFFF33'>🎤 Video & Voice Transcriber, Summarizer & Q&A</h1>")
    gr.HTML("<p style='color:#CCCC33'>Upload a video or record speech to get transcript, summary, and ask questions.</p>")

    with gr.Tab("🎥 Video Upload"):
        video_input = gr.Video(label="Upload Video (.mp4)", interactive=True)
        transcribe_btn = gr.Button("📝 Transcribe from Video")
        transcribed_text_v = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_v = gr.Textbox(label="Summarized Text", lines=8, interactive=False)

        # The handler returns (transcript, summary), one per output textbox.
        transcribe_btn.click(
            fn=transcribe_from_video,
            inputs=video_input,
            outputs=[transcribed_text_v, summarized_text_v],
        )

    with gr.Tab("🎙️ Record Speech"):
        # type="filepath" hands the handler a path rather than raw samples.
        audio_input = gr.Audio(type="filepath", label="Record Audio")
        record_btn = gr.Button("🎧 Transcribe from Audio")
        transcribed_text_a = gr.Textbox(label="Transcribed Text", lines=8, interactive=False)
        summarized_text_a = gr.Textbox(label="Summarized Text", lines=8, interactive=False)

        record_btn.click(
            fn=transcribe_from_audio,
            inputs=audio_input,
            outputs=[transcribed_text_a, summarized_text_a],
        )

    with gr.Tab("❓ Ask Questions"):
        question_input = gr.Textbox(
            label="Ask a question about the transcript",
            placeholder="E.g., What was the main topic?",
        )
        ask_btn = gr.Button("🔍 Get Answer")
        answer_output = gr.Textbox(label="Answer", interactive=False)

        ask_btn.click(fn=answer_question, inputs=question_input, outputs=answer_output)
|
|
|
|
|
# Server port is read from the PORT1 environment variable, defaulting to 7860.
port = int(os.environ.get("PORT1", 7860))

# launch() returns an (app, local_url, share_url) tuple, so unpack it and
# report an actual URL instead of printing the whole tuple. The share URL is
# preferred; fall back to the local one if no share link was created.
_app, _local_url, _share_url = iface.launch(share=True, server_port=port)
url = _share_url or _local_url
print(f"Interface is live at: {url}")
|
|