import logging

import gradio as gr
from transformers import pipeline
from newspaper import Article
import fitz  # PyMuPDF
from summarizer import Summarizer

logger = logging.getLogger(__name__)

# BART/T5-style models handle ~1024 tokens (~2000 characters), so the input
# text is cut to this many characters before it is summarized.
MAX_INPUT_CHARS = 2000


# --------- UTILITY FUNCTIONS ---------
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: The PDF content passed to ``fitz.open`` as ``stream``
            (the UI below supplies it from ``gr.File(type="binary")``).

    Returns:
        The text of all pages joined together, in page order.
    """
    # The context manager closes the document even if text extraction fails.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def extract_text_from_url(url):
    """Download and parse the article at *url* and return its text."""
    article = Article(url)
    article.download()
    article.parse()
    return article.text


# Both models are created once, at import time, and shared by every request.
abstractive_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
extractive_summarizer = Summarizer()


def generate_abstractive_summary(text, max_length=130, min_length=30):
    """Summarize *text* with the abstractive (BART) pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound on the generated summary length.
        min_length: Lower bound on the generated summary length; clamped so
            that it never exceeds ``max_length``.

    Returns:
        The generated summary string.
    """
    # UI sliders may deliver floats and allow min > max; normalize both.
    max_length = int(max_length)
    min_length = min(int(min_length), max_length)
    result = abstractive_summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Let the tokenizer enforce the model's input limit instead of
        # relying solely on a character-count heuristic.
        truncation=True,
    )
    return result[0]["summary_text"]


def generate_extractive_summary(text, ratio=0.3):
    """Summarize *text* with the extractive summarizer at the given ratio."""
    return extractive_summarizer(text, ratio=ratio)


def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio):
    """Produce abstractive and extractive summaries for the selected input.

    Args:
        source_type: One of ``"Text"``, ``"PDF"`` or ``"URL"``.
        text: Raw text, used when ``source_type`` is ``"Text"``.
        pdf: PDF content, used when ``source_type`` is ``"PDF"``.
        url: Article URL, used when ``source_type`` is ``"URL"``.
        max_length: Maximum length for the abstractive summary.
        min_length: Minimum length for the abstractive summary.
        ratio: Ratio passed to the extractive summarizer.

    Returns:
        A pair ``(abstractive, extractive)``. On invalid input or failure the
        first element carries a user-facing message and the second is ``""``.
    """
    try:
        if source_type == "Text" and text:
            input_text = text
        elif source_type == "PDF" and pdf is not None:
            input_text = extract_text_from_pdf(pdf)
        elif source_type == "URL" and url:
            input_text = extract_text_from_url(url)
        else:
            return "❗Please provide a valid input.", ""

        if not input_text.strip():
            return "❗Input is empty after extraction.", ""

        # Both summarizers receive the same character-limited text.
        input_text = input_text[:MAX_INPUT_CHARS]

        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
        extractive = generate_extractive_summary(input_text, ratio)
        return abstractive, extractive
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the logs rather than discarding it.
        logger.exception("Summarization failed")
        return f"⚠️ Error: {str(e)}", ""


# --------- GRADIO UI ---------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI Text Summarizer\nChoose input type and get both **abstractive** and **extractive** summaries.")

    source_type = gr.Radio(["Text", "PDF", "URL"], label="Select Input Source")

    # All three inputs start hidden; toggle_inputs reveals the selected one.
    text_input = gr.Textbox(lines=8, label="Enter Text", visible=False)
    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
    url_input = gr.Textbox(label="Enter URL", visible=False)

    max_length = gr.Slider(50, 300, step=10, value=130, label="Max Length (Abstractive)")
    min_length = gr.Slider(20, 100, step=10, value=30, label="Min Length (Abstractive)")
    ratio = gr.Slider(0.1, 1.0, step=0.1, value=0.3, label="Summary Ratio (Extractive)")

    btn = gr.Button("Generate Summaries")

    output_ab = gr.Textbox(label="Abstractive Summary")
    output_ex = gr.Textbox(label="Extractive Summary")

    def toggle_inputs(src):
        """Show only the input widget that matches the selected source."""
        return {
            text_input: gr.update(visible=(src == "Text")),
            pdf_input: gr.update(visible=(src == "PDF")),
            url_input: gr.update(visible=(src == "URL")),
        }

    source_type.change(
        fn=toggle_inputs,
        inputs=source_type,
        outputs=[text_input, pdf_input, url_input],
    )

    btn.click(
        summarize_text,
        inputs=[source_type, text_input, pdf_input, url_input, max_length, min_length, ratio],
        outputs=[output_ab, output_ex],
    )


if __name__ == "__main__":
    demo.launch()