Spaces:

SparshSG
/

AI-text-summarizer

Sleeping

App Files Files Community

SparshSG committed on 25 days ago

Commit

1bc4912

verified ·

1 Parent(s): c2ad86b

Upload 3 files

Browse files

Files changed (3) hide show

readme.md +13 -0
requirements.txt +6 -0
text_summarizer.py +95 -0

readme.md ADDED Viewed

	@@ -0,0 +1,13 @@

+# 🧠 AI Text Summarizer
+Summarize **Text**, **PDFs**, or **URLs** using two techniques:
+- ✂️ Extractive Summarization (BERT-based)
+- 🧠 Abstractive Summarization (BART Transformer)
+### Features:
+✅ Hugging Face Transformers
+✅ Gradio UI
+✅ Supports multiple input types
+✅ Adjustable summary length / ratio
+Made with 💙 by Sparsh

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+transformers
+torch
+newspaper3k
+PyMuPDF
+bert-extractive-summarizer

text_summarizer.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import gradio as gr
+from transformers import pipeline
+from newspaper import Article
+import fitz  # PyMuPDF
+from summarizer import Summarizer
+# --------- UTILITY FUNCTIONS ---------
+def extract_text_from_pdf(pdf_file):
+    doc = fitz.open(stream=pdf_file, filetype="pdf")
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+def extract_text_from_url(url):
+    article = Article(url)
+    article.download()
+    article.parse()
+    return article.text
+abstractive_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+extractive_summarizer = Summarizer()
+def generate_abstractive_summary(text, max_length=130, min_length=30):
+    summary = abstractive_summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
+    return summary[0]['summary_text']
+def generate_extractive_summary(text, ratio=0.3):
+    return extractive_summarizer(text, ratio=ratio)
+def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio):
+    input_text = ""
+    try:
+        if source_type == "Text" and text:
+            input_text = text
+        elif source_type == "PDF" and pdf is not None:
+            input_text = extract_text_from_pdf(pdf)
+        elif source_type == "URL" and url:
+            input_text = extract_text_from_url(url)
+        else:
+            return "❗Please provide a valid input.", ""
+        if len(input_text.strip()) == 0:
+            return "❗Input is empty after extraction.", ""
+        # Bart/T5 models handle ~1024 tokens (~2000 characters)
+        input_text = input_text[:2000]
+        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
+        extractive = generate_extractive_summary(input_text, ratio)
+        return abstractive, extractive
+    except Exception as e:
+        return f"⚠️ Error: {str(e)}", ""
+# --------- GRADIO UI ---------
+with gr.Blocks() as demo:
+    gr.Markdown("## 🧠 AI Text Summarizer\nChoose input type and get both **abstractive** and **extractive** summaries.")
+    source_type = gr.Radio(["Text", "PDF", "URL"], label="Select Input Source")
+    text_input = gr.Textbox(lines=8, label="Enter Text", visible=False)
+    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
+    url_input = gr.Textbox(label="Enter URL", visible=False)
+    max_length = gr.Slider(50, 300, step=10, value=130, label="Max Length (Abstractive)")
+    min_length = gr.Slider(20, 100, step=10, value=30, label="Min Length (Abstractive)")
+    ratio = gr.Slider(0.1, 1.0, step=0.1, value=0.3, label="Summary Ratio (Extractive)")
+    btn = gr.Button("Generate Summaries")
+    output_ab = gr.Textbox(label="Abstractive Summary")
+    output_ex = gr.Textbox(label="Extractive Summary")
+    def toggle_inputs(src):
+        return {
+            text_input: gr.update(visible=(src == "Text")),
+            pdf_input: gr.update(visible=(src == "PDF")),
+            url_input: gr.update(visible=(src == "URL"))
+        }
+    source_type.change(fn=toggle_inputs, inputs=source_type, outputs=[text_input, pdf_input, url_input])
+    btn.click(
+        summarize_text,
+        inputs=[source_type, text_input, pdf_input, url_input, max_length, min_length, ratio],
+        outputs=[output_ab, output_ex]
+    )
+if __name__ == "__main__":  # start the app only when run as a script, not on import
+    demo.launch()