Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
- readme.md +13 -0
- requirements.txt +6 -0
- text_summarizer.py +95 -0
readme.md
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 🧠 AI Text Summarizer
|
2 |
+
|
3 |
+
Summarize **Text**, **PDFs**, or **URLs** using two techniques:
|
4 |
+
- ✂️ Extractive Summarization (BERT-based)
|
5 |
+
- 🧠 Abstractive Summarization (BART Transformer)
|
6 |
+
|
7 |
+
### Features:
|
8 |
+
- ✅ Hugging Face Transformers
- ✅ Gradio UI
- ✅ Supports multiple input types
- ✅ Adjustable summary length / ratio
|
12 |
+
|
13 |
+
Made with 💙 by Sparsh
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
transformers
|
3 |
+
torch
|
4 |
+
newspaper3k
|
5 |
+
PyMuPDF
|
6 |
+
bert-extractive-summarizer
|
text_summarizer.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
from newspaper import Article
|
4 |
+
import fitz # PyMuPDF
|
5 |
+
from summarizer import Summarizer
|
6 |
+
|
7 |
+
# --------- UTILITY FUNCTIONS ---------
|
8 |
+
|
9 |
+
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file: PDF content passed to ``fitz.open`` as its ``stream``
            argument (presumably raw bytes from the upload widget --
            confirm against the caller).

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.
    """
    # Opening the document as a context manager guarantees it is closed,
    # even if extracting text from one of the pages raises.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
|
15 |
+
|
16 |
+
def extract_text_from_url(url):
    """Download the page at *url* and return its parsed article text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``text`` attribute of the parsed ``Article``.
    """
    page = Article(url)
    # Download first, then parse; ``text`` is read only after both steps.
    page.download()
    page.parse()
    return page.text
|
21 |
+
|
22 |
+
# Model instances are built once at import time and shared by the summary
# helper functions below, so each request reuses the already-loaded models.
abstractive_summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
extractive_summarizer = Summarizer()
|
24 |
+
|
25 |
+
def generate_abstractive_summary(text, max_length=130, min_length=30):
    """Summarize *text* with the shared abstractive summarization pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound on the generated summary length, forwarded
            to the pipeline.
        min_length: Lower bound on the generated summary length, forwarded
            to the pipeline.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    result = abstractive_summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Let the tokenizer cut inputs that exceed the model's maximum input
        # size instead of failing on them.
        truncation=True,
    )
    return result[0]["summary_text"]
|
28 |
+
|
29 |
+
def generate_extractive_summary(text, ratio=0.3):
    """Summarize *text* with the shared extractive summarizer.

    Args:
        text: The text to summarize.
        ratio: Forwarded unchanged as the summarizer's ``ratio`` argument.

    Returns:
        Whatever the extractive summarizer returns for the given input.
    """
    extractive = extractive_summarizer(text, ratio=ratio)
    return extractive
|
31 |
+
|
32 |
+
def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio):
    """Build abstractive and extractive summaries for the selected input.

    Args:
        source_type: Which input to use: ``"Text"``, ``"PDF"`` or ``"URL"``.
        text: Raw text, used when ``source_type`` is ``"Text"``.
        pdf: Uploaded PDF content, used when ``source_type`` is ``"PDF"``.
        url: Page address, used when ``source_type`` is ``"URL"``.
        max_length: Maximum length for the abstractive summary.
        min_length: Minimum length for the abstractive summary.
        ratio: Ratio forwarded to the extractive summarizer.

    Returns:
        A ``(abstractive, extractive)`` tuple of strings. When the input is
        missing, empty, or inconsistent, or when any step raises, the first
        element is a user-facing message and the second is an empty string.
    """
    # This function is the UI boundary: any failure is reported as a message
    # in the first output rather than propagated to the caller.
    try:
        if source_type == "Text" and text:
            input_text = text
        elif source_type == "PDF" and pdf is not None:
            input_text = extract_text_from_pdf(pdf)
        elif source_type == "URL" and url:
            input_text = extract_text_from_url(url)
        else:
            return "❗Please provide a valid input.", ""

        if not input_text or not input_text.strip():
            return "❗Input is empty after extraction.", ""

        # The two length controls are set independently, so their combination
        # has to be checked here; lengths are coerced to int because they are
        # token counts.
        max_length, min_length = int(max_length), int(min_length)
        if min_length > max_length:
            return "❗Min length cannot be greater than max length.", ""

        # Cap the text handed to both summarizers at 2000 characters, a
        # conservative bound for the abstractive model's ~1024-token input.
        input_text = input_text[:2000]

        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
        extractive = generate_extractive_summary(input_text, ratio)
        return abstractive, extractive

    except Exception as e:
        return f"⚠️ Error: {e}", ""
|
58 |
+
|
59 |
+
# --------- GRADIO UI ---------
|
60 |
+
|
61 |
+
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI Text Summarizer\nChoose input type and get both **abstractive** and **extractive** summaries.")

    # Source selector; it drives which of the three input widgets is shown.
    source_type = gr.Radio(["Text", "PDF", "URL"], label="Select Input Source")

    # All three inputs start hidden and are revealed by toggle_inputs.
    text_input = gr.Textbox(lines=8, label="Enter Text", visible=False)
    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
    url_input = gr.Textbox(label="Enter URL", visible=False)

    # Tuning controls for the two summarizers.
    max_length = gr.Slider(50, 300, step=10, value=130, label="Max Length (Abstractive)")
    min_length = gr.Slider(20, 100, step=10, value=30, label="Min Length (Abstractive)")
    ratio = gr.Slider(0.1, 1.0, step=0.1, value=0.3, label="Summary Ratio (Extractive)")

    btn = gr.Button("Generate Summaries")

    output_ab = gr.Textbox(label="Abstractive Summary")
    output_ex = gr.Textbox(label="Extractive Summary")

    def toggle_inputs(src):
        """Return visibility updates showing only the widget for *src*."""
        widget_for_source = {
            "Text": text_input,
            "PDF": pdf_input,
            "URL": url_input,
        }
        return {
            widget: gr.update(visible=(name == src))
            for name, widget in widget_for_source.items()
        }

    source_type.change(
        fn=toggle_inputs,
        inputs=source_type,
        outputs=[text_input, pdf_input, url_input],
    )

    # The inputs list matches the positional parameters of summarize_text.
    btn.click(
        fn=summarize_text,
        inputs=[
            source_type,
            text_input,
            pdf_input,
            url_input,
            max_length,
            min_length,
            ratio,
        ],
        outputs=[output_ab, output_ex],
    )


if __name__ == "__main__":
    demo.launch()
|