SparshSG committed on
Commit
1bc4912
·
verified ·
1 Parent(s): c2ad86b

Upload 3 files

Browse files
Files changed (3) hide show
  1. readme.md +13 -0
  2. requirements.txt +6 -0
  3. text_summarizer.py +95 -0
readme.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🧠 AI Text Summarizer
2
+
3
+ Summarize **Text**, **PDFs**, or **URLs** using two techniques:
4
+ - ✂️ Extractive Summarization (BERT-based)
5
+ - 🧠 Abstractive Summarization (BART Transformer)
6
+
7
+ ### Features:
8
+ ✅ Hugging Face Transformers
9
+ ✅ Gradio UI
10
+ ✅ Supports multiple input types
11
+ ✅ Adjustable summary length / ratio
12
+
13
+ Made with 💙 by Sparsh
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ newspaper3k
5
+ PyMuPDF
6
+ bert-extractive-summarizer
text_summarizer.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from newspaper import Article
4
+ import fitz # PyMuPDF
5
+ from summarizer import Summarizer
6
+
7
+ # --------- UTILITY FUNCTIONS ---------
8
+
9
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file: PDF content handed to PyMuPDF as an in-memory stream.

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.
    """
    # The context manager closes the document even if a page fails to
    # extract, instead of leaving the handle open until garbage collection.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
15
+
16
def extract_text_from_url(url):
    """Download the page at ``url`` and return its parsed article text."""
    page = Article(url)
    # The article must be downloaded before it can be parsed.
    page.download()
    page.parse()
    body_text = page.text
    return body_text
21
+
22
# Both summarizers are constructed once at module import and shared by the
# helper functions below.
abstractive_summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
extractive_summarizer = Summarizer()
24
+
25
def generate_abstractive_summary(text, max_length=130, min_length=30):
    """Summarize ``text`` with the module-level abstractive pipeline.

    Args:
        text: Input text to summarize.
        max_length: Upper bound on the generated summary length.
        min_length: Lower bound on the generated summary length. Clamped to
            ``max_length`` when a caller passes a larger value, since a
            minimum above the maximum cannot be satisfied.

    Returns:
        The ``summary_text`` of the first result returned by the pipeline.
    """
    # Callers can pass independent bounds; keep them mutually consistent.
    min_length = min(min_length, max_length)
    summary = abstractive_summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Let the tokenizer cut inputs that exceed the model's input window
        # rather than failing on them.
        truncation=True,
    )
    return summary[0]["summary_text"]
28
+
29
def generate_extractive_summary(text, ratio=0.3):
    """Return an extractive summary of ``text``.

    ``ratio`` is forwarded unchanged to the module-level extractive
    summarizer.
    """
    selected = extractive_summarizer(text, ratio=ratio)
    return selected
31
+
32
def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio, max_chars=2000):
    """Produce abstractive and extractive summaries for the selected input.

    Args:
        source_type: "Text", "PDF" or "URL"; selects which of the next three
            arguments supplies the content.
        text: Raw text, used when ``source_type`` is "Text".
        pdf: PDF content passed to ``extract_text_from_pdf``, used when
            ``source_type`` is "PDF".
        url: Address passed to ``extract_text_from_url``, used when
            ``source_type`` is "URL".
        max_length: Forwarded to ``generate_abstractive_summary``.
        min_length: Forwarded to ``generate_abstractive_summary``.
        ratio: Forwarded to ``generate_extractive_summary``.
        max_chars: Number of leading characters of the input that are kept
            before summarizing. Defaults to 2000, the limit that used to be
            hard-coded here.

    Returns:
        A 2-tuple ``(abstractive, extractive)``. When the input is missing,
        empty, or any step raises, the first element carries a user-facing
        message and the second is an empty string.
    """
    try:
        if source_type == "Text" and text:
            input_text = text
        elif source_type == "PDF" and pdf is not None:
            input_text = extract_text_from_pdf(pdf)
        elif source_type == "URL" and url:
            input_text = extract_text_from_url(url)
        else:
            return "❗Please provide a valid input.", ""

        if not input_text.strip():
            return "❗Input is empty after extraction.", ""

        # Cap the input length before handing it to the models; the original
        # author sized the default for a ~1024-token model window.
        input_text = input_text[:max_chars]

        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
        extractive = generate_extractive_summary(input_text, ratio)
        return abstractive, extractive
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the exception propagate to the caller.
        return f"⚠️ Error: {e}", ""
58
+
59
+ # --------- GRADIO UI ---------
60
+
61
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI Text Summarizer\nChoose input type and get both **abstractive** and **extractive** summaries.")

    source_type = gr.Radio(choices=["Text", "PDF", "URL"], label="Select Input Source")

    # All three input widgets start hidden; toggle_inputs reveals the one
    # matching the selected source.
    text_input = gr.Textbox(label="Enter Text", lines=8, visible=False)
    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
    url_input = gr.Textbox(label="Enter URL", visible=False)

    max_length = gr.Slider(minimum=50, maximum=300, value=130, step=10, label="Max Length (Abstractive)")
    min_length = gr.Slider(minimum=20, maximum=100, value=30, step=10, label="Min Length (Abstractive)")
    ratio = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Summary Ratio (Extractive)")

    btn = gr.Button("Generate Summaries")

    output_ab = gr.Textbox(label="Abstractive Summary")
    output_ex = gr.Textbox(label="Extractive Summary")

    def toggle_inputs(src):
        """Return visibility updates that show only the widget for ``src``."""
        widget_for_source = {
            text_input: "Text",
            pdf_input: "PDF",
            url_input: "URL",
        }
        return {
            widget: gr.update(visible=(src == source_name))
            for widget, source_name in widget_for_source.items()
        }

    source_type.change(
        fn=toggle_inputs,
        inputs=source_type,
        outputs=[text_input, pdf_input, url_input],
    )

    btn.click(
        fn=summarize_text,
        inputs=[source_type, text_input, pdf_input, url_input, max_length, min_length, ratio],
        outputs=[output_ab, output_ex],
    )

if __name__ == "__main__":
    demo.launch()