SparshSG committed on
Commit
bae5e51
·
verified ·
1 Parent(s): 1bc4912

Delete text_summarizer.py

Browse files
Files changed (1) hide show
  1. text_summarizer.py +0 -95
text_summarizer.py DELETED
@@ -1,95 +0,0 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
- from newspaper import Article
4
- import fitz # PyMuPDF
5
- from summarizer import Summarizer
6
-
7
- # --------- UTILITY FUNCTIONS ---------
8
-
9
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: PDF content handed to ``fitz.open`` as its ``stream``
            argument (presumably raw bytes; confirm against the caller).

    Returns:
        The text of all pages joined in page order, with no separator
        inserted between pages.
    """
    # Using the document as a context manager guarantees it is closed even
    # when text extraction raises part-way through.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)
15
-
16
def extract_text_from_url(url):
    """Download the article at *url* and return its parsed text.

    Args:
        url: Address of the article to fetch.

    Returns:
        The ``text`` attribute of the parsed ``Article``.
    """
    fetched = Article(url)
    # The article is downloaded first and parsed afterwards, then its
    # ``text`` attribute is read.
    fetched.download()
    fetched.parse()
    body = fetched.text
    return body
21
-
22
# Module-level summarizers, created once and reused by the helper functions.
# Abstractive: a "summarization" pipeline backed by facebook/bart-large-cnn.
abstractive_summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# Extractive: a Summarizer built with its default settings.
extractive_summarizer = Summarizer()
24
-
25
def generate_abstractive_summary(text, max_length=130, min_length=30):
    """Summarize *text* with the module-level abstractive summarizer.

    Args:
        text: Text to summarize.
        max_length: Forwarded to the summarizer as ``max_length``.
        min_length: Forwarded to the summarizer as ``min_length``.

    Returns:
        The ``summary_text`` entry of the first result.
    """
    results = abstractive_summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )
    first_result = results[0]
    return first_result["summary_text"]
28
-
29
def generate_extractive_summary(text, ratio=0.3):
    """Summarize *text* with the module-level extractive summarizer.

    Args:
        text: Text to summarize.
        ratio: Forwarded to the summarizer as ``ratio``.

    Returns:
        Whatever the extractive summarizer returns for the given input.
    """
    extracted = extractive_summarizer(text, ratio=ratio)
    return extracted
31
-
32
def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio):
    """Produce abstractive and extractive summaries for the chosen input.

    Args:
        source_type: "Text", "PDF" or "URL"; selects which of the next three
            arguments supplies the input.
        text: Raw text, used when ``source_type`` is "Text".
        pdf: PDF payload passed to ``extract_text_from_pdf``, used when
            ``source_type`` is "PDF".
        url: Article URL passed to ``extract_text_from_url``, used when
            ``source_type`` is "URL".
        max_length: Upper length bound forwarded to the abstractive summarizer.
        min_length: Lower length bound forwarded to the abstractive summarizer.
        ratio: Ratio forwarded to the extractive summarizer.

    Returns:
        A 2-tuple ``(abstractive, extractive)``. When the input is invalid or
        an error occurs, the first element is a user-facing message and the
        second is an empty string.
    """
    # Character budget applied before summarizing, chosen as a rough stand-in
    # for the ~1024-token input limit of Bart/T5-style models.
    max_chars = 2000

    # The two length bounds are set independently by the caller and their
    # ranges can overlap, so reject contradictory bounds up front, before any
    # download or model call is attempted.
    if (
        min_length is not None
        and max_length is not None
        and min_length > max_length
    ):
        return "❗Min length must not be greater than max length.", ""

    try:
        if source_type == "Text" and text:
            input_text = text
        elif source_type == "PDF" and pdf is not None:
            input_text = extract_text_from_pdf(pdf)
        elif source_type == "URL" and url:
            input_text = extract_text_from_url(url)
        else:
            return "❗Please provide a valid input.", ""

        if not input_text.strip():
            return "❗Input is empty after extraction.", ""

        input_text = input_text[:max_chars]

        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
        extractive = generate_extractive_summary(input_text, ratio)

        return abstractive, extractive

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"⚠️ Error: {str(e)}", ""
58
-
59
- # --------- GRADIO UI ---------
60
-
61
# Gradio interface: a source selector, one input widget per source, tuning
# sliders, a trigger button and two output boxes.
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🧠 AI Text Summarizer\n"
        "Choose input type and get both **abstractive** and **extractive** summaries."
    )

    source_type = gr.Radio(choices=["Text", "PDF", "URL"], label="Select Input Source")

    # All three inputs start hidden; the selector's change handler reveals
    # the one that matches the chosen source.
    text_input = gr.Textbox(label="Enter Text", lines=8, visible=False)
    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
    url_input = gr.Textbox(label="Enter URL", visible=False)

    max_length = gr.Slider(
        minimum=50, maximum=300, value=130, step=10, label="Max Length (Abstractive)"
    )
    min_length = gr.Slider(
        minimum=20, maximum=100, value=30, step=10, label="Min Length (Abstractive)"
    )
    ratio = gr.Slider(
        minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Summary Ratio (Extractive)"
    )

    btn = gr.Button("Generate Summaries")

    output_ab = gr.Textbox(label="Abstractive Summary")
    output_ex = gr.Textbox(label="Extractive Summary")

    def toggle_inputs(src):
        """Return visibility updates showing only the widget for *src*."""
        widget_for_source = {"Text": text_input, "PDF": pdf_input, "URL": url_input}
        return {
            widget: gr.update(visible=(src == label))
            for label, widget in widget_for_source.items()
        }

    source_type.change(
        fn=toggle_inputs,
        inputs=source_type,
        outputs=[text_input, pdf_input, url_input],
    )

    btn.click(
        fn=summarize_text,
        inputs=[
            source_type,
            text_input,
            pdf_input,
            url_input,
            max_length,
            min_length,
            ratio,
        ],
        outputs=[output_ab, output_ex],
    )

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()