File size: 3,318 Bytes
2a0c297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import gradio as gr
from transformers import pipeline
from newspaper import Article
import fitz  # PyMuPDF
from summarizer import Summarizer

# --------- UTILITY FUNCTIONS ---------

def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Raw PDF content, handed to ``fitz.open`` as ``stream``.

    Returns:
        The text of all pages joined in page order, with no separator added.
    """
    # Using the document as a context manager guarantees it is closed even
    # when text extraction raises part-way through.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

def extract_text_from_url(url):
    """Fetch the page at *url* and return the article text parsed from it.

    Args:
        url: Address handed to ``newspaper.Article``.

    Returns:
        The ``text`` attribute of the article after download and parse.
    """
    page = Article(url)
    # The article must be downloaded before it is parsed.
    page.download()
    page.parse()
    body_text = page.text
    return body_text

# Abstractive summarizer: a transformers "summarization" pipeline using the
# facebook/bart-large-cnn checkpoint. Built once at import time and reused by
# generate_abstractive_summary().
abstractive_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Extractive summarizer from the `summarizer` package, constructed with its
# default arguments and reused by generate_extractive_summary().
extractive_summarizer = Summarizer()

def generate_abstractive_summary(text, max_length=130, min_length=30):
    """Summarize *text* with the module-level abstractive pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper length bound forwarded to the pipeline.
        min_length: Lower length bound forwarded to the pipeline.

    Returns:
        The ``summary_text`` field of the first result returned by the
        pipeline.
    """
    results = abstractive_summarizer(
        text,
        # Coerce to int in case a caller supplies a float-valued number
        # (e.g. from a UI slider); whole values are unchanged.
        max_length=int(max_length),
        min_length=int(min_length),
        do_sample=False,
        # Let the tokenizer clip inputs longer than the model accepts instead
        # of failing inside the model on over-long text.
        truncation=True,
    )
    return results[0]["summary_text"]

def generate_extractive_summary(text, ratio=0.3):
    """Summarize *text* with the module-level extractive summarizer.

    Args:
        text: The text to summarize.
        ratio: Forwarded unchanged as the summarizer's ``ratio`` argument
            (presumably the share of the input to keep; confirm against the
            ``summarizer`` package).

    Returns:
        Whatever the extractive summarizer returns for the given input.
    """
    extract = extractive_summarizer(text, ratio=ratio)
    return extract

def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio):
    """Produce abstractive and extractive summaries for the selected input.

    Args:
        source_type: One of ``"Text"``, ``"PDF"`` or ``"URL"``; selects which
            of the next three arguments is used.
        text: Raw text, used when ``source_type == "Text"``.
        pdf: PDF content, used when ``source_type == "PDF"``.
        url: Article URL, used when ``source_type == "URL"``.
        max_length: Maximum length for the abstractive summary.
        min_length: Minimum length for the abstractive summary.
        ratio: Ratio passed to the extractive summarizer.

    Returns:
        A 2-tuple ``(abstractive, extractive)``. When the input is missing,
        empty, inconsistent, or an error occurs, the first element is a
        human-readable message and the second is an empty string.
    """
    # Everything runs inside one try: this is the UI boundary, so any failure
    # is reported to the user as a message instead of propagating.
    try:
        if source_type == "Text" and text:
            input_text = text
        elif source_type == "PDF" and pdf is not None:
            input_text = extract_text_from_pdf(pdf)
        elif source_type == "URL" and url:
            input_text = extract_text_from_url(url)
        else:
            return "❗Please provide a valid input.", ""

        if not input_text.strip():
            return "❗Input is empty after extraction.", ""

        # The two limits are set independently by the caller and can be
        # contradictory; reject that up front with a clear message rather
        # than handing an unsatisfiable constraint to the model.
        max_length = int(max_length)
        min_length = int(min_length)
        if min_length > max_length:
            return "❗Min length must not exceed max length.", ""

        # Bart/T5 models handle ~1024 tokens (~2000 characters)
        input_text = input_text[:2000]

        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
        extractive = generate_extractive_summary(input_text, ratio)

        return abstractive, extractive

    except Exception as e:
        return f"⚠️ Error: {e}", ""

# --------- GRADIO UI ---------

# Build the Gradio interface; `demo` is the Blocks app launched at the bottom
# of the file.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI Text Summarizer\nChoose input type and get both **abstractive** and **extractive** summaries.")

    # Selector that decides which of the three input widgets below is used.
    source_type = gr.Radio(["Text", "PDF", "URL"], label="Select Input Source")

    # All three input widgets start hidden; toggle_inputs() reveals the one
    # matching the selected source.
    text_input = gr.Textbox(lines=8, label="Enter Text", visible=False)
    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
    url_input = gr.Textbox(label="Enter URL", visible=False)

    # Tuning controls forwarded to summarize_text(); the defaults match the
    # defaults of the generate_* helper functions.
    max_length = gr.Slider(50, 300, step=10, value=130, label="Max Length (Abstractive)")
    min_length = gr.Slider(20, 100, step=10, value=30, label="Min Length (Abstractive)")
    ratio = gr.Slider(0.1, 1.0, step=0.1, value=0.3, label="Summary Ratio (Extractive)")

    btn = gr.Button("Generate Summaries")

    # Output boxes receiving the two values returned by summarize_text().
    output_ab = gr.Textbox(label="Abstractive Summary")
    output_ex = gr.Textbox(label="Extractive Summary")

    def toggle_inputs(src):
        """Return visibility updates so only the widget for *src* is shown."""
        return {
            text_input: gr.update(visible=(src == "Text")),
            pdf_input: gr.update(visible=(src == "PDF")),
            url_input: gr.update(visible=(src == "URL"))
        }

    # Re-evaluate widget visibility whenever the selected source changes.
    source_type.change(fn=toggle_inputs, inputs=source_type, outputs=[text_input, pdf_input, url_input])

    # The order of `inputs` matches the parameter order of summarize_text().
    btn.click(
        summarize_text,
        inputs=[source_type, text_input, pdf_input, url_input, max_length, min_length, ratio],
        outputs=[output_ab, output_ex]
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()