Spaces:
Sleeping
Sleeping
File size: 3,318 Bytes
2a0c297 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import gradio as gr
from transformers import pipeline
from newspaper import Article
import fitz # PyMuPDF
from summarizer import Summarizer
# --------- UTILITY FUNCTIONS ---------
def extract_text_from_pdf(pdf_file):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        pdf_file: In-memory PDF content, passed to ``fitz.open`` as its
            ``stream`` argument.

    Returns:
        The text of all pages joined in page order; an empty string when
        no page yields any text.
    """
    # Close the document deterministically, even if a page fails to parse,
    # instead of leaving it open until garbage collection.
    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
        # join() builds the result in one pass rather than via repeated +=.
        return "".join(page.get_text() for page in doc)
def extract_text_from_url(url):
    """Download the page at *url* and return the article text parsed from it."""
    page = Article(url)
    # Fetch first, then parse: both calls are made for their effect on
    # ``page``, whose ``text`` attribute is read afterwards.
    page.download()
    page.parse()
    body_text = page.text
    return body_text
# Both summarizers are built once at import time and shared by every request,
# so the models are not re-created on each button click.
abstractive_summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
extractive_summarizer = Summarizer()
def generate_abstractive_summary(text, max_length=130, min_length=30):
    """Summarize *text* with the module-level abstractive pipeline.

    Args:
        text: Text to summarize.
        max_length: Upper length bound, forwarded to the pipeline as
            ``max_length`` after conversion to ``int``.
        min_length: Lower length bound, forwarded to the pipeline as
            ``min_length`` after conversion to ``int``. Values larger than
            ``max_length`` are clamped down to ``max_length``.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    # UI sliders may hand over floats; generation lengths must be integers.
    max_length = int(max_length)
    # The two bounds are chosen independently by the caller, so the lower one
    # can exceed the upper one; clamp instead of sending an infeasible pair.
    min_length = min(int(min_length), max_length)

    result = abstractive_summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Let the tokenizer cut inputs that exceed the model's input size
        # rather than failing on them.
        truncation=True,
    )
    return result[0]["summary_text"]
def generate_extractive_summary(text, ratio=0.3):
    """Run the module-level extractive summarizer on *text*.

    ``ratio`` is forwarded unchanged as the summarizer's ``ratio`` keyword,
    and whatever the summarizer returns is handed back to the caller.
    """
    extractive_result = extractive_summarizer(text, ratio=ratio)
    return extractive_result
def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio, max_chars=2000):
    """Produce an abstractive and an extractive summary for one input source.

    Args:
        source_type: ``"Text"``, ``"PDF"`` or ``"URL"``; selects which of the
            next three arguments is used.
        text: Raw text, used when ``source_type`` is ``"Text"``.
        pdf: PDF content, used when ``source_type`` is ``"PDF"``; passed to
            ``extract_text_from_pdf``.
        url: Article address, used when ``source_type`` is ``"URL"``;
            surrounding whitespace is ignored.
        max_length: Forwarded to ``generate_abstractive_summary``.
        min_length: Forwarded to ``generate_abstractive_summary``.
        ratio: Forwarded to ``generate_extractive_summary``.
        max_chars: Number of leading characters of the input that are kept
            before summarizing. The cap applies to both summarizers.

    Returns:
        A 2-tuple ``(abstractive, extractive)``. When the input is missing,
        empty, or processing fails, the first element carries a
        user-facing message and the second is an empty string.
    """
    import logging  # local import keeps this function self-contained

    try:
        if source_type == "Text" and text:
            input_text = text
        elif source_type == "PDF" and pdf is not None:
            input_text = extract_text_from_pdf(pdf)
        elif source_type == "URL" and url and url.strip():
            # Pasted addresses often carry stray whitespace; a blank one is
            # treated as "no input" instead of being sent to the downloader.
            input_text = extract_text_from_url(url.strip())
        else:
            return "❗Please provide a valid input.", ""

        if not input_text or not input_text.strip():
            return "❗Input is empty after extraction.", ""

        # Cap the input size before summarizing. The default follows the
        # file's earlier note that Bart/T5 models handle ~1024 tokens
        # (~2000 characters).
        input_text = input_text[:max_chars]

        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
        extractive = generate_extractive_summary(input_text, ratio)
        return abstractive, extractive
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the
        # traceback in the logs so it is not lost.
        logging.getLogger(__name__).exception("Summarization failed")
        return f"⚠️ Error: {str(e)}", ""
# --------- GRADIO UI ---------
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 AI Text Summarizer\nChoose input type and get both **abstractive** and **extractive** summaries.")

    # Source selector, followed by one input widget per source. All three
    # start hidden and are revealed by toggle_inputs() below.
    source_type = gr.Radio(choices=["Text", "PDF", "URL"], label="Select Input Source")
    text_input = gr.Textbox(label="Enter Text", lines=8, visible=False)
    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
    url_input = gr.Textbox(label="Enter URL", visible=False)

    # Tuning controls for the two summarizers.
    max_length = gr.Slider(minimum=50, maximum=300, step=10, value=130, label="Max Length (Abstractive)")
    min_length = gr.Slider(minimum=20, maximum=100, step=10, value=30, label="Min Length (Abstractive)")
    ratio = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Summary Ratio (Extractive)")

    btn = gr.Button("Generate Summaries")
    output_ab = gr.Textbox(label="Abstractive Summary")
    output_ex = gr.Textbox(label="Extractive Summary")

    def toggle_inputs(src):
        """Show only the input widget that matches the selected source."""
        widget_for_source = {
            "Text": text_input,
            "PDF": pdf_input,
            "URL": url_input,
        }
        return {
            widget: gr.update(visible=(src == source_name))
            for source_name, widget in widget_for_source.items()
        }

    source_type.change(
        toggle_inputs,
        inputs=source_type,
        outputs=[text_input, pdf_input, url_input],
    )
    btn.click(
        fn=summarize_text,
        inputs=[source_type, text_input, pdf_input, url_input, max_length, min_length, ratio],
        outputs=[output_ab, output_ex],
    )

# Start the app only when the file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|