# Spaces: Sleeping
from transformers import pipeline, AutoTokenizer | |
import gradio as gr | |
# Checkpoint shared by the tokenizer and the summarization pipeline.
MODEL_NAME = "sshleifer/distilbart-cnn-6-6"

# Tokenizer used by safe_truncate() to clip long inputs before summarizing.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Summarization pipeline built on the same checkpoint.
summarizer = pipeline("summarization", model=MODEL_NAME)
# Truncation helper
def safe_truncate(text, max_tokens=1024):
    """Return ``text`` clipped to at most ``max_tokens`` tokenizer tokens.

    The text is encoded with the module-level ``tokenizer`` (truncating at
    ``max_tokens``) and the resulting ids are decoded back into a string with
    special tokens removed.
    """
    token_ids = tokenizer.encode(text, max_length=max_tokens, truncation=True)
    clipped_text = tokenizer.decode(token_ids, skip_special_tokens=True)
    return clipped_text
# Summary function with truncation applied
def summarize_article(text):
    """Summarize ``text`` using the module-level ``summarizer`` pipeline.

    Args:
        text: The article to summarize. ``None``, empty, or whitespace-only
            input is treated as "nothing to summarize".

    Returns:
        The ``summary_text`` of the first pipeline result, or ``""`` when
        there is no input text.
    """
    # Guard: with no input the model would still be asked to generate at
    # least ``min_length`` tokens, producing text unrelated to any article.
    if not text or not text.strip():
        return ""

    short_text = safe_truncate(text)  # clip overly long articles up front
    summary = summarizer(
        short_text,
        max_length=250,
        min_length=100,
        do_sample=False,
        # The pipeline re-tokenizes the decoded text, and that round trip may
        # not reproduce the same token count; let the pipeline enforce the
        # model's input limit as a hard cap.
        truncation=True,
    )
    return summary[0]["summary_text"]
# Default example
default_article = """New York (CNN)When Liana Barrientos was 23 years old, she got married..."""  # [shortened for demo]
# Summarize the sample once at startup so the output box starts pre-filled.
default_summary = summarize_article(default_article)

# Gradio Interface: both text boxes are non-interactive and pre-populated.
article_box = gr.Textbox(
    lines=20,
    label="Article (Read Only)",
    value=default_article,
    interactive=False,
)
summary_box = gr.Textbox(
    label="Summary (Read Only)",
    value=default_summary,
    interactive=False,
)
iface = gr.Interface(
    fn=summarize_article,
    inputs=article_box,
    outputs=summary_box,
    title="⚡ Fast Article Summarizer (CPU Optimized)",
    description="Fast summarization with longer output using CPU only. Inputs and outputs are read-only.",
)
iface.launch()