Spaces:
Sleeping
Sleeping
File size: 4,409 Bytes
bfba113 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import streamlit as st
from transformers import pipeline
import PyPDF2
import docx
import textwrap
# Streamlit page setup: tab title and icon, wide layout, sidebar shown on load.
# NOTE(review): the page_icon literal looks like a mis-decoded emoji (trailing
# bytes are missing, so the intended character cannot be recovered from this
# file alone) - confirm against the original source before changing it.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="TextSphere",
    page_icon="π€",
)
# Footer: a small credit badge pinned to the bottom-right corner of the page.
# The markup is a static literal (no user input is interpolated), and it is
# kept flush-left inside the string so the text handed to st.markdown is
# unchanged apart from the repaired heart emoji (it was mojibake before).
st.markdown("""
<style>
.footer {
position: fixed;
bottom: 0;
right: 0;
padding: 10px;
font-size: 16px;
color: #333;
background-color: #f1f1f1;
}
</style>
<div class="footer">
Made with ❤️ by Baibhav Malviya
</div>
""", unsafe_allow_html=True)
# Load Model
@st.cache_resource
def load_models():
    """Create the summarization pipeline used by the app.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded
    pipeline instead of rebuilding it.

    Returns:
        The ``transformers`` "summarization" pipeline backed by
        ``facebook/bart-large-cnn``.

    Raises:
        RuntimeError: If the pipeline cannot be created. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        return pipeline("summarization", model="facebook/bart-large-cnn")
    except Exception as e:
        # Chain explicitly so the traceback shows the real loading failure
        # as the direct cause of this error.
        raise RuntimeError(f"Failed to load model: {e}") from e


# Module-level handle used by the summarization task below.
summarization_model = load_models()
# Function to Extract Text from PDF
def extract_text_from_pdf(uploaded_pdf):
    """Extract the text of every page of an uploaded PDF.

    Args:
        uploaded_pdf: File-like object accepted by ``PyPDF2.PdfReader``
            (here, the object returned by ``st.file_uploader``).

    Returns:
        The page texts, each followed by a newline, concatenated in page
        order; ``None`` if the PDF cannot be read or contains no text. In
        both failure cases an error is shown with ``st.error``.
    """
    try:
        reader = PyPDF2.PdfReader(uploaded_pdf)
        # Pages for which extract_text() returns nothing are skipped.
        # Build the result with one join instead of repeated `+=`.
        pdf_text = "".join(
            f"{page_text}\n"
            for page_text in (page.extract_text() for page in reader.pages)
            if page_text
        )
    except Exception as e:
        st.error(f"Error reading the PDF: {e}")
        return None

    if not pdf_text.strip():
        st.error("No text found in the PDF.")
        return None
    return pdf_text
# Function to Extract Text from TXT
def extract_text_from_txt(uploaded_txt):
    """Read an uploaded plain-text file and return its stripped contents.

    Args:
        uploaded_txt: Binary file-like object whose ``read()`` returns the
            file's bytes (here, the object returned by ``st.file_uploader``).

    Returns:
        The decoded text with surrounding whitespace removed; ``None`` if the
        file cannot be read or decoded, or if it contains no text. In both
        failure cases an error is shown with ``st.error``.
    """
    try:
        # "utf-8-sig" decodes ordinary UTF-8 exactly like "utf-8" but also
        # drops a leading byte-order mark, which str.strip() would not remove
        # (U+FEFF is not whitespace) and which would otherwise be fed to the
        # model as part of the text.
        text = uploaded_txt.read().decode("utf-8-sig").strip()
    except Exception as e:
        st.error(f"Error reading the TXT file: {e}")
        return None

    if not text:
        # Same contract as the PDF extractor: an empty document is reported
        # explicitly instead of being returned as "".
        st.error("No text found in the TXT file.")
        return None
    return text
# Function to Extract Text from DOCX
def extract_text_from_docx(uploaded_docx):
    """Extract the paragraph text of an uploaded DOCX file.

    Only the text of ``document.paragraphs`` is collected, joined with
    newlines.

    Args:
        uploaded_docx: File-like object accepted by ``docx.Document``
            (here, the object returned by ``st.file_uploader``).

    Returns:
        The joined paragraph text with surrounding whitespace removed;
        ``None`` if the file cannot be read or yields no text. In both
        failure cases an error is shown with ``st.error``.
    """
    try:
        document = docx.Document(uploaded_docx)
        text = "\n".join(para.text for para in document.paragraphs).strip()
    except Exception as e:
        st.error(f"Error reading the DOCX file: {e}")
        return None

    if not text:
        # Same contract as the PDF extractor: an empty document is reported
        # explicitly instead of being returned as "".
        st.error("No text found in the DOCX file.")
        return None
    return text
# Function to Split Text into Chunks of at most `max_tokens` Characters
def chunk_text(text, max_tokens=1024):
    """Split *text* into chunks of at most ``max_tokens`` characters.

    Despite the parameter name (kept for backward compatibility), the limit
    is measured in characters, not model tokens: the text is wrapped with
    ``textwrap.wrap``, which breaks on whitespace and converts newlines and
    other whitespace characters to spaces.

    Args:
        text: The text to split. ``None`` or an empty string yields ``[]``.
        max_tokens: Maximum length of each chunk, in characters.

    Returns:
        A list of chunk strings, in order; empty if there is nothing to split.
    """
    if not text:
        # textwrap.wrap() cannot handle None; treat "no text" as "no chunks".
        return []
    return textwrap.wrap(text, width=max_tokens)
# Sidebar: task picker. The first entry ("Text Summarization") is preselected.
st.sidebar.title("AI Solutions")
option = st.sidebar.selectbox(
    label="Choose a task",
    options=[
        "Text Summarization",
        "Question Answering",
        "Text Classification",
        "Language Translation",
    ],
    index=0,
)
# Text Summarization Task
def _summary_length_bounds(chunk, floor=50):
    """Return ``(min_length, max_length)`` generation bounds for one chunk.

    ``max_length`` is half the chunk's word count, but never below *floor*.
    ``min_length`` is capped at that same half so a very short chunk
    (typically the last one) is not forced to produce *floor* tokens of
    output from only a few words of input. ``min_length <= max_length``
    always holds.

    NOTE(review): the input is measured in whitespace-separated words while
    the model limits are presumably in tokens - treat this as a heuristic.
    """
    half_words = len(chunk.split()) // 2
    max_length = max(floor, half_words)
    min_length = max(1, min(floor, half_words))
    return min_length, max_length


if option == "Text Summarization":
    # NOTE(review): the leading character of the title looks like a
    # mis-decoded emoji whose remaining bytes were lost; confirm the intended
    # icon against the original file before changing it.
    st.title("π Text Summarization")
    st.markdown(
        "<h4 style='font-size: 20px;'>- because who needs to read the whole document? 🥵</h4>",
        unsafe_allow_html=True,
    )

    # NOTE(review): the label says "tokens", but chunk_text() is called with a
    # 1024 limit that textwrap.wrap applies to characters.
    uploaded_file = st.file_uploader(
        "Upload a document (PDF, TXT, DOCX) - *Note: Processes only 1024 tokens per chunk*",
        type=["pdf", "txt", "docx"],
    )

    text_to_summarize = ""
    if uploaded_file:
        # Pick the extractor from the (lower-cased) file extension.
        extractors = {
            "pdf": extract_text_from_pdf,
            "txt": extract_text_from_txt,
            "docx": extract_text_from_docx,
        }
        file_type = uploaded_file.name.split(".")[-1].lower()
        extractor = extractors.get(file_type)
        if extractor is None:
            st.error("Unsupported file format.")
        else:
            # Extractors return None (after showing their own error) on failure.
            text_to_summarize = extractor(uploaded_file)

    if st.button("Summarize"):
        if not text_to_summarize:
            st.error("Please upload a document first.")
        else:
            with st.spinner('Summarizing...'):
                try:
                    summaries = []
                    for chunk in chunk_text(text_to_summarize, max_tokens=1024):
                        min_len, max_len = _summary_length_bounds(chunk)
                        result = summarization_model(
                            chunk,
                            max_length=max_len,
                            min_length=min_len,
                            do_sample=False,
                        )
                        summaries.append(result[0]['summary_text'])
                    # Chunk summaries are shown in document order as one text.
                    st.write("### Summary:")
                    st.write(" ".join(summaries))
                except Exception as e:
                    # UI boundary: report any failure to the user instead of
                    # letting the script crash.
                    st.error(f"Error: {e}")
|