awacke1's picture
Create app.py
d9c302e
raw
history blame
2.31 kB
import streamlit as st
import re
import nltk
from nltk.corpus import stopwords
from nltk import FreqDist
from graphviz import Digraph
# Fetch the NLTK data this script relies on: the Punkt tokenizer models used by
# nltk.word_tokenize and the stop-word lists used for filtering.
nltk.download('punkt')
# Recent NLTK releases load the tokenizer models from the 'punkt_tab' resource
# instead of the pickled 'punkt' package, so fetch both to work across versions.
nltk.download('punkt_tab')
nltk.download('stopwords')
# A timestamp ("M:SS", "MM:SS" or "H:MM:SS") that ends a line.
# - The lookbehind stops the pattern from biting into a longer run of digits or
#   colons, which previously left fragments such as "1:" behind.
# - The tail accepts "\n", "\r\n", or the end of the text, so Windows line
#   endings and a final line without a newline are handled too.
_TIMESTAMP_AT_EOL = re.compile(
    r'(?<![\d:])(?:\d{1,2}:)?\d{1,2}:\d{2}(?:\r?\n|\Z)'
)


def remove_timestamps(text):
    """Return ``text`` with line-ending timestamps removed.

    A timestamp is ``M:SS``, ``MM:SS`` or ``H:MM:SS`` immediately followed by
    a line break (``\\n`` or ``\\r\\n``) or by the end of the text. The line
    break that follows the timestamp is removed together with it, so a line
    holding only a timestamp disappears entirely.

    Args:
        text: The raw text to clean.

    Returns:
        The text with every matching timestamp (and its line break) deleted.
    """
    return _TIMESTAMP_AT_EOL.sub('', text)
def process_text(text):
    """Turn plain text into a two-level Markdown outline.

    The text is split on ``\\n``; each line is stripped of surrounding
    whitespace (including a trailing ``\\r`` from Windows line endings) and
    blank lines are dropped. The remaining lines alternate: even-positioned
    lines (0, 2, ...) become bold headings and odd-positioned lines become
    bullet items followed by an emoji.

    Stripping matters because Markdown does not render ``** text **`` as bold,
    and a whitespace-only line would otherwise take a heading slot and shift
    the heading/bullet alternation for every line after it.

    Args:
        text: The text to convert.

    Returns:
        The Markdown outline, one output line per non-blank input line, each
        terminated by ``\\n``. Empty when the input has no non-blank lines.
    """
    content_lines = [
        stripped for stripped in map(str.strip, text.split("\n")) if stripped
    ]
    rendered = [
        f"**{line}**\n" if position % 2 == 0 else f"- {line} 😄\n"
        for position, line in enumerate(content_lines)
    ]
    return "".join(rendered)
def unit_test(input_text):
    """Show ``input_text`` with its timestamps removed on the Streamlit page.

    Writes a caption first, then the result of ``remove_timestamps`` applied
    to ``input_text``. Nothing is returned.
    """
    st.write("Test Text without Timestamps:")
    st.write(remove_timestamps(input_text))
def extract_high_information_words(text, top_n=10):
    """Return the most frequent non-stop-word tokens found in ``text``.

    The text is tokenized with ``nltk.word_tokenize``. Only tokens made up
    entirely of alphabetic characters are kept; they are lower-cased, English
    stop words are discarded, and the survivors are ranked by frequency.

    Args:
        text: The text to analyse.
        top_n: Maximum number of words to return (default 10).

    Returns:
        A list of at most ``top_n`` lower-cased words, most frequent first.
    """
    tokens = nltk.word_tokenize(text)
    alphabetic = [token.lower() for token in tokens if token.isalpha()]
    english_stop_words = set(stopwords.words('english'))
    counts = FreqDist(
        [token for token in alphabetic if token not in english_stop_words]
    )
    return [token for token, _count in counts.most_common(top_n)]
def create_relationship_graph(words):
    """Build a directed chain graph linking each word to the next one.

    Every word becomes a node whose id is its position in ``words`` (as a
    string) and whose label is the word itself. Each node after the first is
    connected from its predecessor by an edge labelled with the position of
    the node the edge points to.

    Args:
        words: The ordered words to chain together.

    Returns:
        The populated ``Digraph``.
    """
    chain = Digraph()
    previous_id = None
    for position, label_text in enumerate(words):
        node_id = str(position)
        chain.node(node_id, label_text)
        # The first node has no predecessor, so it gets no incoming edge.
        if previous_id is not None:
            chain.edge(previous_id, node_id, label=node_id)
        previous_id = node_id
    return chain
def display_relationship_graph(words):
    """Build the word-chain graph for ``words`` and render it with Streamlit."""
    st.graphviz_chart(create_relationship_graph(words))
# --- Streamlit page flow ---------------------------------------------------
# Ask for a text file; once one is uploaded, show the cleaned text, a Markdown
# outline, the most frequent words and a graph chaining those words together.
uploaded_file = st.file_uploader("Choose a .txt file", type=['txt'])
if uploaded_file is not None:
    # "utf-8-sig" drops a leading byte-order mark if present, and
    # errors="replace" keeps a file that is not valid UTF-8 from crashing the
    # page with UnicodeDecodeError (undecodable bytes become U+FFFD).
    file_text = uploaded_file.read().decode("utf-8-sig", errors="replace")

    text_without_timestamps = remove_timestamps(file_text)
    st.markdown("**Text without Timestamps:**")
    st.write(text_without_timestamps)

    processed_text = process_text(text_without_timestamps)
    st.markdown("**Markdown Outline with Emojis:**")
    st.markdown(processed_text)

    # Repeats the timestamp removal on the raw upload as a visual check.
    unit_test(file_text)

    top_words = extract_high_information_words(text_without_timestamps, 10)
    st.markdown("**Top 10 High Information Words:**")
    st.write(top_words)

    st.markdown("**Relationship Graph:**")
    display_relationship_graph(top_words)