Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ import os
|
|
6 |
from nltk.corpus import stopwords
|
7 |
from nltk import FreqDist
|
8 |
from graphviz import Digraph
|
|
|
|
|
9 |
|
10 |
# Set page configuration with a title and favicon
|
11 |
st.set_page_config(
|
@@ -103,15 +105,25 @@ def load_example_files():
|
|
103 |
st.write("No suitable example files found.")
|
104 |
|
105 |
return None
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
# Main code for UI
|
117 |
uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
|
@@ -144,4 +156,18 @@ if file_text:
|
|
144 |
display_context_table(context_words)
|
145 |
|
146 |
with st.expander("Innovation Outlines"):
|
147 |
-
showInnovationOutlines()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
from nltk.corpus import stopwords
|
7 |
from nltk import FreqDist
|
8 |
from graphviz import Digraph
|
9 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
+
from sklearn.cluster import KMeans
|
11 |
|
12 |
# Set page configuration with a title and favicon
|
13 |
st.set_page_config(
|
|
|
105 |
st.write("No suitable example files found.")
|
106 |
|
107 |
return None
|
108 |
+
|
109 |
+
def cluster_sentences(sentences, num_clusters):
    """Group sentences into clusters using TF-IDF features and k-means.

    Args:
        sentences: Sequence of sentence strings to cluster.
        num_clusters: Number of clusters requested by the caller.

    Returns:
        A list of ``num_clusters`` lists; entry ``k`` holds the sentences
        assigned to cluster ``k``. When there are fewer sentences than
        requested clusters, the trailing lists are empty.
    """
    sentences = list(sentences)

    # Nothing to vectorize: TfidfVectorizer cannot be fitted on no documents.
    if not sentences:
        return [[] for _ in range(num_clusters)]

    # KMeans rejects n_clusters > n_samples, so never ask for more clusters
    # than there are sentences.
    effective_clusters = min(num_clusters, len(sentences))

    # A single cluster trivially contains every sentence; skip the model fit.
    if effective_clusters == 1:
        return [sentences] + [[] for _ in range(num_clusters - 1)]

    # Vectorize the sentences
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(sentences)

    # Perform k-means clustering (fixed seed keeps results reproducible)
    kmeans = KMeans(n_clusters=effective_clusters, random_state=42)
    kmeans.fit(features)

    # Group sentences by their assigned cluster label
    clustered_sentences = [[] for _ in range(num_clusters)]
    for label, sentence in zip(kmeans.labels_, sentences):
        clustered_sentences[label].append(sentence)

    return clustered_sentences
|
127 |
|
128 |
# Main code for UI
|
129 |
uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
|
|
|
156 |
display_context_table(context_words)
|
157 |
|
158 |
with st.expander("Innovation Outlines"):
|
159 |
+
showInnovationOutlines()
|
160 |
+
|
161 |
+
# Sentence clustering panel: split the transcript into sentences, let the
# user choose a cluster count, and display the sentences grouped by cluster.
with st.expander("📝 Sentence Clustering"):
    # Simple split on '.'; fragments that are blank after stripping are dropped.
    sentences = [
        fragment.strip()
        for fragment in text_without_timestamps.split('.')
        if fragment.strip()
    ]

    num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)

    if not sentences:
        # Clustering needs at least one sentence; avoid crashing the page.
        st.write("No sentences found to cluster.")
    else:
        # k-means cannot form more clusters than there are sentences, so
        # clamp the requested count for short texts.
        clustered_sentences = cluster_sentences(
            sentences, min(num_clusters, len(sentences))
        )

        # Build the report in one pass instead of repeated string +=.
        output_text = "".join(
            f"Cluster {number}:\n" + "\n".join(cluster) + "\n\n"
            for number, cluster in enumerate(clustered_sentences, start=1)
        )

        st.text_area("Clustered Sentences", value=output_text, height=400)
|