awacke1 committed on
Commit
0e699cd
·
verified ·
1 Parent(s): e5c46ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -10
app.py CHANGED
@@ -6,6 +6,8 @@ import os
6
  from nltk.corpus import stopwords
7
  from nltk import FreqDist
8
  from graphviz import Digraph
 
 
9
 
10
  # Set page configuration with a title and favicon
11
  st.set_page_config(
@@ -103,15 +105,25 @@ def load_example_files():
103
  st.write("No suitable example files found.")
104
 
105
  return None
106
-
107
- # Load example files
108
- def load_example_files_old():
109
- example_files = [f for f in os.listdir() if f.endswith('.txt')]
110
- selected_file = st.selectbox("📄 Select an example file:", example_files)
111
- if st.button(f"📂 Load {selected_file}"):
112
- with open(selected_file, 'r', encoding="utf-8") as file:
113
- return file.read()
114
- return None
 
 
 
 
 
 
 
 
 
 
115
 
116
  # Main code for UI
117
  uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
@@ -144,4 +156,18 @@ if file_text:
144
  display_context_table(context_words)
145
 
146
  with st.expander("Innovation Outlines"):
147
- showInnovationOutlines()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from nltk.corpus import stopwords
7
  from nltk import FreqDist
8
  from graphviz import Digraph
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
+ from sklearn.cluster import KMeans
11
 
12
  # Set page configuration with a title and favicon
13
  st.set_page_config(
 
105
  st.write("No suitable example files found.")
106
 
107
  return None
108
+
109
def cluster_sentences(sentences, num_clusters):
    """Group sentences into ``num_clusters`` buckets using TF-IDF and k-means.

    Args:
        sentences: Sequence of sentence strings to cluster.
        num_clusters: Number of buckets to return; must be at least 1.

    Returns:
        A list of exactly ``num_clusters`` lists. Every input sentence appears
        in exactly one bucket, in its original relative order. When there are
        fewer sentences than ``num_clusters``, the trailing buckets are empty.

    Raises:
        ValueError: If ``num_clusters`` is less than 1. The vectorizer may
            also raise ``ValueError`` if no sentence yields any token.
    """
    if num_clusters < 1:
        raise ValueError("num_clusters must be at least 1")

    sentences = list(sentences)
    clustered_sentences = [[] for _ in range(num_clusters)]

    # KMeans refuses to fit when n_clusters exceeds the number of samples, so
    # never ask for more clusters than there are sentences.
    effective_clusters = min(num_clusters, len(sentences))

    # Nothing to vectorize: fitting TF-IDF on an empty corpus would raise.
    if effective_clusters == 0:
        return clustered_sentences

    # A single cluster needs no model; everything lands in the first bucket.
    if effective_clusters == 1:
        clustered_sentences[0].extend(sentences)
        return clustered_sentences

    # Vectorize the sentences
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(sentences)

    # Perform k-means clustering (fixed seed keeps the grouping reproducible)
    kmeans = KMeans(n_clusters=effective_clusters, random_state=42)
    kmeans.fit(X)

    # Group sentences by their assigned cluster label
    for sentence, label in zip(sentences, kmeans.labels_):
        clustered_sentences[label].append(sentence)

    return clustered_sentences
127
 
128
  # Main code for UI
129
  uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
 
156
  display_context_table(context_words)
157
 
158
  with st.expander("Innovation Outlines"):
159
+ showInnovationOutlines()
160
+
161
+ with st.expander("📝 Sentence Clustering"):
162
+ sentences = [sentence.strip() for sentence in text_without_timestamps.split('.') if sentence.strip()]
163
+
164
+ num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
165
+ clustered_sentences = cluster_sentences(sentences, num_clusters)
166
+
167
+ output_text = ""
168
+ for i, cluster in enumerate(clustered_sentences):
169
+ output_text += f"Cluster {i+1}:\n"
170
+ output_text += "\n".join(cluster)
171
+ output_text += "\n\n"
172
+
173
+ st.text_area("Clustered Sentences", value=output_text, height=400)