Spaces: Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -219,44 +219,44 @@ if file_text:
|
|
219 |
with st.expander("📑 Context Table"):
|
220 |
display_context_table(context_words)
|
221 |
|
222 |
-
with st.expander("📝 Sentence Clustering", expanded=True):
|
223 |
-
|
|
224 |
|
225 |
-
|
226 |
-
st.write(f"Total Sentences: {num_sentences}")
|
227 |
-
|
228 |
-
num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
|
229 |
-
clustered_sentences = cluster_sentences(sentences, num_clusters)
|
230 |
-
|
231 |
-
col1, col2 = st.columns(2)
|
232 |
-
|
233 |
-
with col1:
|
234 |
-
st.subheader("Original Text")
|
235 |
-
original_text = "\n".join(sentences)
|
236 |
-
st.text_area("Original Sentences", value=original_text, height=400)
|
237 |
-
|
238 |
-
with col2:
|
239 |
-
st.subheader("Clustered Text")
|
240 |
-
clusters = ""
|
241 |
-
clustered_text = ""
|
242 |
-
cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
|
243 |
-
|
244 |
-
for i, cluster in enumerate(clustered_sentences):
|
245 |
-
cluster_text = "\n".join(cluster)
|
246 |
-
high_info_words = ", ".join(cluster_high_info_words[i])
|
247 |
-
clusters += f"Cluster {i+1} (High Info Words: {high_info_words})\n"
|
248 |
-
clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
|
249 |
-
|
250 |
-
st.text_area("Clusters", value=clusters, height=200)
|
251 |
-
st.text_area("Clustered Sentences", value=clustered_text, height=200)
|
252 |
-
|
253 |
-
# Verify that all sentences are accounted for in the clustered output
|
254 |
-
clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
|
255 |
-
if set(sentences) == set(clustered_sentences_flat):
|
256 |
-
st.write("✅ All sentences are accounted for in the clustered output.")
|
257 |
-
else:
|
258 |
-
st.write("❌ Some sentences are missing in the clustered output.")
|
259 |
-
|
260 |
-
plot_cluster_words(clustered_sentences)
|
261 |
|
262 |
st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")
|
|
|
219 |
with st.expander("📑 Context Table"):
|
220 |
display_context_table(context_words)
|
221 |
|
222 |
+
#with st.expander("📝 Sentence Clustering", expanded=True):
|
223 |
+
sentences = [line.strip() for line in file_text.split('\n') if len(line.strip()) > 10]
|
224 |
+
|
225 |
+
num_sentences = len(sentences)
|
226 |
+
st.write(f"Total Sentences: {num_sentences}")
|
227 |
+
|
228 |
+
num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
|
229 |
+
clustered_sentences = cluster_sentences(sentences, num_clusters)
|
230 |
+
|
231 |
+
col1, col2 = st.columns(2)
|
232 |
+
|
233 |
+
with col1:
|
234 |
+
st.subheader("Original Text")
|
235 |
+
original_text = "\n".join(sentences)
|
236 |
+
st.text_area("Original Sentences", value=original_text, height=400)
|
237 |
+
|
238 |
+
with col2:
|
239 |
+
st.subheader("Clustered Text")
|
240 |
+
clusters = ""
|
241 |
+
clustered_text = ""
|
242 |
+
cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
|
243 |
+
|
244 |
+
for i, cluster in enumerate(clustered_sentences):
|
245 |
+
cluster_text = "\n".join(cluster)
|
246 |
+
high_info_words = ", ".join(cluster_high_info_words[i])
|
247 |
+
clusters += f"Cluster {i+1} (High Info Words: {high_info_words})\n"
|
248 |
+
clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
|
249 |
+
|
250 |
+
st.text_area("Clusters", value=clusters, height=200)
|
251 |
+
st.text_area("Clustered Sentences", value=clustered_text, height=200)
|
252 |
+
|
253 |
+
# Verify that all sentences are accounted for in the clustered output
|
254 |
+
clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
|
255 |
+
if set(sentences) == set(clustered_sentences_flat):
|
256 |
+
st.write("✅ All sentences are accounted for in the clustered output.")
|
257 |
+
else:
|
258 |
+
st.write("❌ Some sentences are missing in the clustered output.")
|
259 |
|
260 |
+
plot_cluster_words(clustered_sentences)
|
|
261 |
|
262 |
st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")
|