Spaces:

awacke1
/

Transcript-EDA-NLTK

Sleeping

App Files Files Community

awacke1 committed on Mar 14, 2024

Commit

0e699cd

verified ·

1 Parent(s): e5c46ba

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -10

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ import os
 from nltk.corpus import stopwords
 from nltk import FreqDist
 from graphviz import Digraph
 # Set page configuration with a title and favicon
 st.set_page_config(
@@ -103,15 +105,25 @@ def load_example_files():
         st.write("No suitable example files found.")
     return None
-# Load example files
-def load_example_files_old():
-    example_files = [f for f in os.listdir() if f.endswith('.txt')]
-    selected_file = st.selectbox("📄 Select an example file:", example_files)
-    if st.button(f"📂 Load {selected_file}"):
-        with open(selected_file, 'r', encoding="utf-8") as file:
-            return file.read()
-    return None
 # Main code for UI
 uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
@@ -144,4 +156,18 @@ if file_text:
         display_context_table(context_words)
     with st.expander("Innovation Outlines"):
-        showInnovationOutlines()

 from nltk.corpus import stopwords
 from nltk import FreqDist
 from graphviz import Digraph
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.cluster import KMeans
 # Set page configuration with a title and favicon
 st.set_page_config(
         st.write("No suitable example files found.")
     return None
+def cluster_sentences(sentences, num_clusters):  # Returns a list of num_clusters lists; each inner list holds the sentences assigned to that cluster.
+    # Fit a TfidfVectorizer (default settings) on the sentences and transform them into the matrix X.
+    vectorizer = TfidfVectorizer()
+    X = vectorizer.fit_transform(sentences)
+    # Fit k-means with a fixed random_state so every run uses the same seed. NOTE(review): KMeans is expected to reject fewer sentences than num_clusters - confirm and guard in the caller.
+    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
+    kmeans.fit(X)
+    # labels[i] is the cluster index that k-means assigned to sentences[i].
+    labels = kmeans.labels_
+    # One bucket per cluster index; sentences are appended in input order, so each bucket preserves it.
+    clustered_sentences = [[] for _ in range(num_clusters)]
+    for i, label in enumerate(labels):
+        clustered_sentences[label].append(sentences[i])
+    return clustered_sentences
 # Main code for UI
 uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
         display_context_table(context_words)
     with st.expander("Innovation Outlines"):
+        showInnovationOutlines()
+    with st.expander("📝 Sentence Clustering"):
+        sentences = [sentence.strip() for sentence in text_without_timestamps.split('.') if sentence.strip()]
+        num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
+        clustered_sentences = cluster_sentences(sentences, num_clusters)
+        output_text = ""
+        for i, cluster in enumerate(clustered_sentences):
+            output_text += f"Cluster {i+1}:\n"
+            output_text += "\n".join(cluster)
+            output_text += "\n\n"
+        st.text_area("Clustered Sentences", value=output_text, height=400)