Spaces:
Running
Running
Commit
·
5037fb3
1
Parent(s):
80d50c2
progress more 40+
Browse files
app.py
CHANGED
@@ -34,12 +34,18 @@ def create_analysis_data(df):
|
|
34 |
|
35 |
# Function for lemmatizing Russian text
|
36 |
def lemmatize_text(text):
    """Lemmatize text word by word and return the lemmas as one string.

    Args:
        text: Value to lemmatize. ``None`` and float NaN (what an empty
            spreadsheet cell typically becomes) yield an empty string; any
            other non-string value is converted with ``str`` first.

    Returns:
        The lemmas joined by single spaces, or "" for missing/blank input.
    """
    # NaN is the only float that is not equal to itself; checking it this way
    # avoids needing any extra import inside this function.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    if not isinstance(text, str):
        text = str(text)

    lemmatized_words = []
    for word in tqdm(text.split(), desc="Lemmatizing", unit="word"):
        # NOTE(review): assuming `mystem` is a pymystem3 Mystem instance, its
        # lemmatize() output ends with a "\n" token; strip it so the joined
        # result does not carry embedded newlines.
        lemmatized_word = ''.join(mystem.lemmatize(word)).strip()
        if lemmatized_word:
            lemmatized_words.append(lemmatized_word)
    return ' '.join(lemmatized_words)
|
43 |
|
44 |
# Translation model for Russian to English
|
45 |
model_name = "Helsinki-NLP/opus-mt-ru-en"
|
@@ -243,7 +249,7 @@ def create_output_file(df, uploaded_file, analysis_df):
|
|
243 |
return output
|
244 |
|
245 |
def main():
|
246 |
-
st.title("... приступим к анализу... версия
|
247 |
|
248 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
249 |
|
|
|
34 |
|
35 |
# Function for lemmatizing Russian text
|
36 |
def lemmatize_text(text):
    """Lemmatize text word by word and return the lemmas as one string.

    Args:
        text: Value to lemmatize. Anything ``pd.isna`` reports as missing
            yields an empty string; any other non-string value is converted
            with ``str`` first.

    Returns:
        The lemmas joined by single spaces, or "" for missing/blank input.
    """
    if pd.isna(text):
        return ""

    if not isinstance(text, str):
        text = str(text)

    lemmatized_words = []
    for word in tqdm(text.split(), desc="Lemmatizing", unit="word"):
        # NOTE(review): assuming `mystem` is a pymystem3 Mystem instance, its
        # lemmatize() output ends with a "\n" token; strip it so the joined
        # result does not carry embedded newlines.
        lemmatized_word = ''.join(mystem.lemmatize(word)).strip()
        if lemmatized_word:
            lemmatized_words.append(lemmatized_word)
    return ' '.join(lemmatized_words)
|
49 |
|
50 |
# Translation model for Russian to English
|
51 |
model_name = "Helsinki-NLP/opus-mt-ru-en"
|
|
|
249 |
return output
|
250 |
|
251 |
def main():
|
252 |
+
st.title("... приступим к анализу... версия 40+")
|
253 |
|
254 |
uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
|
255 |
|