pentarosarium committed on
Commit
55c5135
·
1 Parent(s): 6177fd7

progress more 42+

Browse files
Files changed (2) hide show
  1. app.py +10 -2
  2. sentiment_decorators.py +21 -0
app.py CHANGED
@@ -13,6 +13,7 @@ import torch
13
  from openpyxl import load_workbook
14
  from openpyxl import Workbook
15
  from openpyxl.utils.dataframe import dataframe_to_rows
 
16
 
17
  # Initialize pymystem3 for lemmatization
18
  mystem = Mystem()
@@ -85,22 +86,27 @@ def get_mapped_sentiment(result):
85
  return "Negative"
86
  return "Neutral"
87
 
 
88
  def get_rubert1_sentiment(text):
89
  result = rubert1(text, truncation=True, max_length=512)[0]
90
  return get_mapped_sentiment(result)
91
 
 
92
  def get_rubert2_sentiment(text):
93
  result = rubert2(text, truncation=True, max_length=512)[0]
94
  return get_mapped_sentiment(result)
95
 
 
96
  def get_finbert_sentiment(text):
97
  result = finbert(text, truncation=True, max_length=512)[0]
98
  return get_mapped_sentiment(result)
99
 
 
100
  def get_roberta_sentiment(text):
101
  result = roberta(text, truncation=True, max_length=512)[0]
102
  return get_mapped_sentiment(result)
103
 
 
104
  def get_finbert_tone_sentiment(text):
105
  result = finbert_tone(text, truncation=True, max_length=512)[0]
106
  return get_mapped_sentiment(result)
@@ -149,7 +155,9 @@ def process_file(uploaded_file):
149
  total_news = len(df)
150
 
151
  texts = df['Выдержки из текста'].tolist()
152
-
 
 
153
  for text in df['Выдержки из текста']:
154
  lemmatized_texts.append(lemmatize_text(text))
155
 
@@ -249,7 +257,7 @@ def create_output_file(df, uploaded_file, analysis_df):
249
  return output
250
 
251
  def main():
252
- st.title("... приступим к анализу... версия 41+")
253
 
254
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
255
 
 
13
  from openpyxl import load_workbook
14
  from openpyxl import Workbook
15
  from openpyxl.utils.dataframe import dataframe_to_rows
16
+ from sentiment_decorators import sentiment_analysis_decorator
17
 
18
  # Initialize pymystem3 for lemmatization
19
  mystem = Mystem()
 
86
  return "Negative"
87
  return "Neutral"
88
 
89
@sentiment_analysis_decorator
def get_rubert1_sentiment(text):
    """Return the mapped sentiment label for ``text`` using ``rubert1``.

    ``rubert1`` is called with ``truncation=True`` and ``max_length=512``;
    the first element of its result is handed to ``get_mapped_sentiment``.
    The function is wrapped by ``sentiment_analysis_decorator``.
    """
    predictions = rubert1(text, truncation=True, max_length=512)
    top_prediction = predictions[0]
    return get_mapped_sentiment(top_prediction)
93
 
94
@sentiment_analysis_decorator
def get_rubert2_sentiment(text):
    """Return the mapped sentiment label for ``text`` using ``rubert2``.

    ``rubert2`` is called with ``truncation=True`` and ``max_length=512``;
    the first element of its result is handed to ``get_mapped_sentiment``.
    The function is wrapped by ``sentiment_analysis_decorator``.
    """
    predictions = rubert2(text, truncation=True, max_length=512)
    top_prediction = predictions[0]
    return get_mapped_sentiment(top_prediction)
98
 
99
@sentiment_analysis_decorator
def get_finbert_sentiment(text):
    """Return the mapped sentiment label for ``text`` using ``finbert``.

    ``finbert`` is called with ``truncation=True`` and ``max_length=512``;
    the first element of its result is handed to ``get_mapped_sentiment``.
    The function is wrapped by ``sentiment_analysis_decorator``.
    """
    predictions = finbert(text, truncation=True, max_length=512)
    top_prediction = predictions[0]
    return get_mapped_sentiment(top_prediction)
103
 
104
@sentiment_analysis_decorator
def get_roberta_sentiment(text):
    """Return the mapped sentiment label for ``text`` using ``roberta``.

    ``roberta`` is called with ``truncation=True`` and ``max_length=512``;
    the first element of its result is handed to ``get_mapped_sentiment``.
    The function is wrapped by ``sentiment_analysis_decorator``.
    """
    predictions = roberta(text, truncation=True, max_length=512)
    top_prediction = predictions[0]
    return get_mapped_sentiment(top_prediction)
108
 
109
@sentiment_analysis_decorator
def get_finbert_tone_sentiment(text):
    """Return the mapped sentiment label for ``text`` using ``finbert_tone``.

    ``finbert_tone`` is called with ``truncation=True`` and
    ``max_length=512``; the first element of its result is handed to
    ``get_mapped_sentiment``. The function is wrapped by
    ``sentiment_analysis_decorator``.
    """
    predictions = finbert_tone(text, truncation=True, max_length=512)
    top_prediction = predictions[0]
    return get_mapped_sentiment(top_prediction)
 
155
  total_news = len(df)
156
 
157
  texts = df['Выдержки из текста'].tolist()
158
+ # Data validation
159
+ texts = [str(text) if not pd.isna(text) else "" for text in texts]
160
+
161
  for text in df['Выдержки из текста']:
162
  lemmatized_texts.append(lemmatize_text(text))
163
 
 
257
  return output
258
 
259
  def main():
260
+ st.title("... приступим к анализу... версия 42+")
261
 
262
  uploaded_file = st.file_uploader("Выбирайте Excel-файл", type="xlsx")
263
 
sentiment_decorators.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import functools
2
+ from typing import Callable, Any
3
+
4
+
5
def sentiment_analysis_decorator(func: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap a sentiment function so bad input or a failure yields "Neutral".

    The wrapper normalises its first argument before calling ``func``:

    * a string is passed through unchanged;
    * a missing scalar value (as reported by ``pd.isna``, e.g. ``None`` or
      NaN) short-circuits to ``"Neutral"`` without calling ``func``;
    * any other value is converted with ``str`` first.

    Any exception raised by ``func`` is printed and replaced by
    ``"Neutral"``, so one bad row does not abort a whole batch.

    Args:
        func: Callable that takes the text as its first positional argument.

    Returns:
        The wrapped callable. It returns whatever ``func`` returns, or
        ``"Neutral"`` for missing input or when ``func`` raises.
    """
    # Imported locally: this module does not import pandas at top level, so
    # referring to ``pd`` without this import raises NameError at call time.
    import pandas as pd

    @functools.wraps(func)
    def wrapper(text: Any, *args: Any, **kwargs: Any) -> str:
        if not isinstance(text, str):
            # pd.isna returns an array for list-likes, whose truth value is
            # ambiguous; only treat scalar inputs as potentially missing.
            if pd.api.types.is_scalar(text) and pd.isna(text):
                return "Neutral"  # nothing means neutral
            text = str(text)  # convert to string

        try:
            return func(text, *args, **kwargs)
        except Exception as e:
            # Expose the first 100 chars of the problematic text for debugging.
            print(f"Error in {func.__name__} processing text: {text[:100]}...")
            print(f"Error: {str(e)}")
            return "Neutral"  # a failed analysis is reported as neutral

    return wrapper