pentarosarium committed on
Commit
92287cb
·
1 Parent(s): 458b69b
Files changed (1) hide show
  1. app.py +109 -72
app.py CHANGED
@@ -3,28 +3,31 @@ import spaces
3
  import pandas as pd
4
  import torch
5
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
6
- from transformers import AutoModelForCausalLM
7
- import time
8
  import plotly.graph_objects as go
9
- from datetime import datetime
10
- from deep_translator import GoogleTranslator
11
- from googletrans import Translator as LegacyTranslator
12
- import io
13
- from openpyxl import load_workbook
14
- from openpyxl.utils.dataframe import dataframe_to_rows
15
 
16
  class EventDetector:
17
  def __init__(self):
18
- self.model_name = "google/mt5-small"
19
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
20
- self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name)
21
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
22
- self.model = self.model.to(self.device)
23
-
24
- # Initialize sentiment analyzers
25
- self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=self.device)
26
- self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=self.device)
27
- self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device)
 
 
 
 
 
 
 
28
 
29
  @spaces.GPU(duration=120)
30
  def detect_events(self, text, entity):
@@ -42,7 +45,6 @@ class EventDetector:
42
  outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
43
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
44
 
45
- # Event type classification logic
46
  event_type = "Нет"
47
  if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
48
  event_type = "Отчетность"
@@ -54,21 +56,28 @@ class EventDetector:
54
  return event_type, response
55
 
56
  except Exception as e:
 
57
  return "Нет", f"Error: {str(e)}"
58
 
59
  @spaces.GPU(duration=60)
60
  def analyze_sentiment(self, text):
61
  try:
62
  results = []
63
- results.append(self._get_sentiment(self.finbert(text)[0]))
64
- results.append(self._get_sentiment(self.roberta(text)[0]))
65
- results.append(self._get_sentiment(self.finbert_tone(text)[0]))
 
 
 
 
 
 
66
 
67
- # Return majority sentiment
68
  sentiment_counts = pd.Series(results).value_counts()
69
  return sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"
70
 
71
  except Exception as e:
 
72
  return "Neutral"
73
 
74
  def _get_sentiment(self, result):
@@ -81,11 +90,20 @@ class EventDetector:
81
 
82
  def process_file(file):
83
  try:
84
- df = pd.read_excel(file.name, sheet_name='Публикации')
 
 
 
 
 
85
  detector = EventDetector()
86
  processed_rows = []
 
87
 
88
- for _, row in df.iterrows():
 
 
 
89
  text = str(row.get('Выдержки из текста', ''))
90
  entity = str(row.get('Объект', ''))
91
 
@@ -100,62 +118,72 @@ def process_file(file):
100
  'Event_Summary': event_summary,
101
  'Текст': text
102
  })
103
-
104
- return pd.DataFrame(processed_rows)
105
-
106
- except Exception as e:
107
- # Return empty DataFrame instead of string
108
- return pd.DataFrame(columns=['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст'])
109
-
110
- def analyze(file):
111
- if file is None:
112
- return None, None, None
113
 
114
- df = process_file(file)
115
- if df.empty:
116
- return df, None, None
117
 
118
- try:
119
- fig_sentiment, fig_events = create_visualizations(df)
120
- return df, fig_sentiment, fig_events
121
  except Exception as e:
122
- return df, None, None
 
 
123
 
124
  def create_visualizations(df):
125
  if df is None or df.empty:
126
  return None, None
127
 
128
- # Create sentiment distribution plot
129
- sentiments = df['Sentiment'].value_counts()
130
- fig_sentiment = go.Figure(data=[go.Pie(
131
- labels=sentiments.index,
132
- values=sentiments.values,
133
- marker_colors=['#FF6B6B', '#4ECDC4', '#95A5A6']
134
- )])
135
-
136
- # Create events distribution plot
137
- events = df['Event_Type'].value_counts()
138
- fig_events = go.Figure(data=[go.Bar(
139
- x=events.index,
140
- y=events.values,
141
- marker_color='#2196F3'
142
- )])
143
-
144
- return fig_sentiment, fig_events
 
 
 
 
 
145
 
146
  def create_interface():
147
- with gr.Blocks() as app:
148
- gr.Markdown("# AI-анализ мониторинга новостей v.1.05")
149
 
150
  with gr.Row():
151
- file_input = gr.File(label="Загрузите Excel файл")
 
 
 
 
152
 
153
  with gr.Row():
154
- analyze_btn = gr.Button("Начать анализ")
 
 
 
 
 
 
 
 
 
155
 
156
  with gr.Row():
157
- with gr.Column():
158
- stats = gr.DataFrame(label="Результаты анализа")
 
 
 
159
 
160
  with gr.Row():
161
  with gr.Column():
@@ -165,21 +193,30 @@ def create_interface():
165
 
166
  def analyze(file):
167
  if file is None:
168
- return None, None, None
 
 
 
 
 
 
 
 
 
169
 
170
- df = process_file(file)
171
- fig_sentiment, fig_events = create_visualizations(df)
172
-
173
- return df, fig_sentiment, fig_events
174
 
175
  analyze_btn.click(
176
  analyze,
177
  inputs=[file_input],
178
- outputs=[stats, sentiment_plot, events_plot]
179
  )
180
 
181
  return app
182
 
183
  if __name__ == "__main__":
184
  app = create_interface()
185
- app.launch()
 
3
  import pandas as pd
4
  import torch
5
  from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 
 
6
  import plotly.graph_objects as go
7
+ import logging
8
+
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
 
 
11
 
12
  class EventDetector:
13
  def __init__(self):
14
+ try:
15
+ logger.info(f"Using device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")
16
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
17
+
18
+ self.model_name = "google/mt5-small"
19
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
20
+ self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)
21
+
22
+ self.finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=self.device)
23
+ self.roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment", device=self.device)
24
+ self.finbert_tone = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device)
25
+
26
+ logger.info("Models initialized successfully")
27
+
28
+ except Exception as e:
29
+ logger.error(f"Model initialization error: {e}")
30
+ raise
31
 
32
  @spaces.GPU(duration=120)
33
  def detect_events(self, text, entity):
 
45
  outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
46
  response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
47
 
 
48
  event_type = "Нет"
49
  if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
50
  event_type = "Отчетность"
 
56
  return event_type, response
57
 
58
  except Exception as e:
59
+ logger.error(f"Event detection error: {e}")
60
  return "Нет", f"Error: {str(e)}"
61
 
62
  @spaces.GPU(duration=60)
63
  def analyze_sentiment(self, text):
64
  try:
65
  results = []
66
+ texts = [text[:512]] # Truncate to avoid token length issues
67
+
68
+ for model in [self.finbert, self.roberta, self.finbert_tone]:
69
+ try:
70
+ result = model(texts)[0]
71
+ results.append(self._get_sentiment(result))
72
+ except Exception as e:
73
+ logger.error(f"Model inference error: {e}")
74
+ results.append("Neutral")
75
 
 
76
  sentiment_counts = pd.Series(results).value_counts()
77
  return sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"
78
 
79
  except Exception as e:
80
+ logger.error(f"Sentiment analysis error: {e}")
81
  return "Neutral"
82
 
83
  def _get_sentiment(self, result):
 
90
 
91
  def process_file(file):
92
  try:
93
+ gr.Info("Starting file processing...")
94
+ if isinstance(file, str):
95
+ df = pd.read_excel(file, sheet_name='Публикации')
96
+ else:
97
+ df = pd.read_excel(file.name, sheet_name='Публикации')
98
+
99
  detector = EventDetector()
100
  processed_rows = []
101
+ total = len(df)
102
 
103
+ for idx, row in df.iterrows():
104
+ if idx % 10 == 0:
105
+ gr.Info(f"Processing {idx}/{total} rows...")
106
+
107
  text = str(row.get('Выдержки из текста', ''))
108
  entity = str(row.get('Объект', ''))
109
 
 
118
  'Event_Summary': event_summary,
119
  'Текст': text
120
  })
 
 
 
 
 
 
 
 
 
 
121
 
122
+ result_df = pd.DataFrame(processed_rows)
123
+ gr.Info("File processing complete!")
124
+ return result_df
125
 
 
 
 
126
  except Exception as e:
127
+ logger.error(f"File processing error: {e}")
128
+ gr.Error(f"Error processing file: {str(e)}")
129
+ return pd.DataFrame(columns=['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст'])
130
 
131
  def create_visualizations(df):
132
  if df is None or df.empty:
133
  return None, None
134
 
135
+ try:
136
+ sentiments = df['Sentiment'].value_counts()
137
+ fig_sentiment = go.Figure(data=[go.Pie(
138
+ labels=sentiments.index,
139
+ values=sentiments.values,
140
+ marker_colors=['#FF6B6B', '#4ECDC4', '#95A5A6']
141
+ )])
142
+ fig_sentiment.update_layout(title="Распределение тональности")
143
+
144
+ events = df['Event_Type'].value_counts()
145
+ fig_events = go.Figure(data=[go.Bar(
146
+ x=events.index,
147
+ y=events.values,
148
+ marker_color='#2196F3'
149
+ )])
150
+ fig_events.update_layout(title="Распределение событий")
151
+
152
+ return fig_sentiment, fig_events
153
+
154
+ except Exception as e:
155
+ logger.error(f"Visualization error: {e}")
156
+ return None, None
157
 
158
  def create_interface():
159
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
160
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.06")
161
 
162
  with gr.Row():
163
+ file_input = gr.File(
164
+ label="Загрузите Excel файл",
165
+ file_types=[".xlsx"],
166
+ type="file"
167
+ )
168
 
169
  with gr.Row():
170
+ analyze_btn = gr.Button(
171
+ "Начать анализ",
172
+ variant="primary"
173
+ )
174
+
175
+ with gr.Row():
176
+ progress = gr.Textbox(
177
+ label="Статус",
178
+ interactive=False
179
+ )
180
 
181
  with gr.Row():
182
+ stats = gr.DataFrame(
183
+ label="Результаты анализа",
184
+ interactive=False,
185
+ wrap=True
186
+ )
187
 
188
  with gr.Row():
189
  with gr.Column():
 
193
 
194
  def analyze(file):
195
  if file is None:
196
+ gr.Warning("Пожалуйста, загрузите файл")
197
+ return None, None, None, "Ожидание файла"
198
+ try:
199
+ progress.update("Обработка начата...")
200
+ df = process_file(file)
201
+ if df.empty:
202
+ return None, None, None, "Нет данных для обработки"
203
+
204
+ fig_sentiment, fig_events = create_visualizations(df)
205
+ return df, fig_sentiment, fig_events, "Обработка завершена"
206
 
207
+ except Exception as e:
208
+ logger.error(f"Analysis error: {e}")
209
+ gr.Error(f"Ошибка анализа: {str(e)}")
210
+ return None, None, None, f"Ошибка: {str(e)}"
211
 
212
  analyze_btn.click(
213
  analyze,
214
  inputs=[file_input],
215
+ outputs=[stats, sentiment_plot, events_plot, progress]
216
  )
217
 
218
  return app
219
 
220
  if __name__ == "__main__":
221
  app = create_interface()
222
+ app.launch(share=True)