pentarosarium committed on
Commit
c9620e1
·
1 Parent(s): e02a4af
Files changed (1) hide show
  1. app.py +188 -60
app.py CHANGED
@@ -63,41 +63,13 @@ class ProcessControl:
63
 
64
  class EventDetector:
65
  def __init__(self):
66
- """Initialize models with GPU support"""
67
  try:
68
- # Initialize sentiment models
69
  device = "cuda" if torch.cuda.is_available() else "cpu"
70
  logger.info(f"Initializing models on device: {device}")
71
 
72
- self.finbert = pipeline(
73
- "sentiment-analysis",
74
- model="ProsusAI/finbert",
75
- device=device,
76
- truncation=True,
77
- max_length=512
78
- )
79
- self.roberta = pipeline(
80
- "sentiment-analysis",
81
- model="cardiffnlp/twitter-roberta-base-sentiment",
82
- device=device,
83
- truncation=True,
84
- max_length=512
85
- )
86
- self.finbert_tone = pipeline(
87
- "sentiment-analysis",
88
- model="yiyanghkust/finbert-tone",
89
- device=device,
90
- truncation=True,
91
- max_length=512
92
- )
93
-
94
- # Initialize MT5 model
95
- self.model_name = "google/mt5-small"
96
- self.tokenizer = AutoTokenizer.from_pretrained(
97
- self.model_name,
98
- legacy=True
99
- )
100
- self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
101
 
102
  self.device = device
103
  self.initialized = True
@@ -106,14 +78,90 @@ class EventDetector:
106
  except Exception as e:
107
  logger.error(f"Error in EventDetector initialization: {str(e)}")
108
  raise
109
-
110
  @spaces.GPU(duration=30)
111
- def initialize_models(self):
112
- """Keep this method for compatibility, now just returns initialization status"""
113
- return self.initialized
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  def analyze_sentiment(self, text):
116
- """Rest of the analyze_sentiment method remains the same"""
117
  try:
118
  if not text or not isinstance(text, str):
119
  return "Neutral"
@@ -153,7 +201,100 @@ class EventDetector:
153
  except Exception as e:
154
  logger.error(f"Sentiment analysis error: {str(e)}")
155
  return "Neutral"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
 
 
157
  def detect_events(self, text, entity):
158
  """Rest of the detect_events method remains the same"""
159
  if not text or not entity:
@@ -237,6 +378,7 @@ Summary: [2-3 sentence summary]</s>"""
237
  """Clean up GPU resources"""
238
  try:
239
  self.model = None
 
240
  self.finbert = None
241
  self.roberta = None
242
  self.finbert_tone = None
@@ -384,7 +526,7 @@ def create_interface():
384
  control = ProcessControl()
385
 
386
  with gr.Blocks(theme=gr.themes.Soft()) as app:
387
- gr.Markdown("# AI-анализ мониторинга новостей v.1.24+")
388
 
389
  with gr.Row():
390
  file_input = gr.File(
@@ -438,30 +580,13 @@ def create_interface():
438
  return None, None, None, "Ожидание файла..."
439
 
440
  try:
441
- # Reset stop flag
442
  control.reset()
 
443
 
444
  file_obj = io.BytesIO(file_bytes)
445
  logger.info("File loaded into BytesIO successfully")
446
 
447
- detector = EventDetector()
448
-
449
- # Initialize models with GPU
450
- @spaces.GPU(duration=30)
451
- def init_models():
452
- return detector.initialize_models()
453
-
454
- if not init_models():
455
- raise Exception("Failed to initialize models")
456
-
457
- # Process in batches with GPU allocation
458
- @spaces.GPU(duration=20)
459
- def process_batch(batch, entity):
460
- event_type, event_summary = detector.detect_events(batch, entity)
461
- time.sleep(1) # Wait between GPU operations
462
- sentiment = detector.analyze_sentiment(batch)
463
- return event_type, event_summary, sentiment
464
-
465
  # Read and deduplicate data
466
  df = pd.read_excel(file_obj, sheet_name='Публикации')
467
  original_count = len(df)
@@ -488,14 +613,17 @@ def create_interface():
488
  continue
489
 
490
  # Process with GPU
491
- event_type, event_summary, sentiment = process_batch(text, entity)
492
 
493
  processed_rows.append({
494
  'Объект': entity,
495
  'Заголовок': str(row.get('Заголовок', '')),
496
- 'Sentiment': sentiment,
497
- 'Event_Type': event_type,
498
- 'Event_Summary': event_summary,
 
 
 
499
  'Текст': text[:1000]
500
  })
501
 
 
63
 
64
  class EventDetector:
65
  def __init__(self):
 
66
  try:
67
+ # Initialize models
68
  device = "cuda" if torch.cuda.is_available() else "cpu"
69
  logger.info(f"Initializing models on device: {device}")
70
 
71
+ # Initialize all models
72
+ self.initialize_models(device) # Move initialization to separate method
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  self.device = device
75
  self.initialized = True
 
78
  except Exception as e:
79
  logger.error(f"Error in EventDetector initialization: {str(e)}")
80
  raise
81
+
82
  @spaces.GPU(duration=30)
83
+ def initialize_models(self, device):
84
+ """Initialize all models with GPU support"""
85
+ # Initialize translation model
86
+ self.translator = pipeline(
87
+ "translation",
88
+ model="Helsinki-NLP/opus-mt-ru-en",
89
+ device=device
90
+ )
91
+
92
+ # Initialize sentiment models
93
+ self.finbert = pipeline(
94
+ "sentiment-analysis",
95
+ model="ProsusAI/finbert",
96
+ device=device,
97
+ truncation=True,
98
+ max_length=512
99
+ )
100
+ self.roberta = pipeline(
101
+ "sentiment-analysis",
102
+ model="cardiffnlp/twitter-roberta-base-sentiment",
103
+ device=device,
104
+ truncation=True,
105
+ max_length=512
106
+ )
107
+ self.finbert_tone = pipeline(
108
+ "sentiment-analysis",
109
+ model="yiyanghkust/finbert-tone",
110
+ device=device,
111
+ truncation=True,
112
+ max_length=512
113
+ )
114
+
115
+ # Initialize MT5 model
116
+ self.model_name = "google/mt5-small"
117
+ self.tokenizer = AutoTokenizer.from_pretrained(
118
+ self.model_name,
119
+ legacy=True
120
+ )
121
+ self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
122
+
123
+ # Initialize Groq
124
+ if 'groq_key' in gr.secrets:
125
+ self.groq = ChatOpenAI(
126
+ base_url="https://api.groq.com/openai/v1",
127
+ model="llama-3.1-70b-versatile",
128
+ openai_api_key=gr.secrets['groq_key'],
129
+ temperature=0.0
130
+ )
131
+ else:
132
+ logger.warning("Groq API key not found, impact estimation will be limited")
133
+ self.groq = None
134
 
135
+ @spaces.GPU(duration=20)
136
+ def _translate_text(self, text):
137
+ """Translate Russian text to English"""
138
+ try:
139
+ if not text or not isinstance(text, str):
140
+ return ""
141
+
142
+ text = text.strip()
143
+ if not text:
144
+ return ""
145
+
146
+ # Split into manageable chunks
147
+ max_length = 450
148
+ chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
149
+ translated_chunks = []
150
+
151
+ for chunk in chunks:
152
+ result = self.translator(chunk)[0]['translation_text']
153
+ translated_chunks.append(result)
154
+ time.sleep(0.1) # Rate limiting
155
+
156
+ return " ".join(translated_chunks)
157
+
158
+ except Exception as e:
159
+ logger.error(f"Translation error: {str(e)}")
160
+ return text
161
+
162
+ @spaces.GPU(duration=20)
163
  def analyze_sentiment(self, text):
164
+ """Analyze sentiment of text (should be in English)"""
165
  try:
166
  if not text or not isinstance(text, str):
167
  return "Neutral"
 
201
  except Exception as e:
202
  logger.error(f"Sentiment analysis error: {str(e)}")
203
  return "Neutral"
204
+
205
+ def estimate_impact(self, text, entity):
206
+ """Estimate impact using Groq for negative sentiment texts"""
207
+ try:
208
+ if not self.groq:
209
+ return "Неопределенный эффект", "Groq API недоступен"
210
+
211
+ template = """
212
+ You are a financial analyst. Analyze this news about {entity} and assess its potential impact.
213
+
214
+ News: {news}
215
+
216
+ Classify the impact into one of these categories:
217
+ 1. "Значительный риск убытков" (Significant loss risk)
218
+ 2. "Умеренный риск убытков" (Moderate loss risk)
219
+ 3. "Незначительный риск убытков" (Minor loss risk)
220
+ 4. "Вероятность прибыли" (Potential profit)
221
+ 5. "Неопределенный эффект" (Uncertain effect)
222
+
223
+ Format your response exactly as:
224
+ Impact: [category]
225
+ Reasoning: [explanation in 2-3 sentences]
226
+ """
227
+
228
+ prompt = PromptTemplate(template=template, input_variables=["entity", "news"])
229
+ chain = prompt | self.groq
230
+
231
+ response = chain.invoke({
232
+ "entity": entity,
233
+ "news": text
234
+ })
235
+
236
+ # Parse response
237
+ response_text = response.content if hasattr(response, 'content') else str(response)
238
+
239
+ if "Impact:" in response_text and "Reasoning:" in response_text:
240
+ parts = response_text.split("Reasoning:")
241
+ impact = parts[0].split("Impact:")[1].strip()
242
+ reasoning = parts[1].strip()
243
+ else:
244
+ impact = "Неопределенный эффект"
245
+ reasoning = "Не удалось определить влияние"
246
+
247
+ return impact, reasoning
248
+
249
+ except Exception as e:
250
+ logger.error(f"Impact estimation error: {str(e)}")
251
+ return "Неопределенный эффект", f"Ошибка анализа: {str(e)}"
252
+
253
+ @spaces.GPU(duration=60)
254
+ def process_text(self, text, entity):
255
+ """Process text through translation, sentiment, and impact analysis"""
256
+ try:
257
+ # Translate text
258
+ translated_text = self._translate_text(text)
259
+
260
+ # Analyze sentiment
261
+ sentiment = self.analyze_sentiment(translated_text)
262
+
263
+ # Initialize impact and reasoning
264
+ impact = "Неопределенный эффект"
265
+ reasoning = ""
266
+
267
+ # If sentiment is negative, estimate impact
268
+ if sentiment == "Negative":
269
+ impact, reasoning = self.estimate_impact(translated_text, entity)
270
+
271
+ # Detect events
272
+ event_type, event_summary = self.detect_events(text, entity)
273
+
274
+ return {
275
+ 'translated_text': translated_text,
276
+ 'sentiment': sentiment,
277
+ 'impact': impact,
278
+ 'reasoning': reasoning,
279
+ 'event_type': event_type,
280
+ 'event_summary': event_summary
281
+ }
282
+
283
+ except Exception as e:
284
+ logger.error(f"Text processing error: {str(e)}")
285
+ return {
286
+ 'translated_text': '',
287
+ 'sentiment': 'Neutral',
288
+ 'impact': 'Неопределенный эффект',
289
+ 'reasoning': f'Ошибка обработки: {str(e)}',
290
+ 'event_type': 'Нет',
291
+ 'event_summary': ''
292
+ }
293
+
294
+
295
 
296
+
297
+ @spaces.GPU(duration=20)
298
  def detect_events(self, text, entity):
299
  """Rest of the detect_events method remains the same"""
300
  if not text or not entity:
 
378
  """Clean up GPU resources"""
379
  try:
380
  self.model = None
381
+ self.translator = None
382
  self.finbert = None
383
  self.roberta = None
384
  self.finbert_tone = None
 
526
  control = ProcessControl()
527
 
528
  with gr.Blocks(theme=gr.themes.Soft()) as app:
529
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.25")
530
 
531
  with gr.Row():
532
  file_input = gr.File(
 
580
  return None, None, None, "Ожидание файла..."
581
 
582
  try:
583
+ # Reset control and initialize detector
584
  control.reset()
585
+ detector = EventDetector()
586
 
587
  file_obj = io.BytesIO(file_bytes)
588
  logger.info("File loaded into BytesIO successfully")
589
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  # Read and deduplicate data
591
  df = pd.read_excel(file_obj, sheet_name='Публикации')
592
  original_count = len(df)
 
613
  continue
614
 
615
  # Process with GPU
616
+ results = detector.process_text(text, entity)
617
 
618
  processed_rows.append({
619
  'Объект': entity,
620
  'Заголовок': str(row.get('Заголовок', '')),
621
+ 'Translated': results['translated_text'],
622
+ 'Sentiment': results['sentiment'],
623
+ 'Impact': results['impact'],
624
+ 'Reasoning': results['reasoning'],
625
+ 'Event_Type': results['event_type'],
626
+ 'Event_Summary': results['event_summary'],
627
  'Текст': text[:1000]
628
  })
629