Spaces:

pentarosarium
/

gprocess

Sleeping

App Files Files Community

pentarosarium committed on Nov 20, 2024

Commit

c9620e1

1 Parent(s): e02a4af

v.1.25

Browse files

Files changed (1) hide show

app.py +188 -60

app.py CHANGED Viewed

@@ -63,41 +63,13 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
-        """Initialize models with GPU support"""
         try:
-            # Initialize sentiment models
             device = "cuda" if torch.cuda.is_available() else "cpu"
             logger.info(f"Initializing models on device: {device}")
-            self.finbert = pipeline(
-                "sentiment-analysis",
-                model="ProsusAI/finbert",
-                device=device,
-                truncation=True,
-                max_length=512
-            )
-            self.roberta = pipeline(
-                "sentiment-analysis",
-                model="cardiffnlp/twitter-roberta-base-sentiment",
-                device=device,
-                truncation=True,
-                max_length=512
-            )
-            self.finbert_tone = pipeline(
-                "sentiment-analysis",
-                model="yiyanghkust/finbert-tone",
-                device=device,
-                truncation=True,
-                max_length=512
-            )
-            # Initialize MT5 model
-            self.model_name = "google/mt5-small"
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                self.model_name,
-                legacy=True
-            )
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
             self.device = device
             self.initialized = True
@@ -106,14 +78,90 @@ class EventDetector:
         except Exception as e:
             logger.error(f"Error in EventDetector initialization: {str(e)}")
             raise
     @spaces.GPU(duration=30)
-    def initialize_models(self):
-        """Keep this method for compatibility, now just returns initialization status"""
-        return self.initialized
     def analyze_sentiment(self, text):
-        """Rest of the analyze_sentiment method remains the same"""
         try:
             if not text or not isinstance(text, str):
                 return "Neutral"
@@ -153,7 +201,100 @@ class EventDetector:
         except Exception as e:
             logger.error(f"Sentiment analysis error: {str(e)}")
             return "Neutral"
     def detect_events(self, text, entity):
         """Rest of the detect_events method remains the same"""
         if not text or not entity:
@@ -237,6 +378,7 @@ Summary: [2-3 sentence summary]</s>"""
         """Clean up GPU resources"""
         try:
             self.model = None
             self.finbert = None
             self.roberta = None
             self.finbert_tone = None
@@ -384,7 +526,7 @@ def create_interface():
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.24+")
         with gr.Row():
             file_input = gr.File(
@@ -438,30 +580,13 @@ def create_interface():
                 return None, None, None, "Ожидание файла..."
             try:
-                # Reset stop flag
                 control.reset()
                 file_obj = io.BytesIO(file_bytes)
                 logger.info("File loaded into BytesIO successfully")
-                detector = EventDetector()
-                # Initialize models with GPU
-                @spaces.GPU(duration=30)
-                def init_models():
-                    return detector.initialize_models()
-                if not init_models():
-                    raise Exception("Failed to initialize models")
-                # Process in batches with GPU allocation
-                @spaces.GPU(duration=20)
-                def process_batch(batch, entity):
-                    event_type, event_summary = detector.detect_events(batch, entity)
-                    time.sleep(1)  # Wait between GPU operations
-                    sentiment = detector.analyze_sentiment(batch)
-                    return event_type, event_summary, sentiment
                 # Read and deduplicate data
                 df = pd.read_excel(file_obj, sheet_name='Публикации')
                 original_count = len(df)
@@ -488,14 +613,17 @@ def create_interface():
                                 continue
                             # Process with GPU
-                            event_type, event_summary, sentiment = process_batch(text, entity)
                             processed_rows.append({
                                 'Объект': entity,
                                 'Заголовок': str(row.get('Заголовок', '')),
-                                'Sentiment': sentiment,
-                                'Event_Type': event_type,
-                                'Event_Summary': event_summary,
                                 'Текст': text[:1000]
                             })

 class EventDetector:
     def __init__(self):
         try:
+            # Initialize models
             device = "cuda" if torch.cuda.is_available() else "cpu"
             logger.info(f"Initializing models on device: {device}")
+            # Initialize all models
+            self.initialize_models(device)  # Move initialization to separate method
             self.device = device
             self.initialized = True
         except Exception as e:
             logger.error(f"Error in EventDetector initialization: {str(e)}")
             raise
     @spaces.GPU(duration=30)
+    def initialize_models(self, device):
+        """Initialize all models with GPU support"""
+        # Initialize translation model
+        self.translator = pipeline(
+            "translation",
+            model="Helsinki-NLP/opus-mt-ru-en",
+            device=device
+        )
+        # Initialize sentiment models
+        self.finbert = pipeline(
+            "sentiment-analysis",
+            model="ProsusAI/finbert",
+            device=device,
+            truncation=True,
+            max_length=512
+        )
+        self.roberta = pipeline(
+            "sentiment-analysis",
+            model="cardiffnlp/twitter-roberta-base-sentiment",
+            device=device,
+            truncation=True,
+            max_length=512
+        )
+        self.finbert_tone = pipeline(
+            "sentiment-analysis",
+            model="yiyanghkust/finbert-tone",
+            device=device,
+            truncation=True,
+            max_length=512
+        )
+        # Initialize MT5 model
+        self.model_name = "google/mt5-small"
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name,
+            legacy=True
+        )
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
+        # Initialize Groq
+        if 'groq_key' in gr.secrets:
+            self.groq = ChatOpenAI(
+                base_url="https://api.groq.com/openai/v1",
+                model="llama-3.1-70b-versatile",
+                openai_api_key=gr.secrets['groq_key'],
+                temperature=0.0
+            )
+        else:
+            logger.warning("Groq API key not found, impact estimation will be limited")
+            self.groq = None
+    @spaces.GPU(duration=20)
+    def _translate_text(self, text):
+        """Translate Russian text to English"""
+        try:
+            if not text or not isinstance(text, str):
+                return ""
+            text = text.strip()
+            if not text:
+                return ""
+            # Split into manageable chunks
+            max_length = 450
+            chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]
+            translated_chunks = []
+            for chunk in chunks:
+                result = self.translator(chunk)[0]['translation_text']
+                translated_chunks.append(result)
+                time.sleep(0.1)  # Rate limiting
+            return " ".join(translated_chunks)
+        except Exception as e:
+            logger.error(f"Translation error: {str(e)}")
+            return text
+    @spaces.GPU(duration=20)
     def analyze_sentiment(self, text):
+        """Analyze sentiment of text (should be in English)"""
         try:
             if not text or not isinstance(text, str):
                 return "Neutral"
         except Exception as e:
             logger.error(f"Sentiment analysis error: {str(e)}")
             return "Neutral"
+    def estimate_impact(self, text, entity):
+        """Estimate impact using Groq for negative sentiment texts"""
+        try:
+            if not self.groq:
+                return "Неопределенный эффект", "Groq API недоступен"
+            template = """
+            You are a financial analyst. Analyze this news about {entity} and assess its potential impact.
+            News: {news}
+            Classify the impact into one of these categories:
+            1. "Значительный риск убытков" (Significant loss risk)
+            2. "Умеренный риск убытков" (Moderate loss risk)
+            3. "Незначительный риск убытков" (Minor loss risk)
+            4. "Вероятность прибыли" (Potential profit)
+            5. "Неопределенный эффект" (Uncertain effect)
+            Format your response exactly as:
+            Impact: [category]
+            Reasoning: [explanation in 2-3 sentences]
+            """
+            prompt = PromptTemplate(template=template, input_variables=["entity", "news"])
+            chain = prompt | self.groq
+            response = chain.invoke({
+                "entity": entity,
+                "news": text
+            })
+            # Parse response
+            response_text = response.content if hasattr(response, 'content') else str(response)
+            if "Impact:" in response_text and "Reasoning:" in response_text:
+                parts = response_text.split("Reasoning:")
+                impact = parts[0].split("Impact:")[1].strip()
+                reasoning = parts[1].strip()
+            else:
+                impact = "Неопределенный эффект"
+                reasoning = "Не удалось определить влияние"
+            return impact, reasoning
+        except Exception as e:
+            logger.error(f"Impact estimation error: {str(e)}")
+            return "Неопределенный эффект", f"Ошибка анализа: {str(e)}"
+    @spaces.GPU(duration=60)
+    def process_text(self, text, entity):
+        """Process text through translation, sentiment, and impact analysis"""
+        try:
+            # Translate text
+            translated_text = self._translate_text(text)
+            # Analyze sentiment
+            sentiment = self.analyze_sentiment(translated_text)
+            # Initialize impact and reasoning
+            impact = "Неопределенный эффект"
+            reasoning = ""
+            # If sentiment is negative, estimate impact
+            if sentiment == "Negative":
+                impact, reasoning = self.estimate_impact(translated_text, entity)
+            # Detect events
+            event_type, event_summary = self.detect_events(text, entity)
+            return {
+                'translated_text': translated_text,
+                'sentiment': sentiment,
+                'impact': impact,
+                'reasoning': reasoning,
+                'event_type': event_type,
+                'event_summary': event_summary
+            }
+        except Exception as e:
+            logger.error(f"Text processing error: {str(e)}")
+            return {
+                'translated_text': '',
+                'sentiment': 'Neutral',
+                'impact': 'Неопределенный эффект',
+                'reasoning': f'Ошибка обработки: {str(e)}',
+                'event_type': 'Нет',
+                'event_summary': ''
+            }
+    @spaces.GPU(duration=20)
     def detect_events(self, text, entity):
         """Rest of the detect_events method remains the same"""
         if not text or not entity:
         """Clean up GPU resources"""
         try:
             self.model = None
+            self.translator = None
             self.finbert = None
             self.roberta = None
             self.finbert_tone = None
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.25")
         with gr.Row():
             file_input = gr.File(
                 return None, None, None, "Ожидание файла..."
             try:
+                # Reset control and initialize detector
                 control.reset()
+                detector = EventDetector()
                 file_obj = io.BytesIO(file_bytes)
                 logger.info("File loaded into BytesIO successfully")
                 # Read and deduplicate data
                 df = pd.read_excel(file_obj, sheet_name='Публикации')
                 original_count = len(df)
                                 continue
                             # Process with GPU
+                            results = detector.process_text(text, entity)
                             processed_rows.append({
                                 'Объект': entity,
                                 'Заголовок': str(row.get('Заголовок', '')),
+                                'Translated': results['translated_text'],
+                                'Sentiment': results['sentiment'],
+                                'Impact': results['impact'],
+                                'Reasoning': results['reasoning'],
+                                'Event_Type': results['event_type'],
+                                'Event_Summary': results['event_summary'],
                                 'Текст': text[:1000]
                             })