Spaces:

pentarosarium
/

gprocess

Sleeping

App Files Community

pentarosarium committed on Nov 20, 2024

Commit

412ee33

1 Parent(s): 2a0d401

v.1.14

Browse files

Files changed (1) hide show

app.py +100 -5

app.py CHANGED Viewed

@@ -44,13 +44,108 @@ class ProcessControl:
 class EventDetector:
     def __init__(self):
         self.model_name = "google/mt5-small"
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         self.model = None
         self.finbert = None
         self.roberta = None
         self.finbert_tone = None
         self.control = ProcessControl()
     def get_sentiment_label(self, result):
         """Helper method for sentiment classification"""
         label = result['label'].lower()
@@ -72,9 +167,9 @@ class EventDetector:
             try:
                 inputs = [truncated_text]
-                finbert_result = self.finbert(inputs, truncation=True, max_length=512)[0]
-                roberta_result = self.roberta(inputs, truncation=True, max_length=512)[0]
-                finbert_tone_result = self.finbert_tone(inputs, truncation=True, max_length=512)[0]
                 results = [
                     self.get_sentiment_label(finbert_result),
@@ -182,7 +277,7 @@ def create_interface():
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
-        gr.Markdown("# AI-анализ мониторинга новостей v.1.13")
         with gr.Row():
             file_input = gr.File(

 class EventDetector:
     def __init__(self):
         self.model_name = "google/mt5-small"
+        # Initialize tokenizer with legacy=True to suppress warning
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name,
+            legacy=True
+        )
         self.model = None
         self.finbert = None
         self.roberta = None
         self.finbert_tone = None
         self.control = ProcessControl()
+    @spaces.GPU
+    def initialize_models(self):
+        """Initialize all models with GPU support"""
+        try:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Initializing models on device: {device}")
+            # Initialize MT5 model
+            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(device)
+            # Initialize sentiment analysis pipelines
+            self.finbert = pipeline(
+                "sentiment-analysis",
+                model="ProsusAI/finbert",
+                device=device,
+                truncation=True,
+                max_length=512
+            )
+            self.roberta = pipeline(
+                "sentiment-analysis",
+                model="cardiffnlp/twitter-roberta-base-sentiment",
+                device=device,
+                truncation=True,
+                max_length=512
+            )
+            self.finbert_tone = pipeline(
+                "sentiment-analysis",
+                model="yiyanghkust/finbert-tone",
+                device=device,
+                truncation=True,
+                max_length=512
+            )
+            logger.info("All models initialized successfully")
+            return True
+        except Exception as e:
+            logger.error(f"Model initialization error: {str(e)}")
+            return False
+    @spaces.GPU
+    def detect_events(self, text, entity):
+        if not text or not entity:
+            return "Нет", "Invalid input"
+        try:
+            # Check if models are initialized
+            if self.model is None:
+                if not self.initialize_models():
+                    return "Нет", "Model initialization failed"
+            # Truncate input text
+            text = text[:500]
+            prompt = f"""<s>Analyze the following news about {entity}:
+            Text: {text}
+            Task: Identify the main event type and provide a brief summary.</s>"""
+            inputs = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=512
+            ).to(self.model.device)
+            outputs = self.model.generate(
+                **inputs,
+                max_length=300,
+                num_return_sequences=1,
+                pad_token_id=self.tokenizer.pad_token_id,
+                eos_token_id=self.tokenizer.eos_token_id
+            )
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            event_type = "Нет"
+            if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
+                event_type = "Отчетность"
+            elif any(term in text.lower() for term in ['облигаци', 'купон', 'дефолт']):
+                event_type = "РЦБ"
+            elif any(term in text.lower() for term in ['суд', 'иск', 'арбитраж']):
+                event_type = "Суд"
+            return event_type, response
+        except Exception as e:
+            logger.error(f"Event detection error: {str(e)}")
+            return "Нет", f"Error: {str(e)}"
     def get_sentiment_label(self, result):
         """Helper method for sentiment classification"""
         label = result['label'].lower()
             try:
                 inputs = [truncated_text]
+                finbert_result = self.finbert(inputs)[0]
+                roberta_result = self.roberta(inputs)[0]
+                finbert_tone_result = self.finbert_tone(inputs)[0]
                 results = [
                     self.get_sentiment_label(finbert_result),
     control = ProcessControl()
     with gr.Blocks(theme=gr.themes.Soft()) as app:
+        gr.Markdown("# AI-анализ мониторинга новостей v.1.14")
         with gr.Row():
             file_input = gr.File(