Spaces:
Sleeping
Sleeping
Commit
·
92287cb
1
Parent(s):
458b69b
v.1.06
Browse files
app.py
CHANGED
@@ -3,28 +3,31 @@ import spaces
|
|
3 |
import pandas as pd
|
4 |
import torch
|
5 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
6 |
-
from transformers import AutoModelForCausalLM
|
7 |
-
import time
|
8 |
import plotly.graph_objects as go
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
from openpyxl import load_workbook
|
14 |
-
from openpyxl.utils.dataframe import dataframe_to_rows
|
15 |
|
16 |
class EventDetector:
|
17 |
def __init__(self):
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
@spaces.GPU(duration=120)
|
30 |
def detect_events(self, text, entity):
|
@@ -42,7 +45,6 @@ class EventDetector:
|
|
42 |
outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
|
43 |
response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
44 |
|
45 |
-
# Event type classification logic
|
46 |
event_type = "Нет"
|
47 |
if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
|
48 |
event_type = "Отчетность"
|
@@ -54,21 +56,28 @@ class EventDetector:
|
|
54 |
return event_type, response
|
55 |
|
56 |
except Exception as e:
|
|
|
57 |
return "Нет", f"Error: {str(e)}"
|
58 |
|
59 |
@spaces.GPU(duration=60)
|
60 |
def analyze_sentiment(self, text):
|
61 |
try:
|
62 |
results = []
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
# Return majority sentiment
|
68 |
sentiment_counts = pd.Series(results).value_counts()
|
69 |
return sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"
|
70 |
|
71 |
except Exception as e:
|
|
|
72 |
return "Neutral"
|
73 |
|
74 |
def _get_sentiment(self, result):
|
@@ -81,11 +90,20 @@ class EventDetector:
|
|
81 |
|
82 |
def process_file(file):
|
83 |
try:
|
84 |
-
|
|
|
|
|
|
|
|
|
|
|
85 |
detector = EventDetector()
|
86 |
processed_rows = []
|
|
|
87 |
|
88 |
-
for
|
|
|
|
|
|
|
89 |
text = str(row.get('Выдержки из текста', ''))
|
90 |
entity = str(row.get('Объект', ''))
|
91 |
|
@@ -100,62 +118,72 @@ def process_file(file):
|
|
100 |
'Event_Summary': event_summary,
|
101 |
'Текст': text
|
102 |
})
|
103 |
-
|
104 |
-
return pd.DataFrame(processed_rows)
|
105 |
-
|
106 |
-
except Exception as e:
|
107 |
-
# Return empty DataFrame instead of string
|
108 |
-
return pd.DataFrame(columns=['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст'])
|
109 |
-
|
110 |
-
def analyze(file):
|
111 |
-
if file is None:
|
112 |
-
return None, None, None
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
return
|
117 |
|
118 |
-
try:
|
119 |
-
fig_sentiment, fig_events = create_visualizations(df)
|
120 |
-
return df, fig_sentiment, fig_events
|
121 |
except Exception as e:
|
122 |
-
|
|
|
|
|
123 |
|
124 |
def create_visualizations(df):
|
125 |
if df is None or df.empty:
|
126 |
return None, None
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
def create_interface():
|
147 |
-
with gr.Blocks() as app:
|
148 |
-
gr.Markdown("# AI-анализ мониторинга новостей v.1.
|
149 |
|
150 |
with gr.Row():
|
151 |
-
file_input = gr.File(
|
|
|
|
|
|
|
|
|
152 |
|
153 |
with gr.Row():
|
154 |
-
analyze_btn = gr.Button(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
with gr.Row():
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
159 |
|
160 |
with gr.Row():
|
161 |
with gr.Column():
|
@@ -165,21 +193,30 @@ def create_interface():
|
|
165 |
|
166 |
def analyze(file):
|
167 |
if file is None:
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
|
175 |
analyze_btn.click(
|
176 |
analyze,
|
177 |
inputs=[file_input],
|
178 |
-
outputs=[stats, sentiment_plot, events_plot]
|
179 |
)
|
180 |
|
181 |
return app
|
182 |
|
183 |
if __name__ == "__main__":
|
184 |
app = create_interface()
|
185 |
-
app.launch()
|
|
|
3 |
import pandas as pd
|
4 |
import torch
|
5 |
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
|
|
|
|
6 |
import plotly.graph_objects as go
|
7 |
+
import logging
|
8 |
+
|
9 |
+
logging.basicConfig(level=logging.INFO)
|
10 |
+
logger = logging.getLogger(__name__)
|
|
|
|
|
11 |
|
12 |
class EventDetector:
|
13 |
def __init__(self):
    """Load the mT5 seq2seq model and the three sentiment pipelines.

    Selects ``"cuda"`` when ``torch.cuda.is_available()`` reports it and
    ``"cpu"`` otherwise, then places the model and every pipeline on that
    device.

    Raises:
        Exception: any failure while loading a tokenizer, model or pipeline
            is logged with its traceback and re-raised unchanged.
    """
    try:
        # Query CUDA once so the log line and the chosen device cannot disagree.
        cuda_available = torch.cuda.is_available()
        self.device = "cuda" if cuda_available else "cpu"
        logger.info(
            "Using device: %s",
            torch.cuda.get_device_name(0) if cuda_available else "CPU",
        )

        self.model_name = "google/mt5-small"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self.device)

        self.finbert = pipeline(
            "sentiment-analysis", model="ProsusAI/finbert", device=self.device
        )
        self.roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment",
            device=self.device,
        )
        self.finbert_tone = pipeline(
            "sentiment-analysis", model="yiyanghkust/finbert-tone", device=self.device
        )

        logger.info("Models initialized successfully")

    except Exception as e:
        # logger.exception keeps the traceback, which a plain error line loses.
        logger.exception("Model initialization error: %s", e)
        raise
|
31 |
|
32 |
@spaces.GPU(duration=120)
|
33 |
def detect_events(self, text, entity):
|
|
|
45 |
outputs = self.model.generate(**inputs, max_length=300, num_return_sequences=1)
|
46 |
response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
47 |
|
|
|
48 |
event_type = "Нет"
|
49 |
if any(term in text.lower() for term in ['отчет', 'выручка', 'прибыль', 'ebitda']):
|
50 |
event_type = "Отчетность"
|
|
|
56 |
return event_type, response
|
57 |
|
58 |
except Exception as e:
|
59 |
+
logger.error(f"Event detection error: {e}")
|
60 |
return "Нет", f"Error: {str(e)}"
|
61 |
|
62 |
@spaces.GPU(duration=60)
def analyze_sentiment(self, text):
    """Return the majority sentiment of *text* across the three classifiers.

    Each of ``self.finbert``, ``self.roberta`` and ``self.finbert_tone`` is
    run on the text and its first result is mapped through
    ``self._get_sentiment``. A classifier that raises contributes
    ``"Neutral"``. A label wins when at least two votes agree.

    Args:
        text: Text to classify.

    Returns:
        The winning sentiment label, or ``"Neutral"`` when no label reaches
        two votes or when anything else fails.
    """
    try:
        results = []
        # Cheap character pre-trim; it does not bound the token count.
        texts = [text[:512]]

        for model in (self.finbert, self.roberta, self.finbert_tone):
            try:
                # 512 characters can still exceed the encoder's maximum
                # sequence length (non-Latin text splits into many sub-word
                # pieces), so let the tokenizer truncate as well.
                result = model(texts, truncation=True, max_length=512)[0]
                results.append(self._get_sentiment(result))
            except Exception as e:
                logger.error(f"Model inference error: {e}")
                results.append("Neutral")

        # value_counts() sorts by frequency, so index[0] is the top label.
        sentiment_counts = pd.Series(results).value_counts()
        return sentiment_counts.index[0] if sentiment_counts.iloc[0] >= 2 else "Neutral"

    except Exception as e:
        logger.error(f"Sentiment analysis error: {e}")
        return "Neutral"
|
82 |
|
83 |
def _get_sentiment(self, result):
|
|
|
90 |
|
91 |
def process_file(file):
|
92 |
try:
|
93 |
+
gr.Info("Starting file processing...")
|
94 |
+
if isinstance(file, str):
|
95 |
+
df = pd.read_excel(file, sheet_name='Публикации')
|
96 |
+
else:
|
97 |
+
df = pd.read_excel(file.name, sheet_name='Публикации')
|
98 |
+
|
99 |
detector = EventDetector()
|
100 |
processed_rows = []
|
101 |
+
total = len(df)
|
102 |
|
103 |
+
for idx, row in df.iterrows():
|
104 |
+
if idx % 10 == 0:
|
105 |
+
gr.Info(f"Processing {idx}/{total} rows...")
|
106 |
+
|
107 |
text = str(row.get('Выдержки из текста', ''))
|
108 |
entity = str(row.get('Объект', ''))
|
109 |
|
|
|
118 |
'Event_Summary': event_summary,
|
119 |
'Текст': text
|
120 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
+
result_df = pd.DataFrame(processed_rows)
|
123 |
+
gr.Info("File processing complete!")
|
124 |
+
return result_df
|
125 |
|
|
|
|
|
|
|
126 |
except Exception as e:
|
127 |
+
logger.error(f"File processing error: {e}")
|
128 |
+
gr.Error(f"Error processing file: {str(e)}")
|
129 |
+
return pd.DataFrame(columns=['Объект', 'Заголовок', 'Sentiment', 'Event_Type', 'Event_Summary', 'Текст'])
|
130 |
|
131 |
def create_visualizations(df):
    """Build the sentiment pie chart and the event-type bar chart.

    Args:
        df: DataFrame with ``Sentiment`` and ``Event_Type`` columns, or
            ``None``.

    Returns:
        ``(fig_sentiment, fig_events)``. ``(None, None)`` is returned when
        *df* is ``None`` or empty, or when building either figure fails.
    """
    if df is None or df.empty:
        return None, None

    # Positional palette, still used for labels not listed in the map below.
    palette = ['#FF6B6B', '#4ECDC4', '#95A5A6']
    # Key colours by label so a sentiment keeps its colour regardless of how
    # frequent it is (value_counts() orders by frequency).
    # NOTE(review): only "Neutral" is visible as a label here; the
    # "negative"/"positive" names are assumed - confirm against _get_sentiment.
    color_by_label = {
        'negative': palette[0],
        'positive': palette[1],
        'neutral': palette[2],
    }

    try:
        sentiments = df['Sentiment'].value_counts()
        slice_colors = [
            color_by_label.get(str(label).lower(), palette[pos % len(palette)])
            for pos, label in enumerate(sentiments.index)
        ]
        fig_sentiment = go.Figure(data=[go.Pie(
            labels=sentiments.index,
            values=sentiments.values,
            marker_colors=slice_colors,
        )])
        fig_sentiment.update_layout(title="Распределение тональности")

        events = df['Event_Type'].value_counts()
        fig_events = go.Figure(data=[go.Bar(
            x=events.index,
            y=events.values,
            marker_color='#2196F3',
        )])
        fig_events.update_layout(title="Распределение событий")

        return fig_sentiment, fig_events

    except Exception as e:
        # Charts are optional for the caller, so failures degrade to "no plots".
        logger.exception("Visualization error: %s", e)
        return None, None
|
157 |
|
158 |
def create_interface():
|
159 |
+
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
160 |
+
gr.Markdown("# AI-анализ мониторинга новостей v.1.06")
|
161 |
|
162 |
with gr.Row():
|
163 |
+
file_input = gr.File(
|
164 |
+
label="Загрузите Excel файл",
|
165 |
+
file_types=[".xlsx"],
|
166 |
+
type="file"
|
167 |
+
)
|
168 |
|
169 |
with gr.Row():
|
170 |
+
analyze_btn = gr.Button(
|
171 |
+
"Начать анализ",
|
172 |
+
variant="primary"
|
173 |
+
)
|
174 |
+
|
175 |
+
with gr.Row():
|
176 |
+
progress = gr.Textbox(
|
177 |
+
label="Статус",
|
178 |
+
interactive=False
|
179 |
+
)
|
180 |
|
181 |
with gr.Row():
|
182 |
+
stats = gr.DataFrame(
|
183 |
+
label="Результаты анализа",
|
184 |
+
interactive=False,
|
185 |
+
wrap=True
|
186 |
+
)
|
187 |
|
188 |
with gr.Row():
|
189 |
with gr.Column():
|
|
|
193 |
|
194 |
def analyze(file):
    """Run the full analysis for an uploaded file.

    Args:
        file: Value delivered by the ``file_input`` component, or ``None``
            when nothing was uploaded.

    Returns:
        A 4-tuple matching the click handler's outputs
        ``[stats, sentiment_plot, events_plot, progress]``: result DataFrame,
        sentiment figure, events figure and status text. The first three are
        ``None`` when there is nothing to show.
    """
    if file is None:
        gr.Warning("Пожалуйста, загрузите файл")
        return None, None, None, "Ожидание файла"
    try:
        # The status textbox is updated only through the 4th return value.
        # Calling progress.update(...) here would have no effect, because an
        # update must be returned from the handler to reach the UI.
        df = process_file(file)
        if df.empty:
            return None, None, None, "Нет данных для обработки"

        fig_sentiment, fig_events = create_visualizations(df)
        return df, fig_sentiment, fig_events, "Обработка завершена"

    except Exception as e:
        logger.exception("Analysis error: %s", e)
        # gr.Error is an exception class: constructing it without raising
        # shows nothing. gr.Warning displays a toast while still letting the
        # handler return the status text.
        gr.Warning(f"Ошибка анализа: {str(e)}")
        return None, None, None, f"Ошибка: {str(e)}"
|
211 |
|
212 |
analyze_btn.click(
|
213 |
analyze,
|
214 |
inputs=[file_input],
|
215 |
+
outputs=[stats, sentiment_plot, events_plot, progress]
|
216 |
)
|
217 |
|
218 |
return app
|
219 |
|
220 |
if __name__ == "__main__":
    # Script entry point: build the Gradio UI, then serve it.
    create_interface().launch(share=True)
|