pentarosarium committed on
Commit
08fb3e7
·
1 Parent(s): 1fd794f

progress more (3.2)

Browse files
Files changed (1) hide show
  1. app.py +135 -108
app.py CHANGED
@@ -15,31 +15,49 @@ from reportlab.lib import colors
15
  from reportlab.lib.pagesizes import letter
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
17
  from reportlab.lib.styles import getSampleStyleSheet
18
- from io import StringIO
 
19
  import contextlib
20
 
21
 
22
- @contextlib.contextmanager
23
- def capture_streamlit_output():
24
- # Create StringIO object to capture output
25
- output = StringIO()
26
- with contextlib.redirect_stdout(output):
27
- yield output
28
 
29
- def save_to_pdf(output_text):
30
- doc = SimpleDocTemplate("result.pdf", pagesize=letter)
31
- styles = getSampleStyleSheet()
32
- story = []
33
-
34
- # Split the captured output into lines
35
- lines = output_text.getvalue().split('\n')
36
- for line in lines:
37
- if line.strip(): # Skip empty lines
38
- p = Paragraph(line, styles['Normal'])
39
- story.append(p)
40
- story.append(Spacer(1, 12)) # Add space between paragraphs
41
-
42
- doc.build(story)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  # Initialize sentiment analyzers
45
  finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
@@ -193,78 +211,93 @@ def generate_sentiment_visualization(df):
193
  return fig
194
 
195
  def process_file(uploaded_file):
196
- df = pd.read_excel(uploaded_file, sheet_name='Публикации')
197
 
198
- required_columns = ['Объект', 'Заголовок', 'Выдержки из текста']
199
- missing_columns = [col for col in required_columns if col not in df.columns]
200
- if missing_columns:
201
- st.error(f"Error: The following required columns are missing from the input file: {', '.join(missing_columns)}")
202
- st.stop()
203
 
204
- # Initialize LLM
205
- llm = init_langchain_llm()
206
- if not llm:
207
- st.error("Не удалось инициализировать нейросеть. Пожалуйста, проверьте настройки и попробуйте снова.")
208
- st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- # Deduplication
211
- original_news_count = len(df)
212
- df = df.groupby('Объект').apply(
213
- lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
214
- ).reset_index(drop=True)
215
-
216
- remaining_news_count = len(df)
217
- duplicates_removed = original_news_count - remaining_news_count
218
- st.write(f"Из {original_news_count} новостных сообщений удалены {duplicates_removed} дублирующих. Осталось {remaining_news_count}.")
219
-
220
- # Initialize progress
221
- progress_bar = st.progress(0)
222
- status_text = st.empty()
223
-
224
- # Process each news item
225
- df['Translated'] = ''
226
- df['Sentiment'] = ''
227
- df['Impact'] = ''
228
- df['Reasoning'] = ''
229
-
230
- for index, row in df.iterrows():
231
- # First: Translate
232
- translated_text = translate_text(llm, row['Выдержки из текста'])
233
- df.at[index, 'Translated'] = translated_text
234
 
235
- # Second: Analyze sentiment
236
- sentiment = analyze_sentiment(translated_text)
237
- df.at[index, 'Sentiment'] = sentiment
238
 
239
- # Third: If negative, estimate impact
240
- if sentiment == "Negative":
241
- impact, reasoning = estimate_impact(llm, translated_text, row['Объект'])
242
- df.at[index, 'Impact'] = impact
243
- df.at[index, 'Reasoning'] = reasoning
244
 
245
- # Update progress
246
- progress = (index + 1) / len(df)
247
- progress_bar.progress(progress)
248
- status_text.text(f"Проанализировано {index + 1} из {len(df)} новостей")
249
 
250
- # Display results
251
- st.write(f"Объект: {row['Объект']}")
252
- st.write(f"Новость: {row['Заголовок']}")
253
- st.write(f"Тональность: {sentiment}")
254
- if sentiment == "Negative":
255
- st.write(f"Эффект: {impact}")
256
- st.write(f"Обоснование: {reasoning}")
257
- st.write("---")
 
 
 
 
 
 
 
 
258
 
259
- progress_bar.empty()
260
- status_text.empty()
261
 
262
- # Generate visualization
263
- visualization = generate_sentiment_visualization(df)
264
- if visualization:
265
- st.pyplot(visualization)
266
 
267
- return df
 
 
 
 
 
268
 
269
  def create_analysis_data(df):
270
  analysis_data = []
@@ -359,27 +392,25 @@ def create_output_file(df, uploaded_file):
359
  return output
360
 
361
  def main():
362
- # Capture all output for PDF
363
- with capture_streamlit_output() as output:
364
- st.markdown(
365
- """
366
- <style>
367
- .signature {
368
- position: fixed;
369
- right: 12px;
370
- bottom: 12px;
371
- font-size: 14px;
372
- color: #FF0000;
373
- opacity: 0.9;
374
- z-index: 999;
375
- }
376
- </style>
377
- <div class="signature">denis.pokrovsky.npff</div>
378
- """,
379
- unsafe_allow_html=True
380
- )
381
-
382
- st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.1):::")
383
 
384
  if 'processed_df' not in st.session_state:
385
  st.session_state.processed_df = None
@@ -406,10 +437,6 @@ def main():
406
  formatted_time = format_elapsed_time(elapsed_time)
407
  st.success(f"Обработка и анализ завершены за {formatted_time}.")
408
 
409
- if st.session_state.processed_df is not None:
410
- save_to_pdf(output) # Save the captured output to PDF
411
-
412
-
413
  st.download_button(
414
  label="Скачать результат анализа",
415
  data=output,
 
15
  from reportlab.lib.pagesizes import letter
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
17
  from reportlab.lib.styles import getSampleStyleSheet
18
+ from io import StringIO, BytesIO
19
+ import sys
20
  import contextlib
21
 
22
 
23
class StreamlitOutputCapture:
    """Minimal in-memory text sink that mimics a writable stream.

    Implements just enough of the file protocol (``write``/``flush``) to be
    substituted for ``sys.stdout``; the full transcript is retrieved later
    with ``getvalue()``.
    """

    def __init__(self):
        # Text chunks in arrival order; joined lazily in getvalue().
        self.output = []

    def write(self, text):
        # O(1) per chunk -- avoid quadratic string concatenation.
        self.output.append(text)

    def getvalue(self):
        # Single-pass join of everything written so far.
        return ''.join(self.output)

    def flush(self):
        # Protocol no-op: nothing is buffered outside self.output.
        pass
35
+
36
def save_to_pdf(captured_output):
    """Render captured transcript text into ``result.pdf``.

    Args:
        captured_output: any object exposing ``getvalue() -> str`` (e.g.
            StreamlitOutputCapture or io.StringIO) whose text is rendered
            one paragraph per non-empty line.

    Side effects:
        Writes ``result.pdf`` in the current working directory and reports
        success or failure via Streamlit widgets; never raises.
    """
    from xml.sax.saxutils import escape  # stdlib; only needed here

    try:
        # Create PDF document
        doc = SimpleDocTemplate("result.pdf", pagesize=letter)
        styles = getSampleStyleSheet()
        story = []

        # Convert captured output to string and split into lines
        output_text = captured_output.getvalue()
        lines = output_text.split('\n')

        # Add each line to the PDF
        for line in lines:
            if line.strip():  # Skip empty lines
                # Paragraph() parses its text as XML-like markup, so raw
                # '&', '<', '>' would raise. The previous
                # encode('utf-8', errors='ignore').decode('utf-8') round-trip
                # was a no-op on str and gave no such protection.
                p = Paragraph(escape(line), styles['Normal'])
                story.append(p)
                story.append(Spacer(1, 12))  # spacing between paragraphs

        # Build the PDF
        doc.build(story)
        st.success("PDF файл 'result.pdf' успешно создан")
    except Exception as e:
        # Best-effort export: a PDF failure must not abort the analysis run.
        st.error(f"Ошибка при создании PDF: {str(e)}")
61
 
62
  # Initialize sentiment analyzers
63
  finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
 
211
  return fig
212
 
213
def process_file(uploaded_file):
    """Run the full news-analysis pipeline on an uploaded Excel workbook.

    Reads the 'Публикации' sheet, deduplicates near-identical excerpts per
    entity, then for each remaining row translates the excerpt, scores its
    sentiment, and (for negative items) estimates business impact.  Progress
    and per-row results are streamed to the Streamlit UI; a PDF export of
    captured stdout is attempted at the end.

    Args:
        uploaded_file: file-like object accepted by pd.read_excel
            (e.g. a Streamlit UploadedFile).

    Returns:
        The processed DataFrame with added columns 'Translated', 'Sentiment',
        'Impact', 'Reasoning' — or never returns if st.stop() fires on
        missing columns / LLM init failure.
    """
    # Redirect stdout so print() output can be saved to PDF afterwards.
    # NOTE(review): st.write/st.error render to the browser, not stdout, so
    # this capture likely collects little or nothing — confirm intent.
    output_capture = StreamlitOutputCapture()
    old_stdout = sys.stdout
    sys.stdout = output_capture

    try:
        df = pd.read_excel(uploaded_file, sheet_name='Публикации')

        # Fail fast if the sheet lacks any of the columns the loop reads.
        required_columns = ['Объект', 'Заголовок', 'Выдержки из текста']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            st.error(f"Error: The following required columns are missing from the input file: {', '.join(missing_columns)}")
            st.stop()

        # Initialize LLM (used for translation and impact estimation below).
        llm = init_langchain_llm()
        if not llm:
            st.error("Не удалось иницииализировать нейросеть. Пожалуйста, проверьте настройки и попробуйте снова.")
            st.stop()

        # Deduplication: fuzzy-match excerpts within each entity group
        # (threshold 65) and report how many rows were dropped.
        original_news_count = len(df)
        df = df.groupby('Объект').apply(
            lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
        ).reset_index(drop=True)

        remaining_news_count = len(df)
        duplicates_removed = original_news_count - remaining_news_count
        st.write(f"Из {original_news_count} новостных сообщений удалены {duplicates_removed} дублирующих. Осталось {remaining_news_count}.")

        # Initialize progress widgets (cleared once the loop finishes).
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Pre-create result columns so df.at assignments below always hit
        # existing (string-typed) columns.
        df['Translated'] = ''
        df['Sentiment'] = ''
        df['Impact'] = ''
        df['Reasoning'] = ''

        for index, row in df.iterrows():
            # First: Translate the excerpt to the analyzer's language.
            translated_text = translate_text(llm, row['Выдержки из текста'])
            df.at[index, 'Translated'] = translated_text

            # Second: Analyze sentiment of the translated text.
            sentiment = analyze_sentiment(translated_text)
            df.at[index, 'Sentiment'] = sentiment

            # Third: If negative, estimate impact for the affected entity.
            if sentiment == "Negative":
                impact, reasoning = estimate_impact(llm, translated_text, row['Объект'])
                df.at[index, 'Impact'] = impact
                df.at[index, 'Reasoning'] = reasoning

            # Update progress — relies on the RangeIndex produced by
            # reset_index(drop=True) above so index+1 counts rows.
            progress = (index + 1) / len(df)
            progress_bar.progress(progress)
            status_text.text(f"Проанализировано {index + 1} из {len(df)} новостей")

            # Display per-row results; impact/reasoning are only defined
            # when this row's sentiment is "Negative" (same-iteration guard).
            st.write(f"Объект: {row['Объект']}")
            st.write(f"Новость: {row['Заголовок']}")
            st.write(f"Тональность: {sentiment}")
            if sentiment == "Negative":
                st.write(f"Эффект: {impact}")
                st.write(f"Обоснование: {reasoning}")
            st.write("---")

        progress_bar.empty()
        status_text.empty()

        # Generate visualization (helper may return None — guarded below).
        visualization = generate_sentiment_visualization(df)
        if visualization:
            st.pyplot(visualization)

        # Export whatever stdout was captured during the run to result.pdf.
        save_to_pdf(output_capture)

        return df


    finally:
        # Always restore the real stdout, even on st.stop() or exceptions.
        sys.stdout = old_stdout
301
 
302
  def create_analysis_data(df):
303
  analysis_data = []
 
392
  return output
393
 
394
  def main():
395
+ st.markdown(
396
+ """
397
+ <style>
398
+ .signature {
399
+ position: fixed;
400
+ right: 12px;
401
+ bottom: 12px;
402
+ font-size: 14px;
403
+ color: #FF0000;
404
+ opacity: 0.9;
405
+ z-index: 999;
406
+ }
407
+ </style>
408
+ <div class="signature">denis.pokrovsky.npff</div>
409
+ """,
410
+ unsafe_allow_html=True
411
+ )
412
+
413
+ st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.2):::")
 
 
414
 
415
  if 'processed_df' not in st.session_state:
416
  st.session_state.processed_df = None
 
437
  formatted_time = format_elapsed_time(elapsed_time)
438
  st.success(f"Обработка и анализ завершены за {formatted_time}.")
439
 
 
 
 
 
440
  st.download_button(
441
  label="Скачать результат анализа",
442
  data=output,