pentarosarium committed on
Commit
6da2a21
·
1 Parent(s): 08fb3e7

progress more (3.3)

Browse files
Files changed (2) hide show
  1. app.py +36 -70
  2. requirements.txt +1 -1
app.py CHANGED
@@ -20,45 +20,29 @@ import sys
20
  import contextlib
21
 
22
 
23
- class StreamlitOutputCapture:
24
- def __init__(self):
25
- self.output = []
26
 
 
 
 
 
27
  def write(self, text):
28
- self.output.append(text)
29
-
30
- def getvalue(self):
31
- return ''.join(self.output)
32
 
33
- def flush(self):
34
- pass
35
-
36
- def save_to_pdf(captured_output):
37
- try:
38
- # Create PDF document
39
- doc = SimpleDocTemplate("result.pdf", pagesize=letter)
40
- styles = getSampleStyleSheet()
41
- story = []
42
-
43
- # Convert captured output to string and split into lines
44
- output_text = captured_output.getvalue()
45
- lines = output_text.split('\n')
46
-
47
- # Add each line to the PDF
48
- for line in lines:
49
- if line.strip(): # Skip empty lines
50
- # Clean the line and handle any encoding issues
51
- cleaned_line = line.encode('utf-8', errors='ignore').decode('utf-8')
52
- p = Paragraph(cleaned_line, styles['Normal'])
53
- story.append(p)
54
- story.append(Spacer(1, 12))
55
-
56
- # Build the PDF
57
- doc.build(story)
58
- st.success("PDF файл 'result.pdf' успешно создан")
59
- except Exception as e:
60
- st.error(f"Ошибка при создании PDF: {str(e)}")
61
 
 
62
  # Initialize sentiment analyzers
63
  finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
64
  roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
@@ -211,12 +195,10 @@ def generate_sentiment_visualization(df):
211
  return fig
212
 
213
  def process_file(uploaded_file):
214
-
215
-
216
- output_capture = StreamlitOutputCapture()
217
  old_stdout = sys.stdout
218
  sys.stdout = output_capture
219
-
220
  try:
221
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
222
 
@@ -253,50 +235,33 @@ def process_file(uploaded_file):
253
  df['Reasoning'] = ''
254
 
255
  for index, row in df.iterrows():
256
- # First: Translate
257
  translated_text = translate_text(llm, row['Выдержки из текста'])
258
  df.at[index, 'Translated'] = translated_text
259
-
260
- # Second: Analyze sentiment
261
  sentiment = analyze_sentiment(translated_text)
262
  df.at[index, 'Sentiment'] = sentiment
263
-
264
- # Third: If negative, estimate impact
265
  if sentiment == "Negative":
266
  impact, reasoning = estimate_impact(llm, translated_text, row['Объект'])
267
  df.at[index, 'Impact'] = impact
268
  df.at[index, 'Reasoning'] = reasoning
269
-
270
  # Update progress
271
  progress = (index + 1) / len(df)
272
  progress_bar.progress(progress)
273
  status_text.text(f"Проанализировано {index + 1} из {len(df)} новостей")
 
 
 
 
 
 
 
 
274
 
275
- # Display results
276
- st.write(f"Объект: {row['Объект']}")
277
- st.write(f"Новость: {row['Заголовок']}")
278
- st.write(f"Тональность: {sentiment}")
279
- if sentiment == "Negative":
280
- st.write(f"Эффект: {impact}")
281
- st.write(f"Обоснование: {reasoning}")
282
- st.write("---")
283
-
284
- progress_bar.empty()
285
- status_text.empty()
286
-
287
- # Generate visualization
288
- visualization = generate_sentiment_visualization(df)
289
- if visualization:
290
- st.pyplot(visualization)
291
-
292
- save_to_pdf(output_capture)
293
-
294
-
295
  return df
296
 
297
-
298
- finally:
299
-
300
  sys.stdout = old_stdout
301
 
302
  def create_analysis_data(df):
@@ -410,7 +375,7 @@ def main():
410
  unsafe_allow_html=True
411
  )
412
 
413
- st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.2):::")
414
 
415
  if 'processed_df' not in st.session_state:
416
  st.session_state.processed_df = None
@@ -421,7 +386,8 @@ def main():
421
  start_time = time.time()
422
 
423
  st.session_state.processed_df = process_file(uploaded_file)
424
-
 
425
  st.subheader("Предпросмотр данных")
426
  preview_df = st.session_state.processed_df[['Объект', 'Заголовок', 'Sentiment', 'Impact']].head()
427
  st.dataframe(preview_df)
 
20
  import contextlib
21
 
22
 
23
+ from fpdf import FPDF
 
 
24
 
25
class StreamlitCapture:
    """Minimal file-like sink that records every chunk written to it.

    Intended to be installed in place of ``sys.stdout`` so that printed
    text can be collected and rendered later. Implements the two methods
    that stdout consumers call: ``write`` and ``flush``.
    """

    def __init__(self):
        # Captured chunks, in the order they were written.
        self.texts = []

    def write(self, text):
        """Record *text* (coerced to ``str``) and return its length."""
        chunk = str(text)
        self.texts.append(chunk)
        # Text streams report the number of characters written.
        return len(chunk)

    def flush(self):
        """Do nothing; nothing is buffered.

        Required because ``print(..., flush=True)`` and code that calls
        ``sys.stdout.flush()`` would otherwise raise ``AttributeError``.
        """
 
 
 
31
 
32
def save_streamlit_output_to_pdf(texts):
    """Render captured text chunks into ``result.pdf``.

    Args:
        texts: Iterable of strings; each chunk is written in order as its
            own multi-line cell.

    The font file ``DejaVuSansCondensed.ttf`` is loaded by relative name,
    so it must be resolvable from the working directory.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf', uni=True)
    pdf.set_font('DejaVu', '', 12)

    for text in texts:
        # Start every chunk at the left margin: fpdf2's multi_cell leaves
        # the cursor at the right edge by default, which would leave no
        # horizontal room for the next full-width cell.
        pdf.set_x(pdf.l_margin)
        # Pass the text through unchanged. The font above is registered
        # for Unicode; a latin-1 round-trip with errors='replace' would
        # turn every Cyrillic character into "?".
        pdf.multi_cell(0, 10, text)

    pdf.output("result.pdf", 'F')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+
46
  # Initialize sentiment analyzers
47
  finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
48
  roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
 
195
  return fig
196
 
197
  def process_file(uploaded_file):
198
+ output_capture = StreamlitCapture()
 
 
199
  old_stdout = sys.stdout
200
  sys.stdout = output_capture
201
+
202
  try:
203
  df = pd.read_excel(uploaded_file, sheet_name='Публикации')
204
 
 
235
  df['Reasoning'] = ''
236
 
237
  for index, row in df.iterrows():
 
238
  translated_text = translate_text(llm, row['Выдержки из текста'])
239
  df.at[index, 'Translated'] = translated_text
240
+
 
241
  sentiment = analyze_sentiment(translated_text)
242
  df.at[index, 'Sentiment'] = sentiment
243
+
 
244
  if sentiment == "Negative":
245
  impact, reasoning = estimate_impact(llm, translated_text, row['Объект'])
246
  df.at[index, 'Impact'] = impact
247
  df.at[index, 'Reasoning'] = reasoning
248
+
249
  # Update progress
250
  progress = (index + 1) / len(df)
251
  progress_bar.progress(progress)
252
  status_text.text(f"Проанализировано {index + 1} из {len(df)} новостей")
253
+
254
+ # Display results with color coding
255
+ display_sentiment_results(row, sentiment,
256
+ impact if sentiment == "Negative" else None,
257
+ reasoning if sentiment == "Negative" else None)
258
+
259
+ # Generate PDF at the end of processing
260
+ save_streamlit_output_to_pdf(output_capture.texts)
261
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  return df
263
 
264
+ finally:
 
 
265
  sys.stdout = old_stdout
266
 
267
  def create_analysis_data(df):
 
375
  unsafe_allow_html=True
376
  )
377
 
378
+ st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.3):::")
379
 
380
  if 'processed_df' not in st.session_state:
381
  st.session_state.processed_df = None
 
386
  start_time = time.time()
387
 
388
  st.session_state.processed_df = process_file(uploaded_file)
389
+
390
+
391
  st.subheader("Предпросмотр данных")
392
  preview_df = st.session_state.processed_df[['Объект', 'Заголовок', 'Sentiment', 'Impact']].head()
393
  st.dataframe(preview_df)
requirements.txt CHANGED
@@ -16,4 +16,4 @@ huggingface_hub
16
  accelerate>=0.26.0
17
  openai
18
  wordcloud
19
- reportlab==4.1.0
 
16
  accelerate>=0.26.0
17
  openai
18
  wordcloud
19
+ fpdf2==2.7.8