pentarosarium committed on
Commit
f7f1da3
·
1 Parent(s): 3ee8d61
Files changed (1) hide show
  1. app.py +115 -2
app.py CHANGED
@@ -12,7 +12,8 @@ import os
12
  groq_key = os.environ['groq_key']
13
  from langchain_openai import ChatOpenAI
14
  from langchain.prompts import PromptTemplate
15
-
 
16
 
17
 
18
  def fuzzy_deduplicate(df, column, threshold=55):
@@ -528,11 +529,123 @@ def process_file(file_obj):
528
  logger.error(f"File processing error: {str(e)}")
529
  raise
530
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
  def create_interface():
532
  control = ProcessControl()
533
 
534
  with gr.Blocks(theme=gr.themes.Soft()) as app:
535
- gr.Markdown("# AI-анализ мониторинга новостей v.1.28")
536
 
537
  with gr.Row():
538
  file_input = gr.File(
 
12
  groq_key = os.environ['groq_key']
13
  from langchain_openai import ChatOpenAI
14
  from langchain.prompts import PromptTemplate
15
+ from openpyxl import load_workbook
16
+ from openpyxl.utils.dataframe import dataframe_to_rows
17
 
18
 
19
  def fuzzy_deduplicate(df, column, threshold=55):
 
529
  logger.error(f"File processing error: {str(e)}")
530
  raise
531
 
532
def _write_rows(ws, rows, first_row, first_col=1):
    """Write each sequence in *rows* as one worksheet row of consecutive cells.

    The first sequence lands on ``first_row``; each value is placed starting
    at ``first_col`` and moving right. Existing cells are overwritten.
    """
    for row_offset, values in enumerate(rows):
        for col_offset, value in enumerate(values):
            ws.cell(row=first_row + row_offset, column=first_col + col_offset, value=value)


def _summarize_entities(df):
    """Return per-entity summary tuples, sorted by negative mentions (descending).

    Each tuple is ``(entity, total, negative_count, positive_count, impact)``.
    ``impact`` is the first non-null 'Impact' value among the entity's negative
    rows, or 'Неопределенный эффект' when there is none (or when the frame has
    no 'Impact' column at all).
    """
    has_impact = 'Impact' in df.columns
    summary = []
    for entity in df['Объект'].unique():
        entity_df = df[df['Объект'] == entity]
        negative_df = entity_df[entity_df['Sentiment'] == 'Negative']
        positive_df = entity_df[entity_df['Sentiment'] == 'Positive']

        impact = 'Неопределенный эффект'
        if has_impact:
            # NOTE: this is the first recorded impact, not a severity ranking.
            impacts = negative_df['Impact'].dropna()
            if len(impacts) > 0:
                impact = impacts.iloc[0]

        summary.append((entity, len(entity_df), len(negative_df), len(positive_df), impact))

    # sorted() is stable, so entities with equal counts keep first-seen order.
    return sorted(summary, key=lambda item: item[2], reverse=True)


def create_output_file(df, uploaded_file):
    """Create an Excel report with multiple sheets from the processed DataFrame.

    The workbook template is loaded from "sample_file.xlsx" (relative path) and
    the following sheets are filled in:

    * 'Публикации'     - full dump of ``df`` with header, from cell A1.
    * 'Мониторинг'     - rows whose 'Event_Type' is not 'Нет', from row 4, column 5.
    * 'Сводка'         - per-entity counts and impact, from row 4, column 5.
    * 'Значимые'       - rows with 'Negative'/'Positive' sentiment, from row 3, column 3.
    * 'Анализ'         - rows with 'Negative' sentiment, from row 4, column 5.
    * 'Тех.приложение' - technical columns with header, from cell A1
                         (the sheet is created when the template lacks it).

    Args:
        df: processed DataFrame. Must contain 'Объект', 'Заголовок',
            'Выдержки из текста', 'Sentiment', 'Event_Type' and 'Event_Summary';
            'Impact', 'Reasoning' and 'Translated' are used when present.
        uploaded_file: currently unused; kept for interface compatibility.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds the saved workbook,
        or ``None`` if anything failed (the error is logged).
    """
    try:
        wb = load_workbook("sample_file.xlsx")

        # 1. 'Публикации': the whole frame, header included.
        _write_rows(wb['Публикации'], dataframe_to_rows(df, index=False, header=True), first_row=1)

        # 2. 'Мониторинг': only rows where an event was detected.
        events_df = df[df['Event_Type'] != 'Нет']
        _write_rows(
            wb['Мониторинг'],
            (
                (
                    row['Объект'],
                    row['Заголовок'],
                    row['Event_Type'],
                    row['Event_Summary'],
                    row['Выдержки из текста'],
                )
                for _, row in events_df.iterrows()
            ),
            first_row=4,
            first_col=5,
        )

        # 3. 'Сводка': per-entity statistics, most negative mentions first.
        _write_rows(wb['Сводка'], _summarize_entities(df), first_row=4, first_col=5)

        # 4. 'Значимые': every publication with a non-neutral sentiment.
        sentiment_df = df[df['Sentiment'].isin(['Negative', 'Positive'])]
        _write_rows(
            wb['Значимые'],
            (
                (
                    row['Объект'],
                    'релевантно',
                    row['Sentiment'],
                    row.get('Impact', '-'),
                    row['Заголовок'],
                    row['Выдержки из текста'],
                )
                for _, row in sentiment_df.iterrows()
            ),
            first_row=3,
            first_col=3,
        )

        # 5. 'Анализ': negative publications with the model's reasoning.
        negative_df = df[df['Sentiment'] == 'Negative']
        _write_rows(
            wb['Анализ'],
            (
                (
                    row['Объект'],
                    row['Заголовок'],
                    "Риск убытка",
                    row.get('Reasoning', '-'),
                    row['Выдержки из текста'],
                )
                for _, row in negative_df.iterrows()
            ),
            first_row=4,
            first_col=5,
        )

        # 6. 'Тех.приложение': technical dump; create the sheet if the template lacks it.
        if 'Тех.приложение' not in wb.sheetnames:
            wb.create_sheet('Тех.приложение')

        tech_cols = ['Объект', 'Заголовок', 'Выдержки из текста', 'Translated', 'Sentiment', 'Impact', 'Reasoning']
        # Optional columns may be absent; export what is available rather than
        # discarding the whole report over a missing technical column.
        available_cols = [col for col in tech_cols if col in df.columns]
        missing_cols = [col for col in tech_cols if col not in df.columns]
        if missing_cols:
            logger.warning(f"Columns missing from technical sheet: {missing_cols}")
        _write_rows(
            wb['Тех.приложение'],
            dataframe_to_rows(df[available_cols], index=False, header=True),
            first_row=1,
        )

        # Serialize into memory and rewind so the caller can read from the start.
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        return output

    except Exception as e:
        # Best-effort boundary: callers receive None instead of an exception.
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"Error creating output file: {str(e)}")
        logger.error(f"DataFrame shape: {df.shape}")
        logger.error(f"Available columns: {df.columns.tolist()}")
        return None
642
+
643
+
644
  def create_interface():
645
  control = ProcessControl()
646
 
647
  with gr.Blocks(theme=gr.themes.Soft()) as app:
648
+ gr.Markdown("# AI-анализ мониторинга новостей v.1.29")
649
 
650
  with gr.Row():
651
  file_input = gr.File(