Spaces:
Sleeping
Sleeping
Commit
·
f7f1da3
1
Parent(s):
3ee8d61
v.1.29
Browse files
app.py
CHANGED
@@ -12,7 +12,8 @@ import os
|
|
12 |
groq_key = os.environ['groq_key']
|
13 |
from langchain_openai import ChatOpenAI
|
14 |
from langchain.prompts import PromptTemplate
|
15 |
-
|
|
|
16 |
|
17 |
|
18 |
def fuzzy_deduplicate(df, column, threshold=55):
|
@@ -528,11 +529,123 @@ def process_file(file_obj):
|
|
528 |
logger.error(f"File processing error: {str(e)}")
|
529 |
raise
|
530 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
def create_interface():
|
532 |
control = ProcessControl()
|
533 |
|
534 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
535 |
-
gr.Markdown("# AI-анализ мониторинга новостей v.1.
|
536 |
|
537 |
with gr.Row():
|
538 |
file_input = gr.File(
|
|
|
12 |
groq_key = os.environ['groq_key']
|
13 |
from langchain_openai import ChatOpenAI
|
14 |
from langchain.prompts import PromptTemplate
|
15 |
+
from openpyxl import load_workbook
|
16 |
+
from openpyxl.utils.dataframe import dataframe_to_rows
|
17 |
|
18 |
|
19 |
def fuzzy_deduplicate(df, column, threshold=55):
|
|
|
529 |
logger.error(f"File processing error: {str(e)}")
|
530 |
raise
|
531 |
|
532 |
+
def create_output_file(df, uploaded_file, template_path="sample_file.xlsx"):
    """Fill the Excel workbook at *template_path* from the processed DataFrame.

    The workbook is loaded with openpyxl and six sheets are written:

    * 'Публикации'     - full dump of ``df`` (header in row 1).
    * 'Мониторинг'     - rows whose ``Event_Type`` is not 'Нет', from row 4, column 5.
    * 'Сводка'         - per-entity totals, ordered by negative count (descending),
                         from row 4, column 5.
    * 'Значимые'       - rows with Negative/Positive sentiment, from row 3, column 3.
    * 'Анализ'         - rows with Negative sentiment, from row 4, column 5.
    * 'Тех.приложение' - selected technical columns (sheet is created if absent).

    Args:
        df: Processed pandas DataFrame. 'Объект', 'Заголовок',
            'Выдержки из текста', 'Sentiment', 'Event_Type' and 'Event_Summary'
            are required; 'Impact', 'Reasoning' and 'Translated' are optional
            and replaced by a placeholder when absent.
        uploaded_file: Not used by this function; kept so existing callers
            keep working.
        template_path: Workbook to load and fill. Defaults to the previously
            hard-coded "sample_file.xlsx".

    Returns:
        An ``io.BytesIO`` holding the saved workbook, rewound to position 0,
        or ``None`` if anything failed (the error is logged).
    """
    default_impact = 'Неопределенный эффект'

    try:
        wb = load_workbook(template_path)

        # 1. Update 'Публикации' sheet: header plus every row of df.
        _write_rows(
            wb['Публикации'],
            dataframe_to_rows(df, index=False, header=True),
        )

        # 2. Update 'Мониторинг' sheet with events.
        events_df = df[df['Event_Type'] != 'Нет']
        _write_rows(
            wb['Мониторинг'],
            (
                (
                    row['Объект'],
                    row['Заголовок'],
                    row['Event_Type'],
                    row['Event_Summary'],
                    row['Выдержки из текста'],
                )
                for _, row in events_df.iterrows()
            ),
            start_row=4,
            start_col=5,
        )

        # 3. Update 'Сводка' sheet with per-entity statistics.
        entity_stats = []
        for entity in df['Объект'].unique():
            entity_df = df[df['Объект'] == entity]
            negative_rows = entity_df[entity_df['Sentiment'] == 'Negative']
            positive_rows = entity_df[entity_df['Sentiment'] == 'Positive']

            # Impact shown for an entity is the FIRST non-null impact among
            # its negative rows (no severity ranking is applied). A missing
            # 'Impact' column falls back to the default instead of aborting
            # the whole report.
            impact = default_impact
            if 'Impact' in negative_rows.columns:
                known_impacts = negative_rows['Impact'].dropna()
                if not known_impacts.empty:
                    impact = known_impacts.iloc[0]

            entity_stats.append({
                'Объект': entity,
                'Всего': len(entity_df),
                'Негативные': len(negative_rows),
                'Позитивные': len(positive_rows),
                'Impact': impact,
            })

        # Most negatively mentioned entities first; the sort is stable, so
        # ties keep their order of first appearance in df.
        entity_stats.sort(key=lambda stats: stats['Негативные'], reverse=True)
        _write_rows(
            wb['Сводка'],
            (
                (
                    stats['Объект'],
                    stats['Всего'],
                    stats['Негативные'],
                    stats['Позитивные'],
                    stats['Impact'],
                )
                for stats in entity_stats
            ),
            start_row=4,
            start_col=5,
        )

        # 4. Update 'Значимые' sheet.
        sentiment_df = df[df['Sentiment'].isin(['Negative', 'Positive'])]
        _write_rows(
            wb['Значимые'],
            (
                (
                    row['Объект'],
                    'релевантно',
                    row['Sentiment'],
                    row.get('Impact', '-'),
                    row['Заголовок'],
                    row['Выдержки из текста'],
                )
                for _, row in sentiment_df.iterrows()
            ),
            start_row=3,
            start_col=3,
        )

        # 5. Update 'Анализ' sheet.
        negative_df = df[df['Sentiment'] == 'Negative']
        _write_rows(
            wb['Анализ'],
            (
                (
                    row['Объект'],
                    row['Заголовок'],
                    "Риск убытка",
                    row.get('Reasoning', '-'),
                    row['Выдержки из текста'],
                )
                for _, row in negative_df.iterrows()
            ),
            start_row=4,
            start_col=5,
        )

        # 6. Update 'Тех.приложение' sheet.
        if 'Тех.приложение' not in wb.sheetnames:
            wb.create_sheet('Тех.приложение')
        ws = wb['Тех.приложение']

        tech_cols = ['Объект', 'Заголовок', 'Выдержки из текста', 'Translated', 'Sentiment', 'Impact', 'Reasoning']
        # reindex keeps the column layout fixed even when an optional column
        # is absent; plain df[tech_cols] would raise KeyError and the whole
        # report would be lost.
        missing_cols = [col for col in tech_cols if col not in df.columns]
        tech_df = df.reindex(columns=tech_cols)
        if missing_cols:
            logger.warning(f"Missing columns for 'Тех.приложение': {missing_cols}")
            tech_df[missing_cols] = '-'
        _write_rows(ws, dataframe_to_rows(tech_df, index=False, header=True))

        # Save workbook to memory and rewind so the caller can read it.
        output = io.BytesIO()
        wb.save(output)
        output.seek(0)
        return output

    except Exception as e:
        # Best-effort boundary: callers receive None instead of an exception.
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception(f"Error creating output file: {str(e)}")
        logger.error(f"DataFrame shape: {df.shape}")
        logger.error(f"Available columns: {df.columns.tolist()}")
        return None


def _write_rows(ws, rows, start_row=1, start_col=1):
    """Write each sequence in *rows* to *ws* as one worksheet row, starting at (start_row, start_col)."""
    for r_idx, values in enumerate(rows, start=start_row):
        for c_idx, value in enumerate(values, start=start_col):
            ws.cell(row=r_idx, column=c_idx, value=value)
|
642 |
+
|
643 |
+
|
644 |
def create_interface():
|
645 |
control = ProcessControl()
|
646 |
|
647 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
648 |
+
gr.Markdown("# AI-анализ мониторинга новостей v.1.29")
|
649 |
|
650 |
with gr.Row():
|
651 |
file_input = gr.File(
|