Commit 4ff6c1a
Parent(s): 1f0f3cb
progress more (3.1)

Files changed:
- app.py +83 -32
- requirements.txt +1 -0
app.py CHANGED
@@ -11,6 +11,14 @@ from langchain_community.chat_models import ChatOpenAI
 from langchain.prompts import PromptTemplate
 from langchain_core.runnables import RunnablePassthrough
 from transformers import pipeline
+from reportlab.lib import colors
+from reportlab.lib.pagesizes import letter
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.lib.styles import getSampleStyleSheet
+from io import StringIO
+import contextlib
+
+
 
 # Initialize sentiment analyzers
 finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
@@ -261,25 +269,62 @@ def create_analysis_data(df):
 def create_output_file(df, uploaded_file):
     wb = load_workbook("sample_file.xlsx")
 
-    #
-
+    # Sort entities by number of negative publications
+    entity_stats = pd.DataFrame({
         'Объект': df['Объект'].unique(),
-        'Всего
+        'Всего': df.groupby('Объект').size(),
         'Негативные': df[df['Sentiment'] == 'Negative'].groupby('Объект').size().fillna(0).astype(int),
-        'Позитивные': df[df['Sentiment'] == 'Positive'].groupby('Объект').size().fillna(0).astype(int)
-
-        lambda x: x.value_counts().index[0] if len(x) > 0 else 'Неопределенный'
-    )
-    })
+        'Позитивные': df[df['Sentiment'] == 'Positive'].groupby('Объект').size().fillna(0).astype(int)
+    }).sort_values('Негативные', ascending=False)
 
-
+    # Calculate most negative impact for each entity
+    entity_impacts = {}
+    for entity in df['Объект'].unique():
+        entity_df = df[df['Объект'] == entity]
+        negative_impacts = entity_df[entity_df['Sentiment'] == 'Negative']['Impact']
+        entity_impacts[entity] = negative_impacts.iloc[0] if len(negative_impacts) > 0 else 'Неопределенный эффект'
 
-    #
-
+    # Update 'Сводка' sheet
+    ws = wb['Сводка']
+    for idx, (entity, row) in enumerate(entity_stats.iterrows(), start=4):
+        ws.cell(row=idx, column=5, value=entity)  # Column E
+        ws.cell(row=idx, column=6, value=row['Всего'])  # Column F
+        ws.cell(row=idx, column=7, value=row['Негативные'])  # Column G
+        ws.cell(row=idx, column=8, value=row['Позитивные'])  # Column H
+        ws.cell(row=idx, column=9, value=entity_impacts[entity])  # Column I
 
-    # Update '
-
+    # Update 'Значимые' sheet
+    ws = wb['Значимые']
+    row_idx = 3
+    for _, row in df.iterrows():
+        if row['Sentiment'] in ['Negative', 'Positive']:
+            ws.cell(row=row_idx, column=3, value=row['Объект'])  # Column C
+            ws.cell(row=row_idx, column=4, value='релевантно')  # Column D
+            ws.cell(row=row_idx, column=5, value=row['Sentiment'])  # Column E
+            ws.cell(row=row_idx, column=6, value=row['Impact'])  # Column F
+            ws.cell(row=row_idx, column=7, value=row['Заголовок'])  # Column G
+            ws.cell(row=row_idx, column=8, value=row['Выдержки из текста'])  # Column H
+            row_idx += 1
+
+    # Copy 'Публикации' sheet
+    original_df = pd.read_excel(uploaded_file, sheet_name='Публикации')
+    ws = wb['Публикации']
+    for r_idx, row in enumerate(dataframe_to_rows(original_df, index=False, header=True), start=1):
+        for c_idx, value in enumerate(row, start=1):
+            ws.cell(row=r_idx, column=c_idx, value=value)
 
+    # Update 'Анализ' sheet
+    ws = wb['Анализ']
+    row_idx = 4
+    for _, row in df[df['Sentiment'] == 'Negative'].iterrows():
+        ws.cell(row=row_idx, column=5, value=row['Объект'])  # Column E
+        ws.cell(row=row_idx, column=6, value=row['Заголовок'])  # Column F
+        ws.cell(row=row_idx, column=7, value="Риск убытка")  # Column G
+        ws.cell(row=row_idx, column=9, value=row['Выдержки из текста'])  # Column I
+        row_idx += 1
+
+    # Update 'Тех.приложение' sheet
+    tech_df = df[['Объект', 'Заголовок', 'Выдержки из текста', 'Translated', 'Sentiment', 'Impact', 'Reasoning']]
     if 'Тех.приложение' not in wb.sheetnames:
         wb.create_sheet('Тех.приложение')
     ws = wb['Тех.приложение']
@@ -293,25 +338,27 @@ def create_output_file(df, uploaded_file):
     return output
 
 def main():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Capture all output for PDF
+    with capture_streamlit_output() as output:
+        st.markdown(
+            """
+            <style>
+            .signature {
+                position: fixed;
+                right: 12px;
+                bottom: 12px;
+                font-size: 14px;
+                color: #FF0000;
+                opacity: 0.9;
+                z-index: 999;
+            }
+            </style>
+            <div class="signature">denis.pokrovsky.npff</div>
+            """,
+            unsafe_allow_html=True
+        )
+
+        st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.1):::")
 
     if 'processed_df' not in st.session_state:
         st.session_state.processed_df = None
@@ -338,6 +385,10 @@ def main():
         formatted_time = format_elapsed_time(elapsed_time)
         st.success(f"Обработка и анализ завершены за {formatted_time}.")
 
+        if st.session_state.processed_df is not None:
+            save_to_pdf(output)  # Save the captured output to PDF
+
+
         st.download_button(
             label="Скачать результат анализа",
            data=output,
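Note: this commit wraps the body of main() in capture_streamlit_output() and newly imports StringIO and contextlib, but the helper itself is not defined in the hunks shown above. A minimal sketch of what such a context manager could look like, assuming it only collects text written to stdout while the app runs (the name comes from the diff; the implementation below is a guess, not code from this commit):

import contextlib
from io import StringIO

@contextlib.contextmanager
def capture_streamlit_output():
    # Hypothetical helper (not part of the shown diff): gather anything printed
    # to stdout while the block runs, so it can be turned into a report later.
    buffer = StringIO()
    with contextlib.redirect_stdout(buffer):
        yield buffer  # caller reads buffer.getvalue() after the block exits

main() then passes the captured buffer to save_to_pdf(output) once processing finishes.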
requirements.txt CHANGED
@@ -16,3 +16,4 @@ huggingface_hub
 accelerate>=0.26.0
 openai
 wordcloud
+reportlab==4.1.0
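save_to_pdf() is likewise called in main() but not defined in the hunks above; the new reportlab imports (SimpleDocTemplate, Paragraph, Spacer, getSampleStyleSheet) and the reportlab==4.1.0 pin suggest a simple Platypus document. A sketch under that assumption, with the function signature, output filename, and line-by-line handling all being guesses rather than code from this commit:

from xml.sax.saxutils import escape

from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer

def save_to_pdf(output, filename="analysis_report.pdf"):
    # Hypothetical sketch: render the captured text buffer as a one-column PDF.
    styles = getSampleStyleSheet()
    doc = SimpleDocTemplate(filename, pagesize=letter)
    story = []
    for line in output.getvalue().splitlines():
        if line.strip():
            story.append(Paragraph(escape(line), styles["Normal"]))  # one paragraph per line
            story.append(Spacer(1, 6))                               # small vertical gap
    doc.build(story)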