Spaces:
Running
Running
Commit
·
4cd2605
1
Parent(s):
5940210
progress more (3.62+)
Browse files
app.py
CHANGED
@@ -18,7 +18,7 @@ import pdfkit
|
|
18 |
from jinja2 import Template
|
19 |
|
20 |
|
21 |
-
def create_download_section(excel_data,
|
22 |
st.markdown("""
|
23 |
<style>
|
24 |
.download-container {
|
@@ -40,23 +40,34 @@ def create_download_section(excel_data, pdf_data):
|
|
40 |
""", unsafe_allow_html=True)
|
41 |
|
42 |
col1, col2 = st.columns(2)
|
|
|
43 |
with col1:
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
51 |
with col2:
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
def display_sentiment_results(row, sentiment, impact=None, reasoning=None):
|
62 |
if sentiment == "Negative":
|
@@ -85,47 +96,54 @@ def display_sentiment_results(row, sentiment, impact=None, reasoning=None):
|
|
85 |
st.write("---")
|
86 |
|
87 |
|
88 |
-
|
89 |
-
|
90 |
class StreamlitCapture:
|
91 |
def __init__(self):
|
92 |
self.texts = []
|
93 |
|
94 |
def write(self, text):
|
95 |
-
|
96 |
-
|
97 |
-
def save_streamlit_output_to_pdf(texts):
|
98 |
-
# Create HTML content
|
99 |
-
html_content = """
|
100 |
-
<html>
|
101 |
-
<head>
|
102 |
-
<meta charset="UTF-8">
|
103 |
-
<style>
|
104 |
-
body { font-family: Arial, sans-serif; }
|
105 |
-
.content { margin: 20px; }
|
106 |
-
</style>
|
107 |
-
</head>
|
108 |
-
<body>
|
109 |
-
<div class="content">
|
110 |
-
{% for text in texts %}
|
111 |
-
<p>{{ text }}</p>
|
112 |
-
{% endfor %}
|
113 |
-
</div>
|
114 |
-
</body>
|
115 |
-
</html>
|
116 |
-
"""
|
117 |
-
|
118 |
-
template = Template(html_content)
|
119 |
-
rendered_html = template.render(texts=texts)
|
120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
try:
|
122 |
-
|
123 |
-
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
except Exception as e:
|
126 |
-
st.
|
127 |
-
|
128 |
-
|
129 |
|
130 |
# Initialize sentiment analyzers
|
131 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
@@ -300,7 +318,7 @@ def process_file(uploaded_file):
|
|
300 |
|
301 |
# Deduplication
|
302 |
original_news_count = len(df)
|
303 |
-
df = df.groupby('Объект').apply(
|
304 |
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
|
305 |
).reset_index(drop=True)
|
306 |
|
@@ -340,21 +358,22 @@ def process_file(uploaded_file):
|
|
340 |
impact if sentiment == "Negative" else None,
|
341 |
reasoning if sentiment == "Negative" else None)
|
342 |
|
343 |
-
|
344 |
-
|
345 |
|
346 |
-
# Prepare
|
347 |
excel_output = create_output_file(df, uploaded_file)
|
348 |
-
pdf_data = save_streamlit_output_to_pdf(output_capture.texts)
|
349 |
|
350 |
# Show success message
|
351 |
-
st.success(f"✅ Обработка и анализ завершены за
|
352 |
|
353 |
# Create download section
|
354 |
-
create_download_section(excel_output,
|
|
|
355 |
return df
|
356 |
-
|
357 |
except Exception as e:
|
|
|
358 |
st.error(f"❌ Ошибка при обработке файла: {str(e)}")
|
359 |
raise e
|
360 |
|
@@ -477,7 +496,7 @@ def main():
|
|
477 |
unsafe_allow_html=True
|
478 |
)
|
479 |
|
480 |
-
st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.
|
481 |
|
482 |
if 'processed_df' not in st.session_state:
|
483 |
st.session_state.processed_df = None
|
|
|
18 |
from jinja2 import Template
|
19 |
|
20 |
|
21 |
+
def create_download_section(excel_data, output_capture):
|
22 |
st.markdown("""
|
23 |
<style>
|
24 |
.download-container {
|
|
|
40 |
""", unsafe_allow_html=True)
|
41 |
|
42 |
col1, col2 = st.columns(2)
|
43 |
+
|
44 |
with col1:
|
45 |
+
if excel_data is not None:
|
46 |
+
st.download_button(
|
47 |
+
label="📊 Скачать Excel отчет",
|
48 |
+
data=excel_data,
|
49 |
+
file_name="результат_анализа.xlsx",
|
50 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
51 |
+
key="excel_download"
|
52 |
+
)
|
53 |
+
else:
|
54 |
+
st.error("Ошибка при создании Excel файла")
|
55 |
+
|
56 |
with col2:
|
57 |
+
try:
|
58 |
+
pdf_data = generate_pdf_report(output_capture.texts)
|
59 |
+
if pdf_data:
|
60 |
+
st.download_button(
|
61 |
+
label="📄 Скачать протокол",
|
62 |
+
data=pdf_data,
|
63 |
+
file_name="протокол_анализа.pdf" if isinstance(pdf_data, bytes) else "протокол_анализа.txt",
|
64 |
+
mime="application/pdf" if isinstance(pdf_data, bytes) else "text/plain",
|
65 |
+
key="pdf_download"
|
66 |
+
)
|
67 |
+
else:
|
68 |
+
st.error("Ошибка при создании протокола")
|
69 |
+
except Exception as e:
|
70 |
+
st.error(f"Ошибка при создании протокола: {str(e)}")
|
71 |
|
72 |
def display_sentiment_results(row, sentiment, impact=None, reasoning=None):
|
73 |
if sentiment == "Negative":
|
|
|
96 |
st.write("---")
|
97 |
|
98 |
|
|
|
|
|
99 |
class StreamlitCapture:
    """Minimal write-only text stream that records what is written to it.

    Intended to stand in for a text stream such as ``sys.stdout``: every
    non-blank chunk passed to :meth:`write` is stored, in order, in
    ``self.texts`` as a ``str``.
    """

    def __init__(self):
        # Captured chunks, in the order they were written.
        self.texts = []

    def write(self, text):
        """Record *text* if it contains anything other than whitespace.

        Falsy values (``""``, ``None``, ``0``) and whitespace-only chunks,
        such as the bare newline ``print`` emits, are not stored.

        Returns:
            The number of characters accepted, as text-stream ``write``
            methods conventionally do (0 for falsy input).
        """
        rendered = str(text) if text else ""
        if rendered.strip():  # Only capture non-empty text
            self.texts.append(rendered)
        return len(rendered)

    def flush(self):
        """Do nothing; present so callers that flush the stream keep working."""
        pass

    def isatty(self):
        """Report that this stream is not an interactive terminal."""
        return False
|
109 |
+
|
110 |
+
|
111 |
+
|
112 |
+
def generate_pdf_report(texts):
    """Render captured text chunks as a PDF, falling back to plain text.

    Args:
        texts: Iterable of text chunks; each one becomes a paragraph.

    Returns:
        ``bytes`` holding the PDF produced by ``pdfkit.from_string`` when
        rendering succeeds. If anything on the PDF path fails (missing
        dependency, rendering error, ...), a warning is shown through
        ``st.warning`` and the newline-joined text is returned as ``str``.
        Callers can therefore tell the two outcomes apart with
        ``isinstance(result, bytes)``.
    """
    # Materialize once so the fallback still has the data even if the
    # template already consumed a one-shot iterable.
    texts = [str(text) for text in texts]

    try:
        import pdfkit
        from jinja2 import Template

        html_content = """
        <html>
            <head>
                <meta charset="UTF-8">
                <style>
                    body { font-family: Arial, sans-serif; }
                    .content { margin: 20px; }
                </style>
            </head>
            <body>
                <div class="content">
                    {% for text in texts %}
                        <p>{{ text }}</p>
                    {% endfor %}
                </div>
            </body>
        </html>
        """

        # Autoescape so that '<', '>' and '&' in the captured output are
        # shown literally instead of being parsed as markup.
        template = Template(html_content, autoescape=True)
        rendered_html = template.render(texts=texts)

        # Create PDF in memory: a False output path makes pdfkit return bytes.
        pdf_data = pdfkit.from_string(rendered_html, False)
        return pdf_data

    except Exception as e:
        st.warning(f"Не удалось создать PDF отчет: {str(e)}")
        # Return the text as str (not bytes) so the fallback is not
        # mistaken for a real PDF by an isinstance(..., bytes) check.
        return '\n'.join(texts)
|
147 |
|
148 |
# Initialize sentiment analyzers
|
149 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
|
|
318 |
|
319 |
# Deduplication
|
320 |
original_news_count = len(df)
|
321 |
+
df = df.groupby('Объект', group_keys=False).apply(
|
322 |
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
|
323 |
).reset_index(drop=True)
|
324 |
|
|
|
358 |
impact if sentiment == "Negative" else None,
|
359 |
reasoning if sentiment == "Negative" else None)
|
360 |
|
361 |
+
|
362 |
+
sys.stdout = old_stdout
|
363 |
|
364 |
+
# Prepare Excel file
|
365 |
excel_output = create_output_file(df, uploaded_file)
|
|
|
366 |
|
367 |
# Show success message
|
368 |
+
st.success(f"✅ Обработка и анализ завершены за умеренное время.")
|
369 |
|
370 |
# Create download section
|
371 |
+
create_download_section(excel_output, output_capture)
|
372 |
+
|
373 |
return df
|
374 |
+
|
375 |
except Exception as e:
|
376 |
+
sys.stdout = old_stdout
|
377 |
st.error(f"❌ Ошибка при обработке файла: {str(e)}")
|
378 |
raise e
|
379 |
|
|
|
496 |
unsafe_allow_html=True
|
497 |
)
|
498 |
|
499 |
+
st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.62):::")
|
500 |
|
501 |
if 'processed_df' not in st.session_state:
|
502 |
st.session_state.processed_df = None
|