Spaces:
Running
Running
Commit
·
6da2a21
1
Parent(s):
08fb3e7
progress more (3.3)
Browse files- app.py +36 -70
- requirements.txt +1 -1
app.py
CHANGED
@@ -20,45 +20,29 @@ import sys
|
|
20 |
import contextlib
|
21 |
|
22 |
|
23 |
-
|
24 |
-
def __init__(self):
|
25 |
-
self.output = []
|
26 |
|
|
|
|
|
|
|
|
|
27 |
def write(self, text):
|
28 |
-
self.
|
29 |
-
|
30 |
-
def getvalue(self):
|
31 |
-
return ''.join(self.output)
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
lines = output_text.split('\n')
|
46 |
-
|
47 |
-
# Add each line to the PDF
|
48 |
-
for line in lines:
|
49 |
-
if line.strip(): # Skip empty lines
|
50 |
-
# Clean the line and handle any encoding issues
|
51 |
-
cleaned_line = line.encode('utf-8', errors='ignore').decode('utf-8')
|
52 |
-
p = Paragraph(cleaned_line, styles['Normal'])
|
53 |
-
story.append(p)
|
54 |
-
story.append(Spacer(1, 12))
|
55 |
-
|
56 |
-
# Build the PDF
|
57 |
-
doc.build(story)
|
58 |
-
st.success("PDF файл 'result.pdf' успешно создан")
|
59 |
-
except Exception as e:
|
60 |
-
st.error(f"Ошибка при создании PDF: {str(e)}")
|
61 |
|
|
|
62 |
# Initialize sentiment analyzers
|
63 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
64 |
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
@@ -211,12 +195,10 @@ def generate_sentiment_visualization(df):
|
|
211 |
return fig
|
212 |
|
213 |
def process_file(uploaded_file):
|
214 |
-
|
215 |
-
|
216 |
-
output_capture = StreamlitOutputCapture()
|
217 |
old_stdout = sys.stdout
|
218 |
sys.stdout = output_capture
|
219 |
-
|
220 |
try:
|
221 |
df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
222 |
|
@@ -253,50 +235,33 @@ def process_file(uploaded_file):
|
|
253 |
df['Reasoning'] = ''
|
254 |
|
255 |
for index, row in df.iterrows():
|
256 |
-
# First: Translate
|
257 |
translated_text = translate_text(llm, row['Выдержки из текста'])
|
258 |
df.at[index, 'Translated'] = translated_text
|
259 |
-
|
260 |
-
# Second: Analyze sentiment
|
261 |
sentiment = analyze_sentiment(translated_text)
|
262 |
df.at[index, 'Sentiment'] = sentiment
|
263 |
-
|
264 |
-
# Third: If negative, estimate impact
|
265 |
if sentiment == "Negative":
|
266 |
impact, reasoning = estimate_impact(llm, translated_text, row['Объект'])
|
267 |
df.at[index, 'Impact'] = impact
|
268 |
df.at[index, 'Reasoning'] = reasoning
|
269 |
-
|
270 |
# Update progress
|
271 |
progress = (index + 1) / len(df)
|
272 |
progress_bar.progress(progress)
|
273 |
status_text.text(f"Проанализировано {index + 1} из {len(df)} новостей")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
-
# Display results
|
276 |
-
st.write(f"Объект: {row['Объект']}")
|
277 |
-
st.write(f"Новость: {row['Заголовок']}")
|
278 |
-
st.write(f"Тональность: {sentiment}")
|
279 |
-
if sentiment == "Negative":
|
280 |
-
st.write(f"Эффект: {impact}")
|
281 |
-
st.write(f"Обоснование: {reasoning}")
|
282 |
-
st.write("---")
|
283 |
-
|
284 |
-
progress_bar.empty()
|
285 |
-
status_text.empty()
|
286 |
-
|
287 |
-
# Generate visualization
|
288 |
-
visualization = generate_sentiment_visualization(df)
|
289 |
-
if visualization:
|
290 |
-
st.pyplot(visualization)
|
291 |
-
|
292 |
-
save_to_pdf(output_capture)
|
293 |
-
|
294 |
-
|
295 |
return df
|
296 |
|
297 |
-
|
298 |
-
finally:
|
299 |
-
|
300 |
sys.stdout = old_stdout
|
301 |
|
302 |
def create_analysis_data(df):
|
@@ -410,7 +375,7 @@ def main():
|
|
410 |
unsafe_allow_html=True
|
411 |
)
|
412 |
|
413 |
-
st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.
|
414 |
|
415 |
if 'processed_df' not in st.session_state:
|
416 |
st.session_state.processed_df = None
|
@@ -421,7 +386,8 @@ def main():
|
|
421 |
start_time = time.time()
|
422 |
|
423 |
st.session_state.processed_df = process_file(uploaded_file)
|
424 |
-
|
|
|
425 |
st.subheader("Предпросмотр данных")
|
426 |
preview_df = st.session_state.processed_df[['Объект', 'Заголовок', 'Sentiment', 'Impact']].head()
|
427 |
st.dataframe(preview_df)
|
|
|
20 |
import contextlib
|
21 |
|
22 |
|
23 |
+
from fpdf import FPDF
|
|
|
|
|
24 |
|
25 |
+
class StreamlitCapture:
    """Minimal write-only text sink that can stand in for ``sys.stdout``.

    Every chunk passed to :meth:`write` is kept, in call order, in the
    ``texts`` list so the captured output can be post-processed later.
    """

    def __init__(self):
        # Captured chunks, in the order they were written.
        self.texts = []

    def write(self, text):
        """Store *text* (coerced to ``str``) and return its length.

        Returning the character count mirrors the usual file-object
        ``write`` contract; callers that ignore the result are unaffected.
        """
        chunk = str(text)
        self.texts.append(chunk)
        return len(chunk)

    def flush(self):
        """Do nothing; there is no buffer to flush.

        Present so that ``print(..., flush=True)`` and explicit
        ``sys.stdout.flush()`` calls do not raise ``AttributeError`` while
        this object is installed as the standard output stream.
        """
|
|
|
|
|
|
|
31 |
|
32 |
+
def save_streamlit_output_to_pdf(texts):
    """Render the given text chunks into ``result.pdf``.

    Args:
        texts: Iterable of text chunks; each one is written as its own
            multi-line cell, in order.

    The output path is the literal ``"result.pdf"`` (relative to the
    current working directory). The font file ``DejaVuSansCondensed.ttf``
    is passed to FPDF by bare name, so it presumably has to be resolvable
    from the working directory or FPDF's font path - TODO confirm for the
    deployment environment.
    """
    pdf = FPDF()
    pdf.add_page()
    # Register a Unicode TrueType font so non-Latin text can be rendered.
    pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf', uni=True)
    pdf.set_font('DejaVu', '', 12)

    for text in texts:
        # The text is passed through unchanged. The previous latin-1
        # encode/decode round trip with errors='replace' turned every
        # character outside Latin-1 (all Cyrillic, for instance) into '?',
        # defeating the Unicode font registered above.
        chunk = str(text)
        # Start every cell at the left margin. NOTE(review): fpdf2 appears
        # to leave the cursor at the right edge after a full-width
        # multi_cell, which would break the next call - confirm against
        # the pinned fpdf2 version.
        pdf.set_x(pdf.l_margin)
        pdf.multi_cell(0, 10, chunk)

    pdf.output("result.pdf", 'F')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
|
46 |
# Initialize sentiment analyzers
|
47 |
finbert = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
48 |
roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
|
|
|
195 |
return fig
|
196 |
|
197 |
def process_file(uploaded_file):
|
198 |
+
output_capture = StreamlitCapture()
|
|
|
|
|
199 |
old_stdout = sys.stdout
|
200 |
sys.stdout = output_capture
|
201 |
+
|
202 |
try:
|
203 |
df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
204 |
|
|
|
235 |
df['Reasoning'] = ''
|
236 |
|
237 |
for index, row in df.iterrows():
|
|
|
238 |
translated_text = translate_text(llm, row['Выдержки из текста'])
|
239 |
df.at[index, 'Translated'] = translated_text
|
240 |
+
|
|
|
241 |
sentiment = analyze_sentiment(translated_text)
|
242 |
df.at[index, 'Sentiment'] = sentiment
|
243 |
+
|
|
|
244 |
if sentiment == "Negative":
|
245 |
impact, reasoning = estimate_impact(llm, translated_text, row['Объект'])
|
246 |
df.at[index, 'Impact'] = impact
|
247 |
df.at[index, 'Reasoning'] = reasoning
|
248 |
+
|
249 |
# Update progress
|
250 |
progress = (index + 1) / len(df)
|
251 |
progress_bar.progress(progress)
|
252 |
status_text.text(f"Проанализировано {index + 1} из {len(df)} новостей")
|
253 |
+
|
254 |
+
# Display results with color coding
|
255 |
+
display_sentiment_results(row, sentiment,
|
256 |
+
impact if sentiment == "Negative" else None,
|
257 |
+
reasoning if sentiment == "Negative" else None)
|
258 |
+
|
259 |
+
# Generate PDF at the end of processing
|
260 |
+
save_streamlit_output_to_pdf(output_capture.texts)
|
261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
return df
|
263 |
|
264 |
+
finally:
|
|
|
|
|
265 |
sys.stdout = old_stdout
|
266 |
|
267 |
def create_analysis_data(df):
|
|
|
375 |
unsafe_allow_html=True
|
376 |
)
|
377 |
|
378 |
+
st.title("::: анализ мониторинга новостей СКАН-ИНТЕРФАКС (v.3.3):::")
|
379 |
|
380 |
if 'processed_df' not in st.session_state:
|
381 |
st.session_state.processed_df = None
|
|
|
386 |
start_time = time.time()
|
387 |
|
388 |
st.session_state.processed_df = process_file(uploaded_file)
|
389 |
+
|
390 |
+
|
391 |
st.subheader("Предпросмотр данных")
|
392 |
preview_df = st.session_state.processed_df[['Объект', 'Заголовок', 'Sentiment', 'Impact']].head()
|
393 |
st.dataframe(preview_df)
|
requirements.txt
CHANGED
@@ -16,4 +16,4 @@ huggingface_hub
|
|
16 |
accelerate>=0.26.0
|
17 |
openai
|
18 |
wordcloud
|
19 |
-
|
|
|
16 |
accelerate>=0.26.0
|
17 |
openai
|
18 |
wordcloud
|
19 |
+
fpdf2==2.7.8
|