pentarosarium committed on
Commit
c39eb14
·
2 Parent(s): 3422965 053a9af

Merge branch 'master' of https://github.com/denispokrovsky/synthreader

Browse files
Files changed (2) hide show
  1. hf-streamlit-app.py +89 -0
  2. requirements.txt +9 -0
hf-streamlit-app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import time
4
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
5
+ from transformers import pipeline, MarianMTModel, MarianTokenizer
6
+ import matplotlib.pyplot as plt
7
+ from pymystem3 import Mystem
8
+ import io
9
+ from rapidfuzz import fuzz
10
+
11
+ # Initialize components (VADER, FinBERT, RoBERTa, FinBERT-Tone, Mystem, translation model)
12
+ # (Copy the initialization code from your original script)
13
+
14
+ # Define helper functions (lemmatize_text, translate, get_vader_sentiment, etc.)
15
+ # (Copy these functions from your original script)
16
+
17
def process_file(uploaded_file):
    """Deduplicate, translate and sentiment-score the excerpts of a workbook.

    Reads the 'Публикации' sheet, removes near-duplicate excerpts within each
    'Объект' group, translates every remaining excerpt and scores the
    translation with four sentiment models. A Streamlit progress bar is
    updated while the translation loop runs.

    The helpers ``fuzzy_deduplicate``, ``translate`` and the
    ``get_*_sentiment`` functions are expected to be defined elsewhere in
    this module.

    Args:
        uploaded_file: Anything ``pd.read_excel`` accepts; in this app it is
            the object returned by ``st.file_uploader``.

    Returns:
        A DataFrame with the columns 'Объект', 'VADER', 'FinBERT', 'RoBERTa',
        'FinBERT-Tone' and 'Выдержки из текста', in that order.

    Raises:
        KeyError: If the sheet lacks the 'Объект' or 'Выдержки из текста'
            column. Errors raised by ``pd.read_excel`` propagate unchanged.
    """
    group_column = 'Объект'
    text_column = 'Выдержки из текста'

    df = pd.read_excel(uploaded_file, sheet_name='Публикации')

    # Apply fuzzy deduplication group by group. Iterating the groups (rather
    # than groupby(...).apply(...)) always hands the helper the full
    # sub-frame, including the grouping column; newer pandas releases no
    # longer pass that column through apply(), which would make the column
    # reorder at the end of this function fail.
    deduplicated = [
        fuzzy_deduplicate(group, text_column, 65)
        for _, group in df.groupby(group_column)
    ]
    if deduplicated:
        df = pd.concat(deduplicated).reset_index(drop=True)
    else:
        # No groups at all (empty sheet): keep the columns, drop the rows.
        df = df.iloc[0:0].reset_index(drop=True)

    # Translate texts, reporting progress as a fraction of processed rows.
    progress_bar = st.progress(0)
    total = len(df)
    translated_texts = []
    for done, text in enumerate(df[text_column], start=1):
        translated_texts.append(translate(str(text)))
        progress_bar.progress(done / total)

    # Perform sentiment analysis: one result column per model, each computed
    # over the translated texts.
    scorers = {
        'VADER': get_vader_sentiment,
        'FinBERT': get_finbert_sentiment,
        'RoBERTa': get_roberta_sentiment,
        'FinBERT-Tone': get_finbert_tone_sentiment,
    }
    for column, scorer in scorers.items():
        df[column] = [scorer(text) for text in translated_texts]

    # Reorder columns: group, model scores, then the original excerpt.
    columns_order = [group_column, *scorers, text_column]
    return df[columns_order]
48
+
49
def main():
    """Render the Streamlit page: upload, preview, charts and download.

    Once an Excel file is uploaded it is passed to ``process_file``; the
    first rows of the result are shown, a 2x2 grid of bar charts displays
    the label counts of each sentiment model, and the full result is offered
    for download as an .xlsx file. Nothing is rendered below the uploader
    until a file has been provided.
    """
    st.title("Sentiment Analysis App")

    uploaded_file = st.file_uploader("Choose an Excel file", type="xlsx")
    if uploaded_file is None:
        return

    df = process_file(uploaded_file)

    st.subheader("Data Preview")
    st.write(df.head())

    st.subheader("Sentiment Distribution")
    fig, axs = plt.subplots(2, 2, figsize=(12, 8))
    try:
        fig.suptitle("Sentiment Distribution for Each Model")

        models = ['VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
        # axs.flat walks the 2x2 grid row by row, so the models fill it
        # left-to-right, top-to-bottom.
        for ax, model in zip(axs.flat, models):
            df[model].value_counts().plot(kind='bar', ax=ax)
            ax.set_title(f"{model} Sentiment")
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Count")

        fig.tight_layout()
        st.pyplot(fig)
    finally:
        # The script is re-executed on every interaction; without closing,
        # each run would leave another figure registered with pyplot.
        plt.close(fig)

    # Offer download of results: write the workbook into memory and rewind
    # the buffer so the download starts at the first byte.
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False)
    output.seek(0)
    st.download_button(
        label="Download results as Excel",
        data=output,
        file_name="sentiment_analysis_results.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ vaderSentiment
4
+ transformers
5
+ torch
6
+ pymystem3
7
+ openpyxl
8
+ rapidfuzz
9
+ matplotlib