pentarosarium committed on
Commit
7e14e6f
·
1 Parent(s): c39eb14

another try at sync with hf

Browse files
Files changed (2) hide show
  1. .github/workflows/sync-to-huggingface.yml +18 -0
  2. app.py +90 -0
.github/workflows/sync-to-huggingface.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Mirrors every push to the `main` branch of this GitHub repository to the
# Hugging Face Space `pentarosarium/processor`.
name: Sync to Hugging Face

on:
  push:
    branches: [main]

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch the complete history so the push is not made from a
          # shallow clone.
          fetch-depth: 0
      - name: Push to hub
        env:
          # Hugging Face access token, read from the repository secrets.
          # NOTE(review): the token presumably needs write access to the
          # Space -- confirm.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git push https://pentarosarium:$HF_TOKEN@huggingface.co/spaces/pentarosarium/processor main
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import time
4
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
5
+ from transformers import pipeline, MarianMTModel, MarianTokenizer
6
+ import matplotlib.pyplot as plt
7
+ from pymystem3 import Mystem
8
+ import io
9
+ from rapidfuzz import fuzz
10
+
11
+ # Initialize components (VADER, FinBERT, RoBERTa, FinBERT-Tone, Mystem, translation model)
12
+
13
+ # (Copy the initialization code from your original script)
14
+
15
+ # Define helper functions (lemmatize_text, translate, get_vader_sentiment...)
16
+ # (Copy these functions from your original script)
17
+
18
def process_file(uploaded_file):
    """Turn an uploaded Excel workbook into a per-excerpt sentiment table.

    Steps, in order:
      1. Read the 'Публикации' sheet of ``uploaded_file``.
      2. Group rows by 'Объект' and run ``fuzzy_deduplicate`` on each group
         over the 'Выдержки из текста' column (third argument 65 --
         presumably a similarity threshold; confirm against the helper).
      3. Translate every excerpt with ``translate`` while advancing a
         Streamlit progress bar.
      4. Score the translated texts with the VADER, FinBERT, RoBERTa and
         FinBERT-Tone helpers.

    Args:
        uploaded_file: Anything ``pd.read_excel`` accepts (here the object
            returned by ``st.file_uploader``).

    Returns:
        A DataFrame with the columns 'Объект', 'VADER', 'FinBERT',
        'RoBERTa', 'FinBERT-Tone' and 'Выдержки из текста', in that order.
    """
    entity_col = 'Объект'
    excerpt_col = 'Выдержки из текста'

    raw = pd.read_excel(uploaded_file, sheet_name='Публикации')

    # Apply fuzzy deduplication independently inside every entity group.
    def _dedupe(group):
        return fuzzy_deduplicate(group, excerpt_col, 65)

    df = raw.groupby(entity_col).apply(_dedupe).reset_index(drop=True)

    # Translate the excerpts one by one, reporting progress as a fraction.
    translated_texts = []
    progress_bar = st.progress(0)
    row_count = len(df)
    for done, excerpt in enumerate(df[excerpt_col], start=1):
        translated_texts.append(translate(str(excerpt)))
        progress_bar.progress(done / row_count)

    # Run each model over the whole batch (one model at a time) and attach
    # the results as new columns.
    df = df.assign(**{
        'VADER': [get_vader_sentiment(t) for t in translated_texts],
        'FinBERT': [get_finbert_sentiment(t) for t in translated_texts],
        'RoBERTa': [get_roberta_sentiment(t) for t in translated_texts],
        'FinBERT-Tone': [get_finbert_tone_sentiment(t) for t in translated_texts],
    })

    # Keep only the reporting columns, in presentation order.
    return df[[entity_col, 'VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone', excerpt_col]]
49
+
50
def main():
    """Streamlit entry point for the sentiment analysis app.

    Shows a file uploader; once an ``.xlsx`` file is provided it is passed to
    ``process_file``, after which the page displays a preview of the result,
    a 2x2 grid of bar charts (one per sentiment model) and a button to
    download the full result as an Excel workbook.
    """
    st.title("Sentiment Analysis App")

    uploaded_file = st.file_uploader("Choose an Excel file", type="xlsx")
    if uploaded_file is None:
        # Nothing uploaded yet: render only the title and the uploader.
        return

    df = process_file(uploaded_file)

    st.subheader("Data Preview")
    st.write(df.head())

    st.subheader("Sentiment Distribution")
    models = ['VADER', 'FinBERT', 'RoBERTa', 'FinBERT-Tone']
    fig, axs = plt.subplots(2, 2, figsize=(12, 8))
    try:
        fig.suptitle("Sentiment Distribution for Each Model")

        # ``axs.flat`` walks the 2x2 grid row by row, matching the order of
        # ``models``.
        for ax, model in zip(axs.flat, models):
            sentiment_counts = df[model].value_counts()
            # Skip the bar plot when there is nothing to count (e.g. an
            # empty sheet) and leave the labelled axes blank instead.
            if not sentiment_counts.empty:
                sentiment_counts.plot(kind='bar', ax=ax)
            ax.set_title(f"{model} Sentiment")
            ax.set_xlabel("Sentiment")
            ax.set_ylabel("Count")

        fig.tight_layout()
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # without this each script rerun would leave one more figure in
        # memory.
        plt.close(fig)

    # Offer download of results: serialise the table into an in-memory
    # workbook.
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False)
    output.seek(0)
    st.download_button(
        label="Download results as Excel",
        data=output,
        file_name="sentiment_analysis_results.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


if __name__ == "__main__":
    main()