File size: 8,044 Bytes
88e180d
 
2d945bf
88e180d
 
 
debb65c
6ed8b7e
88e180d
652c625
0251baa
23a97dd
6ed8b7e
 
 
 
 
 
 
 
 
 
23a97dd
88e180d
23a97dd
6ed8b7e
23a97dd
 
 
7cded74
 
 
 
 
 
 
 
2725bb3
7cded74
2725bb3
88e180d
2d945bf
 
88e180d
2d945bf
 
88e180d
2d945bf
 
 
 
88e180d
2d945bf
88e180d
2d945bf
 
88e180d
2d945bf
 
88e180d
2d945bf
6ed8b7e
88e180d
2d945bf
0251baa
88e180d
2d945bf
0251baa
88e180d
2d945bf
 
 
88e180d
2d945bf
 
 
88e180d
2d945bf
 
88e180d
2d945bf
 
88e180d
2d945bf
 
88e180d
 
 
 
 
2d945bf
88e180d
 
 
 
2d945bf
88e180d
2d945bf
88e180d
 
2d945bf
88e180d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ed8b7e
88e180d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d945bf
6ed8b7e
2d945bf
6ed8b7e
2d945bf
 
 
 
6ed8b7e
 
2d945bf
 
6ed8b7e
 
23a97dd
 
 
 
 
 
 
 
 
1434106
 
2d945bf
6ed8b7e
2d945bf
 
 
 
 
6ed8b7e
2d945bf
 
 
 
6ed8b7e
 
2d945bf
 
 
 
6ed8b7e
2d945bf
 
 
 
6ed8b7e
2d945bf
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import streamlit as st
import pandas as pd
import yfinance as yf
from textblob import TextBlob
import joblib
import matplotlib.pyplot as plt
from datetime import datetime
import plotly.express as px

# Download daily price history with yfinance; Streamlit re-uses the cached
# result for one day (ttl=86400 s) so the network call runs at most daily.
@st.cache_data(ttl=86400)
def load_stock_data(tickers, start_date, end_date):
    """Fetch adjusted OHLCV history for every ticker in *tickers* and return
    one long DataFrame with a 'Stock Name' column identifying each row's
    ticker. Dates are strings understood by yfinance (YYYY-MM-DD)."""
    with st.spinner('Downloading stock data...'):
        raw = yf.download(tickers, start=start_date, end=end_date,
                          group_by='ticker', auto_adjust=True)

        # One per-ticker frame each, tagged with its symbol, then stacked.
        frames = [
            raw[symbol].copy().reset_index().assign(**{'Stock Name': symbol})
            for symbol in tickers
        ]
        stacked = pd.concat(frames, ignore_index=True)
    return stacked

# Universe of tickers the app tracks, plus a one-year lookback window ending
# today. Both dates are formatted YYYY-MM-DD for yfinance.
tickers = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL', 'NFLX', 'TSM',
           'KO', 'F', 'COST', 'DIS', 'VZ', 'CRM', 'INTC', 'BA', 'BX', 'NOC', 'PYPL', 'ENPH', 'NIO', 'ZS', 'XPEV']
_today = datetime.today()
start_date = (_today - pd.DateOffset(years=1)).strftime('%Y-%m-%d')
end_date = _today.strftime('%Y-%m-%d')

# Convenience wrapper that loads the app's full ticker universe for the
# default one-year window.
# NOTE: load_stock_data() is itself decorated with @st.cache_data(ttl=86400),
# so adding a second cache layer here (as before) only stored a duplicate
# copy of the same DataFrame under a second cache key. The redundant
# decorator has been removed; every call still hits the inner cache.
def load_and_cache_stock_data():
    """Return the cached one-year price history for all tracked tickers."""
    return load_stock_data(tickers, start_date, end_date)

# Populate the per-session copy of the price data on the first script run;
# later Streamlit reruns reuse it from session state instead of reloading.
if "stock_data" not in st.session_state:
    st.session_state.stock_data = load_and_cache_stock_data()

stock_data = st.session_state.stock_data


# Historical, per-stock-labelled tweets. The Date column is reduced to a
# plain calendar date so tweets can later be grouped per day.
tweets_data = pd.read_csv('data/stock_tweets.csv')
tweets_data = tweets_data.assign(Date=pd.to_datetime(tweets_data['Date']).dt.date)

# Perform sentiment analysis on tweets
def get_sentiment(tweet):
    """Return TextBlob's polarity score for *tweet* (negative < 0 < positive)."""
    return TextBlob(tweet).sentiment.polarity

# Score every tweet, average the scores per (day, stock), and left-join the
# result onto the price history. Days with no tweets get neutral sentiment.
tweets_data['Sentiment'] = tweets_data['Tweet'].map(get_sentiment)

daily_sentiment = (
    tweets_data
    .groupby(['Date', 'Stock Name'])
    .mean(numeric_only=True)
    .reset_index()
)

# Restore datetime64 dtype so the merge key matches stock_data's Date column.
daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])

merged_data = stock_data.merge(daily_sentiment, how='left', on=['Date', 'Stock Name'])
merged_data['Sentiment'] = merged_data['Sentiment'].fillna(0)

# Keep the series chronological for the lag/rolling features computed next.
merged_data = merged_data.sort_values(by='Date')

# Feature engineering, always computed within each stock's own time series:
# previous-day values, 7/14-day moving averages, day-over-day change and a
# 7-day rolling volatility. Rows lacking a full window are dropped at the end.
def _per_stock(column):
    # Group one column of merged_data by ticker for per-stock transforms.
    return merged_data.groupby('Stock Name')[column]

merged_data['Prev_Close'] = _per_stock('Close').shift(1)
merged_data['Prev_Sentiment'] = _per_stock('Sentiment').shift(1)

merged_data['MA7'] = _per_stock('Close').transform(lambda s: s.rolling(window=7).mean())
merged_data['MA14'] = _per_stock('Close').transform(lambda s: s.rolling(window=14).mean())

merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']

merged_data['Volatility'] = _per_stock('Close').transform(lambda s: s.rolling(window=7).std())

# Drop the warm-up rows produced by the shifts and rolling windows.
merged_data.dropna(inplace=True)

# Deserialize the pre-trained model bundled with the app; it must accept a
# DataFrame with columns Prev_Close, Prev_Sentiment, MA7, MA14, Daily_Change,
# Volatility (see the prediction loop below).
model_filename = 'model/best_model.pkl'
model = joblib.load(model_filename)

# Page chrome and user inputs: which stock to forecast and how many days out.
st.title("Stock Price Prediction Using Sentiment Analysis")

st.header("Input Stock Data")
stock_names = merged_data['Stock Name'].unique()
selected_stock = st.selectbox("Select Stock Name", stock_names)
days_to_predict = st.number_input("Number of Days to Predict", min_value=1, max_value=30, value=10)

# Seed the forecast with the most recent observed row of the chosen stock.
latest_data = merged_data.loc[merged_data['Stock Name'] == selected_stock].iloc[-1]
prev_close, prev_sentiment = latest_data['Close'], latest_data['Sentiment']
ma7, ma14 = latest_data['MA7'], latest_data['MA14']
daily_change, volatility = latest_data['Daily_Change'], latest_data['Volatility']

# Show the forecast seed values in a small two-column table.
_metrics = {
    'Previous Close Price': prev_close,
    'Previous Sentiment': prev_sentiment,
    '7-day Moving Average': ma7,
    '14-day Moving Average': ma14,
    'Daily Change': daily_change,
    'Volatility': volatility,
}
latest_data_df = pd.DataFrame({'Metric': list(_metrics), 'Value': list(_metrics.values())})

st.write("Latest Stock Data:")
st.write(latest_data_df)

st.write("Use the inputs above to predict the next days close prices of the stock.")
if st.button("Predict"):
    # Recursive multi-step forecast: roll the model forward one day at a
    # time, feeding each prediction back in as the next step's features.
    predictions = []
    # NOTE(review): forecast dates are anchored to "now" rather than to the
    # last date actually present in the data — confirm this is intended.
    latest_date = datetime.now()

    for i in range(days_to_predict):
        X_future = pd.DataFrame({
            'Prev_Close': [prev_close],
            'Prev_Sentiment': [prev_sentiment],
            'MA7': [ma7],
            'MA14': [ma14],
            'Daily_Change': [daily_change],
            'Volatility': [volatility]
        })

        next_day_prediction = model.predict(X_future)[0]
        predictions.append(next_day_prediction)

        # Update features for the next prediction.
        # BUG FIX: Daily_Change must be measured against the *old* prev_close,
        # so compute it BEFORE prev_close is overwritten. The previous ordering
        # (prev_close updated first) made daily_change identically 0 on every
        # step, starving the model of that feature.
        daily_change = next_day_prediction - prev_close
        prev_close = next_day_prediction
        ma7 = (ma7 * 6 + next_day_prediction) / 7  # Simplified rolling calculation
        ma14 = (ma14 * 13 + next_day_prediction) / 14  # Simplified rolling calculation
        # prev_sentiment and volatility are intentionally held constant over
        # the horizon (no future tweets are available to update them).

    # Prepare prediction data for display
    prediction_dates = pd.date_range(start=latest_date + pd.Timedelta(days=1), periods=days_to_predict)
    prediction_df = pd.DataFrame({
        'Date': prediction_dates,
        'Predicted Close Price': predictions
    })

    st.subheader("Predicted Prices")
    st.dataframe(prediction_df)

    # Plot predictions using Plotly
    fig = px.line(prediction_df, x='Date', y='Predicted Close Price', markers=True, title=f"{selected_stock} Predicted Close Prices")
    st.plotly_chart(fig, use_container_width=True)

    # ----------------------------------------
    # Enhanced Visualizations
    # NOTE(review): these widgets live inside the button branch; interacting
    # with the slider triggers a rerun in which st.button() returns False,
    # which collapses this whole section — consider moving it outside the
    # branch (behavior left unchanged here).
    st.header("Enhanced Stock Analysis")
    stock_history = merged_data[merged_data['Stock Name'] == selected_stock]

    # Date filter slider over the available history.
    min_date = pd.to_datetime(merged_data['Date'].min()).date()
    max_date = pd.to_datetime(merged_data['Date'].max()).date()

    date_range = st.slider(
        "Select Date Range for Visualizations",
        min_value=min_date,
        max_value=max_date,
        value=(min_date, max_date),
        format="YYYY-MM-DD"
    )

    filtered_data = stock_history[(stock_history['Date'] >= pd.to_datetime(date_range[0])) &
                              (stock_history['Date'] <= pd.to_datetime(date_range[1]))]

    with st.expander("Price vs Sentiment Trend"):
        fig1 = px.line(filtered_data, x='Date', y=['Close', 'Sentiment'],
                       labels={'value': 'Price / Sentiment', 'variable': 'Metric'},
                       title=f"{selected_stock} - Close Price & Sentiment")
        st.plotly_chart(fig1, use_container_width=True)

    with st.expander("Volatility Over Time"):
        fig2 = px.line(filtered_data, x='Date', y='Volatility',
                       title=f"{selected_stock} - 7-Day Rolling Volatility")
        st.plotly_chart(fig2, use_container_width=True)

    with st.expander("Moving Averages (MA7 vs MA14)"):
        fig3 = px.line(filtered_data, x='Date', y=['MA7', 'MA14'],
                       labels={'value': 'Price', 'variable': 'Moving Average'},
                       title=f"{selected_stock} - Moving Averages")
        st.plotly_chart(fig3, use_container_width=True)

    with st.expander("Daily Price Change"):
        fig4 = px.line(filtered_data, x='Date', y='Daily_Change',
                       title=f"{selected_stock} - Daily Price Change")
        st.plotly_chart(fig4, use_container_width=True)

    with st.expander("Sentiment Distribution"):
        fig5 = px.histogram(filtered_data, x='Sentiment', nbins=30,
                            title=f"{selected_stock} - Sentiment Score Distribution")
        st.plotly_chart(fig5, use_container_width=True)