Knight-coderr committed on
Commit
2d945bf
·
verified ·
1 Parent(s): 88e180d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -53
app.py CHANGED
@@ -1,78 +1,101 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  from textblob import TextBlob
4
  import joblib
5
  import matplotlib.pyplot as plt
6
  import datetime
7
 
8
- # Load the data
9
@st.cache_data
def load_data():
    """Build the per-stock feature table from the two bundled CSV files.

    Reads 'data/stock_yfinance_data.csv' (prices) and 'data/stock_tweets.csv'
    (tweets), scores every tweet with TextBlob, averages the scores per
    (Date, Stock Name), left-joins them onto the price rows and derives the
    lagged / rolling features below.

    Returns:
        DataFrame sorted by 'Date' holding the price columns plus
        'Sentiment', 'Prev_Close', 'Prev_Sentiment', 'MA7', 'MA14',
        'Daily_Change' and 'Volatility'. Rows containing any missing value
        (for example the warm-up rows of the rolling windows) are dropped.
    """
    stock_data = pd.read_csv('data/stock_yfinance_data.csv')
    tweets_data = pd.read_csv('data/stock_tweets.csv')

    # Convert the Date columns to datetime; tweets keep only the calendar
    # date so they can be grouped per day.
    stock_data['Date'] = pd.to_datetime(stock_data['Date'])
    tweets_data['Date'] = pd.to_datetime(tweets_data['Date']).dt.date

    def get_sentiment(tweet):
        """Return the TextBlob polarity of *tweet*, or 0.0 for non-string input."""
        if not isinstance(tweet, str):
            # Empty CSV cells arrive as NaN; TextBlob only accepts strings.
            return 0.0
        return TextBlob(tweet).sentiment.polarity

    tweets_data['Sentiment'] = tweets_data['Tweet'].apply(get_sentiment)

    # Aggregate sentiment by date and stock
    daily_sentiment = (
        tweets_data.groupby(['Date', 'Stock Name'])
        .mean(numeric_only=True)
        .reset_index()
    )

    # Back to datetime64[ns] so the key dtype matches stock_data['Date'].
    daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])

    # Merge stock data with sentiment data
    merged_data = pd.merge(
        stock_data,
        daily_sentiment,
        how='left',
        left_on=['Date', 'Stock Name'],
        right_on=['Date', 'Stock Name'],
    )

    # Days without tweets count as neutral (0). Assign the result back:
    # calling fillna(inplace=True) on a column selection is a chained
    # operation that is deprecated and has no effect under Copy-on-Write.
    merged_data['Sentiment'] = merged_data['Sentiment'].fillna(0)

    # Sort the data by date
    merged_data.sort_values(by='Date', inplace=True)

    # Lagged features: previous row's value within each stock.
    merged_data['Prev_Close'] = merged_data.groupby('Stock Name')['Close'].shift(1)
    merged_data['Prev_Sentiment'] = merged_data.groupby('Stock Name')['Sentiment'].shift(1)

    # Moving averages of the close over 7 and 14 rows per stock.
    merged_data['MA7'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).mean())
    merged_data['MA14'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=14).mean())

    # Daily price change relative to the previous row of the same stock.
    merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']

    # Volatility: 7-row rolling standard deviation of the close per stock.
    merged_data['Volatility'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).std())

    # Drop rows with missing values
    merged_data.dropna(inplace=True)

    return merged_data
 
58
 
59
- data = load_data()
60
- stock_names = data['Stock Name'].unique()
61
 
62
  # Load the best model
63
  model_filename = 'model/best_model.pkl'
64
  model = joblib.load(model_filename)
65
 
 
66
  st.title("Stock Price Prediction Using Sentiment Analysis")
67
 
68
  # User input for stock data
69
  st.header("Input Stock Data")
 
70
  selected_stock = st.selectbox("Select Stock Name", stock_names)
71
- days_to_predict = st.number_input("Number of Days to Predict",
72
- min_value=1, max_value=30, value=10)
73
 
74
  # Get the latest data for the selected stock
75
- latest_data = data[data['Stock Name'] == selected_stock].iloc[-1]
76
  prev_close = latest_data['Close']
77
  prev_sentiment = latest_data['Sentiment']
78
  ma7 = latest_data['MA7']
@@ -113,8 +136,6 @@ if st.button("Predict"):
113
  ma14 = (ma14 * 13 + next_day_prediction) / 14 # Simplified rolling calculation
114
  daily_change = next_day_prediction - prev_close
115
 
116
- # st.write(f"Predicted next {days_to_predict} days close prices: {predictions}")
117
- # Prepare prediction data for display
118
  # Prepare prediction data for display
119
  prediction_dates = pd.date_range(start=latest_date + pd.Timedelta(days=1), periods=days_to_predict)
120
  prediction_df = pd.DataFrame({
@@ -123,14 +144,51 @@ if st.button("Predict"):
123
  })
124
 
125
  st.subheader("Predicted Prices")
126
- st.write(prediction_df)
127
-
128
- # Plotting the results
129
- st.subheader("Prediction Chart")
130
- plt.figure(figsize=(10, 6))
131
- plt.plot(prediction_df['Date'], prediction_df['Predicted Close Price'], marker='o', linestyle='--', label="Predicted Close Price")
132
- plt.xlabel("Date")
133
- plt.ylabel("Close Price")
134
- plt.title(f"{selected_stock} Predicted Close Prices")
135
- plt.legend()
136
- st.pyplot(plt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import yfinance as yf
4
  from textblob import TextBlob
5
  import joblib
6
  import matplotlib.pyplot as plt
7
  import datetime
8
 
9
+ # Function to load stock data using yfinance
10
@st.cache_data
def load_yfinance_data():
    """Download one year of price history for the tracked tickers.

    Returns:
        A single DataFrame in which every ticker's rows are stacked, the
        download's index has been reset into a regular column, and a
        'Stock Name' column carries the ticker symbol.
    """
    symbols = ['TSLA', 'MSFT', 'PG', 'META', 'AMZN', 'GOOG', 'AMD', 'AAPL', 'NFLX', 'TSM',
               'KO', 'F', 'COST', 'DIS', 'VZ', 'CRM', 'INTC', 'BA', 'BX', 'NOC', 'PYPL', 'ENPH', 'NIO', 'ZS', 'XPEV']

    # Window: the 365 days leading up to today, as YYYY-MM-DD strings.
    one_year = datetime.timedelta(days=365)
    start_date = f"{datetime.datetime.now() - one_year:%Y-%m-%d}"
    end_date = f"{datetime.datetime.today():%Y-%m-%d}"

    # One request for all symbols; group_by='ticker' makes the ticker the
    # outer column level, so downloaded[symbol] selects one stock's frame.
    downloaded = yf.download(symbols, start=start_date, end=end_date, group_by='ticker')

    # Flatten each ticker's frame and tag it with its symbol.
    per_ticker = [
        downloaded[symbol].reset_index().assign(**{'Stock Name': symbol})
        for symbol in symbols
    ]

    return pd.concat(per_ticker, ignore_index=True)
35
 
36
# Price history comes from the cached yfinance loader.
data = load_yfinance_data()

# Tweets are still read from the local CSV file.
tweets_data = pd.read_csv('data/stock_tweets.csv')

# Reduce every tweet timestamp to its calendar date so tweets can later be
# grouped per day.
tweet_timestamps = pd.to_datetime(tweets_data['Date'])
tweets_data['Date'] = tweet_timestamps.dt.date
44
 
45
+ # Perform sentiment analysis on tweets
46
def get_sentiment(tweet):
    """Return the TextBlob polarity score of a tweet.

    Args:
        tweet: Tweet text. Non-string values (such as the NaN pandas yields
            for an empty CSV cell, or None) are treated as neutral.

    Returns:
        ``TextBlob(tweet).sentiment.polarity`` for string input, otherwise
        0.0.
    """
    if not isinstance(tweet, str):
        # TextBlob only accepts strings; score anything else as neutral
        # instead of aborting the whole column-wise apply.
        return 0.0
    return TextBlob(tweet).sentiment.polarity
49
 
50
# Score every tweet; the polarity lands in a new 'Sentiment' column.
tweets_data['Sentiment'] = tweets_data['Tweet'].apply(get_sentiment)

# Aggregate sentiment by date and stock (mean of the numeric columns).
daily_sentiment = (
    tweets_data.groupby(['Date', 'Stock Name'])
    .mean(numeric_only=True)
    .reset_index()
)

# Convert the Date column in daily_sentiment to datetime64[ns] before it is
# used as a merge key.
daily_sentiment['Date'] = pd.to_datetime(daily_sentiment['Date'])

# Left join: every price row is kept, sentiment is attached where available.
merged_data = pd.merge(data, daily_sentiment, how='left', on=['Date', 'Stock Name'])

# Days without tweets count as neutral (0). Assign the result back: calling
# fillna(inplace=True) on a column selection is a chained operation that is
# deprecated and has no effect under pandas Copy-on-Write.
merged_data['Sentiment'] = merged_data['Sentiment'].fillna(0)

# Sort the data by date
merged_data.sort_values(by='Date', inplace=True)

# Lagged features: the previous row's value within each stock.
merged_data['Prev_Close'] = merged_data.groupby('Stock Name')['Close'].shift(1)
merged_data['Prev_Sentiment'] = merged_data.groupby('Stock Name')['Sentiment'].shift(1)

# Moving averages of the close over 7 and 14 rows per stock.
merged_data['MA7'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).mean())
merged_data['MA14'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=14).mean())

# Daily price change relative to the previous row of the same stock.
merged_data['Daily_Change'] = merged_data['Close'] - merged_data['Prev_Close']

# Volatility: 7-row rolling standard deviation of the close per stock.
merged_data['Volatility'] = merged_data.groupby('Stock Name')['Close'].transform(lambda x: x.rolling(window=7).std())

# Drop rows with any missing value (this removes, among others, the warm-up
# rows of the rolling windows).
merged_data.dropna(inplace=True)
83
 
84
  # Load the best model
85
  model_filename = 'model/best_model.pkl'
86
  model = joblib.load(model_filename)
87
 
88
+ # Streamlit application layout
89
  st.title("Stock Price Prediction Using Sentiment Analysis")
90
 
91
  # User input for stock data
92
  st.header("Input Stock Data")
93
+ stock_names = merged_data['Stock Name'].unique()
94
  selected_stock = st.selectbox("Select Stock Name", stock_names)
95
+ days_to_predict = st.number_input("Number of Days to Predict", min_value=1, max_value=30, value=10)
 
96
 
97
  # Get the latest data for the selected stock
98
+ latest_data = merged_data[merged_data['Stock Name'] == selected_stock].iloc[-1]
99
  prev_close = latest_data['Close']
100
  prev_sentiment = latest_data['Sentiment']
101
  ma7 = latest_data['MA7']
 
136
  ma14 = (ma14 * 13 + next_day_prediction) / 14 # Simplified rolling calculation
137
  daily_change = next_day_prediction - prev_close
138
 
 
 
139
  # Prepare prediction data for display
140
  prediction_dates = pd.date_range(start=latest_date + pd.Timedelta(days=1), periods=days_to_predict)
141
  prediction_df = pd.DataFrame({
 
144
  })
145
 
146
  st.subheader("Predicted Prices")
147
+ # st.write(prediction_df)
148
+ st.dataframe(prediction_df)
149
+ # Plot predictions using Plotly
150
+ import plotly.express as px
151
+ fig = px.line(prediction_df, x='Date', y='Predicted Close Price',
152
+ markers=True, title=f"{selected_stock} Predicted Close Prices")
153
+ st.plotly_chart(fig, use_container_width=True)
154
+
155
# ----------------------------------------
# Enhanced Visualizations
# NOTE(review): indentation was lost in the view this was reviewed from, so it
# is unclear whether this section originally sat inside the
# `if st.button("Predict"):` body. It is written at top level here - confirm
# against the real file.
import plotly.express as px  # imported here so the section does not rely on an import made inside another block

st.header(" Enhanced Stock Analysis")

# Slice the engineered frame, not the raw download: 'Sentiment', 'Volatility',
# 'MA7', 'MA14' and 'Daily_Change' are columns of merged_data only, so
# plotting them from `data` fails.
stock_history = merged_data[merged_data['Stock Name'] == selected_stock]

# Date filter slider spanning the full history of the selected stock.
min_date = stock_history['Date'].min()
max_date = stock_history['Date'].max()
date_range = st.slider("Select Date Range for Visualizations",
                       min_value=min_date, max_value=max_date,
                       value=(min_date, max_date))
in_range = (stock_history['Date'] >= date_range[0]) & (stock_history['Date'] <= date_range[1])
filtered_data = stock_history[in_range]

with st.expander(" Price vs Sentiment Trend"):
    fig1 = px.line(filtered_data, x='Date', y=['Close', 'Sentiment'],
                   labels={'value': 'Price / Sentiment', 'variable': 'Metric'},
                   title=f"{selected_stock} - Close Price & Sentiment")
    st.plotly_chart(fig1, use_container_width=True)

with st.expander(" Volatility Over Time"):
    fig2 = px.line(filtered_data, x='Date', y='Volatility',
                   title=f"{selected_stock} - 7-Day Rolling Volatility")
    st.plotly_chart(fig2, use_container_width=True)

with st.expander(" Moving Averages (MA7 vs MA14)"):
    fig3 = px.line(filtered_data, x='Date',
                   y=['MA7', 'MA14'],
                   labels={'value': 'Price', 'variable': 'Moving Average'},
                   title=f"{selected_stock} - Moving Averages")
    st.plotly_chart(fig3, use_container_width=True)

with st.expander(" Daily Price Change"):
    fig4 = px.line(filtered_data, x='Date', y='Daily_Change',
                   title=f"{selected_stock} - Daily Price Change")
    st.plotly_chart(fig4, use_container_width=True)

with st.expander(" Sentiment Distribution"):
    fig5 = px.histogram(filtered_data, x='Sentiment', nbins=30,
                        title=f"{selected_stock} - Sentiment Score Distribution")
    st.plotly_chart(fig5, use_container_width=True)