# Scraped page header (not part of the program): author Shreneek, commit "Update app.py" (9a9f2ab, verified).
# streamlit_app.py - Bolt Driver Recommendation System
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import folium
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import folium_static
import pickle
import os
# Set page configuration
# The page icon is the taxi emoji; the previous literal was a mis-decoded
# (mojibake) rendering of the same UTF-8 bytes.
st.set_page_config(
    page_title="Bolt Driver Recommendation System",
    page_icon="🚖",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS styling
# Stylesheet for the header, card, info-box and metric classes referenced by
# the HTML snippets rendered further down the page.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
color: #272D37;
text-align: center;
margin-bottom: 1rem;
font-weight: bold;
}
.sub-header {
font-size: 1.8rem;
color: #272D37;
margin-top: 1.5rem;
margin-bottom: 1rem;
}
.section-header {
font-size: 1.3rem;
color: #272D37;
margin-top: 1rem;
margin-bottom: 0.5rem;
font-weight: bold;
}
.highlight {
background-color: #F0F2F6;
padding: 1rem;
border-radius: 0.5rem;
margin-bottom: 1rem;
}
.card {
background-color: white;
border-radius: 0.5rem;
padding: 1.5rem;
box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
margin-bottom: 1rem;
}
.info-box {
background-color: #e8f4f8;
border-left: 5px solid #4e8cff;
padding: 0.8rem;
border-radius: 0.3rem;
margin-bottom: 1rem;
}
.metric-container {
display: flex;
justify-content: space-between;
gap: 1rem;
}
.metric-card {
background-color: white;
border-radius: 0.5rem;
padding: 1rem;
text-align: center;
box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
flex: 1;
}
.metric-value {
font-size: 1.8rem;
font-weight: bold;
color: #272D37;
}
.metric-label {
font-size: 0.9rem;
color: #6e707e;
}
</style>
"""
# unsafe_allow_html is required so the <style> element is emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Header and app description
# The title uses the "main-header" class and the intro blurb the "info-box"
# class from the stylesheet injected above.
_APP_TITLE_HTML = '<div class="main-header">Bolt Driver Recommendation System</div>'
_APP_INTRO_HTML = '<div class="info-box">This application helps Bolt drivers find optimal areas to position themselves based on predicted ride demand and value. The recommendations are personalized based on time, location, and driver preferences.</div>'

st.markdown(_APP_TITLE_HTML, unsafe_allow_html=True)
with st.container():
    st.markdown(_APP_INTRO_HTML, unsafe_allow_html=True)
class DemandPredictionModel:
    """Synthetic ride-demand model over a 10x10 grid covering Tallinn.

    No trained model is loaded: demand and value surfaces are generated from
    a handful of hard-coded hotspots and hour/day multipliers, with a little
    random jitter. ``predict`` ranks the grid cells for a given day and hour.
    """

    # Share of a hotspot's demand that spills into a neighbour one cell away;
    # divided by the Euclidean cell distance for diagonal neighbours.
    _SPILLOVER_DEMAND_SHARE = 0.5
    # Rides spilling into neighbouring cells are worth slightly less.
    _SPILLOVER_VALUE_RATIO = 0.9
    _EARTH_RADIUS_KM = 6371
    # Distance at which the distance penalty halves a cell's score.
    _DISTANCE_SCALE_KM = 5.0

    def __init__(self, seed=None):
        """Initialize the demand prediction model.

        Parameters:
        - seed: Optional seed for the random jitter. ``None`` (the default)
          draws from NumPy's global random state, as before; an integer makes
          the generated patterns reproducible.
        """
        # In a real app, we would load the model from a file.
        # Here we create a dummy version for demonstration.
        self._seed = seed
        self.setup_demo_data()

    def setup_demo_data(self):
        """Set up the grid geometry and the synthetic demand/value patterns."""
        # Geographic bounding box (Tallinn)
        self.min_lat, self.max_lat = 59.32, 59.57
        self.min_lng, self.max_lng = 24.51, 24.97

        # Grid resolution: grid_size x grid_size cells
        grid_size = 10
        self.lat_step = (self.max_lat - self.min_lat) / grid_size
        self.lng_step = (self.max_lng - self.min_lng) / grid_size

        # Cell edges; cell i spans bins[i]..bins[i + 1]
        self.lat_bins = np.linspace(self.min_lat, self.max_lat, grid_size + 1)
        self.lng_bins = np.linspace(self.min_lng, self.max_lng, grid_size + 1)

        self.demand_patterns = self.create_demand_patterns()

    def create_demand_patterns(self):
        """Build the synthetic demand and value surfaces.

        Returns:
        - Dict with keys ``"demand"`` and ``"value"``, each a 4D array indexed
          ``[day_of_week][hour][lat_bin][lng_bin]``. ``"demand"`` is the
          number of rides in a cell (hotspot demand plus spillover from
          adjacent hotspots). ``"value"`` is the demand-weighted average ride
          value of everything contributing to that cell, and 0 where there is
          no demand.
        """
        days = 7
        hours = 24
        lat_bins = len(self.lat_bins) - 1
        lng_bins = len(self.lng_bins) - 1
        shape = (days, hours, lat_bins, lng_bins)

        demand_patterns = np.zeros(shape)
        # Sum of (rides * value) per cell; divided by demand at the end so the
        # stored value is a true average instead of a sum over hotspots.
        revenue = np.zeros(shape)

        # getattr keeps this usable on instances built without __init__.
        seed = getattr(self, "_seed", None)
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Key areas
        hotspots = [
            {"lat_idx": 4, "lng_idx": 5, "base_demand": 300, "value": 1.91},  # city center
            {"lat_idx": 4, "lng_idx": 4, "base_demand": 150, "value": 1.94},  # secondary hub
            {"lat_idx": 3, "lng_idx": 4, "base_demand": 80, "value": 2.89},   # university area
            {"lat_idx": 3, "lng_idx": 3, "base_demand": 60, "value": 1.85},   # residential zone
            {"lat_idx": 4, "lng_idx": 6, "base_demand": 50, "value": 1.56},   # business district
        ]

        # Demand multiplier per hour of day
        hourly_factors = {
            0: 0.5, 1: 0.4, 2: 0.3, 3: 0.3, 4: 0.3, 5: 0.5,
            6: 0.8, 7: 0.9, 8: 0.7, 9: 0.6, 10: 0.6, 11: 0.6,
            12: 0.7, 13: 0.8, 14: 0.9, 15: 1.0, 16: 1.0, 17: 0.8,
            18: 0.7, 19: 0.7, 20: 0.7, 21: 0.8, 22: 0.9, 23: 0.7,
        }
        # Ride-value multiplier per hour of day
        value_factors = {
            0: 1.4, 1: 0.8, 2: 1.0, 3: 0.6, 4: 1.6, 5: 0.7,
            6: 0.9, 7: 1.1, 8: 1.0, 9: 0.7, 10: 0.8, 11: 1.1,
            12: 0.8, 13: 0.9, 14: 1.6, 15: 0.9, 16: 0.8, 17: 1.0,
            18: 0.8, 19: 0.7, 20: 1.1, 21: 0.8, 22: 1.0, 23: 1.2,
        }
        # Demand multiplier per day of week (0 = Monday)
        day_factors = {0: 0.8, 1: 0.9, 2: 0.9, 3: 0.85, 4: 0.95, 5: 1.0, 6: 0.8}

        # The eight surrounding cells with their demand share, which decreases
        # with distance (diagonals get less).
        neighbour_offsets = [
            (d_lat, d_lng, self._SPILLOVER_DEMAND_SHARE / np.sqrt(d_lat**2 + d_lng**2))
            for d_lat in (-1, 0, 1)
            for d_lng in (-1, 0, 1)
            if (d_lat, d_lng) != (0, 0)
        ]

        for day in range(days):
            for hour in range(hours):
                time_factor = hourly_factors[hour] * day_factors[day]

                # Tuesday and Thursday early morning and late night carry
                # double ride values.
                special_value_factor = 2.0 if day in (1, 3) and hour in (4, 22, 23) else 1.0

                for spot in hotspots:
                    lat_idx, lng_idx = spot["lat_idx"], spot["lng_idx"]

                    # Base pattern with +/-10% (demand) and +/-5% (value) jitter
                    demand = spot["base_demand"] * time_factor * rng.uniform(0.9, 1.1)
                    value = (spot["value"] * value_factors[hour] * special_value_factor
                             * rng.uniform(0.95, 1.05))

                    # Accumulate rather than assign so spillover already
                    # received from an adjacent hotspot is not discarded.
                    demand_patterns[day, hour, lat_idx, lng_idx] += demand
                    revenue[day, hour, lat_idx, lng_idx] += demand * value

                    for d_lat, d_lng, share in neighbour_offsets:
                        n_lat = lat_idx + d_lat
                        n_lng = lng_idx + d_lng
                        if 0 <= n_lat < lat_bins and 0 <= n_lng < lng_bins:
                            spill_demand = demand * share
                            demand_patterns[day, hour, n_lat, n_lng] += spill_demand
                            revenue[day, hour, n_lat, n_lng] += (
                                spill_demand * value * self._SPILLOVER_VALUE_RATIO
                            )

        # Average value per ride; cells without demand keep a value of 0.
        value_patterns = np.divide(
            revenue,
            demand_patterns,
            out=np.zeros_like(revenue),
            where=demand_patterns > 0,
        )

        return {"demand": demand_patterns, "value": value_patterns}

    @staticmethod
    def _haversine_km(lat1, lng1, lat2, lng2):
        """Return the great-circle distance in km between two lat/lng points."""
        d_lat = np.radians(lat1 - lat2)
        d_lng = np.radians(lng1 - lng2)
        a = (np.sin(d_lat / 2) ** 2
             + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) * np.sin(d_lng / 2) ** 2)
        # Guard against rounding pushing `a` above 1, which would make
        # sqrt(1 - a) NaN.
        a = min(1.0, a)
        return DemandPredictionModel._EARTH_RADIUS_KM * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    def predict(self, day, hour, current_lat=None, current_lng=None, value_weight=0.5, top_n=5):
        """
        Predict high-demand areas for a given day and hour

        Parameters:
        - day: Day of week (0=Monday, 6=Sunday)
        - hour: Hour of day (0-23)
        - current_lat: Driver's current latitude (optional)
        - current_lng: Driver's current longitude (optional)
        - value_weight: Weight for balancing demand vs value (0-1)
        - top_n: Number of recommendations to return

        Returns:
        - List of up to ``top_n`` dicts, best first, one per grid cell with
          non-zero demand. Keys: ``center_lat``, ``center_lng``,
          ``predicted_rides``, ``avg_value``, ``expected_value``
          (rides * value), ``score``, ``adjusted_score`` (score after the
          distance penalty) and ``distance_km`` (``None`` unless both
          coordinates are given).
        """
        demand_matrix = self.demand_patterns["demand"][day, hour]
        value_matrix = self.demand_patterns["value"][day, hour]

        # Normalisation constants are the same for every cell.
        max_demand = np.max(demand_matrix)
        max_value = np.max(value_matrix)
        has_location = current_lat is not None and current_lng is not None

        recommendations = []
        # argwhere walks the grid in row-major order (lat index, then lng index).
        for lat_idx, lng_idx in np.argwhere(demand_matrix > 0):
            demand = demand_matrix[lat_idx, lng_idx]
            value = value_matrix[lat_idx, lng_idx]

            center_lat = (self.lat_bins[lat_idx] + self.lat_bins[lat_idx + 1]) / 2
            center_lng = (self.lng_bins[lng_idx] + self.lng_bins[lng_idx + 1]) / 2

            # Scale demand and value to 0..1 and blend them by value_weight
            demand_score = demand / max_demand if max_demand > 0 else 0
            value_score = value / max_value if max_value > 0 else 0
            score = (1 - value_weight) * demand_score + value_weight * value_score

            if has_location:
                distance_km = self._haversine_km(current_lat, current_lng, center_lat, center_lng)
                # Penalty shrinks the score with distance: halved at 5 km,
                # a third at 10 km.
                adjusted_score = score / (1.0 + distance_km / self._DISTANCE_SCALE_KM)
            else:
                distance_km = None
                adjusted_score = score

            recommendations.append({
                "center_lat": center_lat,
                "center_lng": center_lng,
                "predicted_rides": demand,
                "avg_value": value,
                "expected_value": demand * value,
                "score": score,
                "adjusted_score": adjusted_score,
                "distance_km": distance_km,
            })

        recommendations.sort(key=lambda rec: rec["adjusted_score"], reverse=True)
        return recommendations[:top_n]
# Main application flow
_DAY_NAMES = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
_TALLINN_CENTER = (59.436, 24.753)


def _section_header(title):
    """Render *title* with the app's ``section-header`` CSS class."""
    st.markdown(f'<div class="section-header">{title}</div>', unsafe_allow_html=True)


def _summarize_predictions(predictions):
    """Return ``(total_rides, demand_weighted_avg_value)`` for *predictions*."""
    total_rides = sum(pred["predicted_rides"] for pred in predictions)
    if not total_rides > 0:
        return total_rides, 0
    weighted_value = sum(pred["avg_value"] * pred["predicted_rides"] for pred in predictions)
    return total_rides, weighted_value / total_rides


def _render_sidebar():
    """Draw the sidebar widgets and return the driver's selections as a dict."""
    with st.sidebar:
        _section_header("Driver Options")

        # Time selection, defaulting to the current weekday and hour
        st.subheader("Time Selection")
        now = datetime.now()
        selected_day = st.selectbox("Day of Week", _DAY_NAMES, index=now.weekday())
        selected_hour = st.slider("Hour of Day", 0, 23, now.hour, format="%d:00")

        # Location input, defaulting to Tallinn center
        st.subheader("Driver Location")
        use_location = st.checkbox("Use Current Location", value=True)
        current_lat = current_lng = None
        if use_location:
            default_lat, default_lng = _TALLINN_CENTER
            lat_col, lng_col = st.columns(2)
            with lat_col:
                current_lat = st.number_input(
                    "Latitude", value=default_lat, format="%.5f", step=0.001
                )
            with lng_col:
                current_lng = st.number_input(
                    "Longitude", value=default_lng, format="%.5f", step=0.001
                )

        # Preference settings
        st.subheader("Preferences")
        num_recommendations = st.slider("Number of Recommendations", 3, 10, 5)
        value_weight = st.slider(
            "Optimization Balance",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.1,
            help="0 = Focus on ride count, 1 = Focus on ride value",
        )

        # Display options
        st.subheader("Display Options")
        show_heatmap = st.checkbox("Show Demand Heatmap", value=True)

    return {
        "selected_day": selected_day,
        "day_idx": _DAY_NAMES.index(selected_day),
        "selected_hour": selected_hour,
        "use_location": use_location,
        "current_lat": current_lat,
        "current_lng": current_lng,
        "num_recommendations": num_recommendations,
        "value_weight": value_weight,
        "show_heatmap": show_heatmap,
    }


def _render_map(model, options, recommendations):
    """Draw the folium map with the driver, recommendations and heatmap."""
    _section_header("Demand Map")
    # Map rendering depends on third-party JS/HTML generation; on failure the
    # page still shows the tabular recommendations next to it.
    try:
        demand_map = folium.Map(
            location=list(_TALLINN_CENTER),
            zoom_start=12,
            tiles="CartoDB positron",
        )

        # Driver marker, only when a location was provided
        if options["use_location"]:
            folium.Marker(
                location=[options["current_lat"], options["current_lng"]],
                popup="Your Location",
                icon=folium.Icon(color="blue", icon="user", prefix="fa"),
                tooltip="Your Current Location",
            ).add_to(demand_map)

        # One red circle plus a numbered label per recommendation
        for rank, rec in enumerate(recommendations, start=1):
            popup_lines = [
                f"<b>Recommendation {rank}</b>",
                f"Expected rides: {rec['predicted_rides']:.1f}",
                f"Avg value: €{rec['avg_value']:.2f}",
                f"Expected value: €{rec['expected_value']:.2f}",
            ]
            if rec["distance_km"] is not None:
                popup_lines.append(f"Distance: {rec['distance_km']:.2f} km")

            position = [rec["center_lat"], rec["center_lng"]]
            folium.CircleMarker(
                location=position,
                radius=20,
                color="red",
                fill=True,
                fill_color="red",
                fill_opacity=0.6,
                popup="<br>".join(popup_lines),
            ).add_to(demand_map)
            folium.Marker(
                location=position,
                icon=folium.DivIcon(
                    html=f'<div style="font-size:12pt;color:white;font-weight:bold;text-align:center;width:25px;height:25px;line-height:25px;">{rank}</div>'
                ),
            ).add_to(demand_map)

        if options["show_heatmap"]:
            # Every cell with demand, not just the top recommendations
            all_predictions = model.predict(
                options["day_idx"], options["selected_hour"], top_n=100
            )
            heat_data = [
                [pred["center_lat"], pred["center_lng"], pred["predicted_rides"]]
                for pred in all_predictions
            ]
            HeatMap(
                heat_data,
                radius=15,
                # String keys serialize to the same JSON object as float keys
                # but avoid option-processing code that calls str methods on
                # the keys.
                gradient={
                    "0.2": "blue",
                    "0.4": "lime",
                    "0.6": "yellow",
                    "0.8": "orange",
                    "1.0": "red",
                },
                name="Demand Heatmap",
                show=True,
            ).add_to(demand_map)

        folium.LayerControl().add_to(demand_map)
        folium_static(demand_map, width=700)
    except Exception as e:
        st.error(f"Error rendering map: {e}")
        st.info("Showing tabular results instead.")


def _render_recommendations(recommendations):
    """Draw the top-recommendation metrics, the ranked table and the legend."""
    _section_header("Recommendations")

    if recommendations:
        top_rec = recommendations[0]
        st.markdown('<div class="highlight">', unsafe_allow_html=True)
        st.subheader("Top Recommendation")
        left_col, right_col = st.columns(2)
        with left_col:
            st.metric("Expected Rides", f"{top_rec['predicted_rides']:.1f}")
            st.metric("Avg Value", f"€{top_rec['avg_value']:.2f}")
        with right_col:
            st.metric("Expected Value", f"€{top_rec['expected_value']:.2f}")
            if top_rec["distance_km"] is not None:
                st.metric("Distance", f"{top_rec['distance_km']:.2f} km")
        st.markdown(f"Location: [{top_rec['center_lat']:.4f}, {top_rec['center_lng']:.4f}]")
        st.markdown('</div>', unsafe_allow_html=True)

        # Ranked table of all recommendations
        st.subheader("All Recommendations")
        rec_df = pd.DataFrame(recommendations)
        display_df = pd.DataFrame({
            "Rank": range(1, len(rec_df) + 1),
            "Expected Rides": rec_df["predicted_rides"].round(1),
            "Avg Value (€)": rec_df["avg_value"].round(2),
            "Expected Value (€)": rec_df["expected_value"].round(2),
        })
        # Distance column only when a driver location was supplied
        if "distance_km" in rec_df.columns and rec_df["distance_km"].notna().any():
            display_df["Distance (km)"] = rec_df["distance_km"].round(2)
        st.table(display_df)
    else:
        # An empty list would otherwise fail on the missing DataFrame columns.
        st.info("No recommendations available for the selected time.")

    st.markdown('<div class="info-box">', unsafe_allow_html=True)
    st.markdown("**How recommendations are calculated:**")
    st.markdown(
        "- Ride count predictions based on historical patterns\n"
        "- Value based on average ride fares\n"
        "- Recommendations balanced by your preferences\n"
        "- Distance factored in when location is provided\n"
    )
    st.markdown('</div>', unsafe_allow_html=True)


def _render_hourly_tab(model, options):
    """Draw the dual-axis demand/value chart across the hours of the selected day."""
    rows = []
    for hour in range(24):
        total_demand, avg_value = _summarize_predictions(
            model.predict(options["day_idx"], hour, top_n=100)
        )
        rows.append({"hour": hour, "demand": total_demand, "value": avg_value})
    hourly_df = pd.DataFrame(rows)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["demand"],
        name="Demand",
        line=dict(color="#4e8cff", width=3),
        hovertemplate="Hour: %{x}<br>Demand: %{y:.1f}<extra></extra>",
    ))
    # Value goes on the secondary (right-hand) axis
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["value"],
        name="Avg Value (€)",
        line=dict(color="#ff6b6b", width=3, dash="dot"),
        yaxis="y2",
        hovertemplate="Hour: %{x}<br>Avg Value: €%{y:.2f}<extra></extra>",
    ))
    fig.add_vline(
        x=options["selected_hour"],
        line_width=2,
        line_dash="dash",
        line_color="green",
        annotation_text="Selected Hour",
        annotation_position="top right",
    )
    fig.update_layout(
        title=f"Hourly Demand Pattern for {options['selected_day']}",
        xaxis=dict(title="Hour of Day", tickmode="linear", tick0=0, dtick=1),
        # Axis title colour is set via title.font; the flat `titlefont`
        # property is not accepted by current Plotly releases.
        yaxis=dict(
            title=dict(text="Demand (Expected Rides)", font=dict(color="#4e8cff")),
            tickfont=dict(color="#4e8cff"),
        ),
        yaxis2=dict(
            title=dict(text="Average Value (€)", font=dict(color="#ff6b6b")),
            tickfont=dict(color="#ff6b6b"),
            anchor="x",
            overlaying="y",
            side="right",
        ),
        hovermode="x unified",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(
        "**Key Observations:**\n"
        "- Peak demand typically occurs between 15:00-18:00 (3-6 PM)\n"
        "- Early morning hours (4-5 AM) often show higher average ride values\n"
        "- Morning rush hour (6-9 AM) shows moderate demand with variable values\n"
    )


def _render_daily_tab(model, options):
    """Draw the peak-hour demand bar chart across the days of the week."""
    rows = []
    for day, day_name in enumerate(_DAY_NAMES):
        peak_hour = 17 if day < 5 else 22  # Weekday peak at 5pm, weekend peak at 10pm
        total_demand, avg_value = _summarize_predictions(
            model.predict(day, peak_hour, top_n=100)
        )
        rows.append({"day": day_name, "demand": total_demand, "value": avg_value})
    daily_df = pd.DataFrame(rows)

    fig = px.bar(
        daily_df,
        x="day",
        y="demand",
        color="value",
        color_continuous_scale="Viridis",
        labels={
            "day": "Day of Week",
            "demand": "Peak Demand (Expected Rides)",
            "value": "Avg Value (€)",
        },
        title="Peak Demand by Day of Week",
    )
    # On a category axis the line is positioned by the category's serial
    # number; passing the day name breaks the annotation placement, which
    # averages the x coordinates.
    fig.add_vline(
        x=options["day_idx"],
        line_width=2,
        line_dash="dash",
        line_color="red",
        annotation_text="Selected Day",
        annotation_position="top right",
    )
    fig.update_layout(
        xaxis=dict(categoryorder="array", categoryarray=_DAY_NAMES),
        coloraxis_colorbar=dict(title="Avg Value (€)"),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown(
        "**Key Observations:**\n"
        "- Weekends (especially Saturday) typically show higher demand\n"
        "- Tuesday and Thursday often have higher average ride values\n"
        "- Weekend nights show different demand patterns than weekday nights\n"
    )


def _render_tips():
    """Draw the three static "Tips for Drivers" cards in the footer."""
    _section_header("Tips for Drivers")
    cards = [
        (
            "Best Times",
            "- **Weekdays**: 7-9 AM, 4-6 PM\n"
            "- **Weekends**: 10 PM - 2 AM\n"
            "- **High Value**: Tuesday & Thursday early morning (4-5 AM) and late night (10 PM-12 AM)\n",
        ),
        (
            "Best Areas",
            "- **City Center**: Consistent demand throughout the day\n"
            "- **University Area**: Higher value rides, especially weekdays\n"
            "- **Business District**: Good during morning rush hours\n",
        ),
        (
            "Strategy Tips",
            "- Position 5-10 minutes before peak times\n"
            "- Balance high-volume vs high-value areas\n"
            "- For longer shifts, start with high-value rides then switch to high-volume\n",
        ),
    ]
    for column, (title, body) in zip(st.columns(3), cards):
        with column:
            st.markdown('<div class="card">', unsafe_allow_html=True)
            st.subheader(title)
            st.markdown(body)
            st.markdown('</div>', unsafe_allow_html=True)


def main():
    """Run the Streamlit page: sidebar inputs, map, recommendations, charts, tips."""
    model = DemandPredictionModel()
    options = _render_sidebar()

    recommendations = model.predict(
        day=options["day_idx"],
        hour=options["selected_hour"],
        current_lat=options["current_lat"],
        current_lng=options["current_lng"],
        value_weight=options["value_weight"],
        top_n=options["num_recommendations"],
    )

    # Main content area: map on the left, recommendations on the right
    map_col, rec_col = st.columns([3, 2])
    with map_col:
        _render_map(model, options, recommendations)
    with rec_col:
        _render_recommendations(recommendations)

    # Time series visualization
    _section_header("Demand Patterns Analysis")
    hourly_tab, daily_tab = st.tabs(["Hourly Patterns", "Daily Patterns"])
    with hourly_tab:
        _render_hourly_tab(model, options)
    with daily_tab:
        _render_daily_tab(model, options)

    # Footer section with additional information
    _render_tips()


if __name__ == "__main__":
    main()