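# NOTE: "Spaces: Sleeping / Sleeping" was hosting-page status text captured
# together with this file; it is not part of the program.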
# streamlit_app.py - Bolt Driver Recommendation System | |
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from datetime import datetime, timedelta | |
import folium | |
from folium.plugins import HeatMap, MarkerCluster | |
from streamlit_folium import folium_static | |
import pickle | |
import os | |
# Set page configuration.
# NOTE(review): the original page_icon glyph was destroyed by an encoding
# round-trip (it survived only as "π"); a car emoji is used as a stand-in --
# confirm the intended icon.
st.set_page_config(
    page_title="Bolt Driver Recommendation System",
    page_icon="🚗",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS styling.
# The stylesheet is injected as raw HTML; the class names defined here
# (main-header, section-header, highlight, card, info-box, ...) are the ones
# referenced by the st.markdown(...) calls elsewhere in this file.
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        color: #272D37;
        text-align: center;
        margin-bottom: 1rem;
        font-weight: bold;
    }
    .sub-header {
        font-size: 1.8rem;
        color: #272D37;
        margin-top: 1.5rem;
        margin-bottom: 1rem;
    }
    .section-header {
        font-size: 1.3rem;
        color: #272D37;
        margin-top: 1rem;
        margin-bottom: 0.5rem;
        font-weight: bold;
    }
    .highlight {
        background-color: #F0F2F6;
        padding: 1rem;
        border-radius: 0.5rem;
        margin-bottom: 1rem;
    }
    .card {
        background-color: white;
        border-radius: 0.5rem;
        padding: 1.5rem;
        box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
        margin-bottom: 1rem;
    }
    .info-box {
        background-color: #e8f4f8;
        border-left: 5px solid #4e8cff;
        padding: 0.8rem;
        border-radius: 0.3rem;
        margin-bottom: 1rem;
    }
    .metric-container {
        display: flex;
        justify-content: space-between;
        gap: 1rem;
    }
    .metric-card {
        background-color: white;
        border-radius: 0.5rem;
        padding: 1rem;
        text-align: center;
        box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
        flex: 1;
    }
    .metric-value {
        font-size: 1.8rem;
        font-weight: bold;
        color: #272D37;
    }
    .metric-label {
        font-size: 0.9rem;
        color: #6e707e;
    }
</style>
""", unsafe_allow_html=True)
# Header and app description, rendered once at the top of every page run.
st.markdown('<div class="main-header">Bolt Driver Recommendation System</div>', unsafe_allow_html=True)
with st.container():
    st.markdown('<div class="info-box">This application helps Bolt drivers find optimal areas to position themselves based on predicted ride demand and value. The recommendations are personalized based on time, location, and driver preferences.</div>', unsafe_allow_html=True)
class DemandPredictionModel:
    """Demonstration ride-demand model on a 10x10 grid covering Tallinn.

    No trained model is loaded. ``setup_demo_data`` builds two synthetic
    tables indexed ``[day_of_week][hour][lat_bin][lng_bin]``:

    - ``demand_patterns["demand"]``: expected number of rides per cell
    - ``demand_patterns["value"]``: demand-weighted average ride value per cell

    ``predict`` ranks the cells of one (day, hour) slice.
    """

    # Earth radius in kilometers, used by the haversine distance.
    EARTH_RADIUS_KM = 6371

    def __init__(self, seed=42):
        """Initialize the demand prediction model.

        Parameters:
        - seed: Seed for the random jitter applied to the demo data. The
          default is fixed so that every instance (for example, one per
          Streamlit rerun) produces the same tables. Pass ``None`` for
          non-deterministic data.
        """
        # In a real app, we would load the model from a file.
        # Here we create a dummy version for demonstration.
        self.seed = seed
        self.setup_demo_data()

    def setup_demo_data(self):
        """Set up the grid geometry and the demonstration demand tables."""
        # Define geographic boundaries (Tallinn)
        self.min_lat, self.max_lat = 59.32, 59.57
        self.min_lng, self.max_lng = 24.51, 24.97

        # Create grid
        grid_size = 10
        self.lat_step = (self.max_lat - self.min_lat) / grid_size
        self.lng_step = (self.max_lng - self.min_lng) / grid_size

        # Bin edges: grid_size + 1 edges delimit grid_size cells per axis
        self.lat_bins = np.linspace(self.min_lat, self.max_lat, grid_size + 1)
        self.lng_bins = np.linspace(self.min_lng, self.max_lng, grid_size + 1)

        self.demand_patterns = self.create_demand_patterns()

    def create_demand_patterns(self):
        """Build the synthetic demand and value tables.

        Each hotspot contributes its own demand to its cell and a reduced
        "spillover" demand to the eight neighboring cells. All contributions
        are accumulated, so the result does not depend on the order of the
        hotspot list. The value of a cell is the demand-weighted average of
        the values contributed to it (spillover rides are valued at 90% of
        the source hotspot), so it never exceeds the largest contributing
        value.

        Returns:
        - dict with keys ``"demand"`` and ``"value"``, each a float array of
          shape (7, 24, n_lat_cells, n_lng_cells)
        """
        days = 7
        hours = 24
        lat_bins = len(self.lat_bins) - 1
        lng_bins = len(self.lng_bins) - 1
        shape = (days, hours, lat_bins, lng_bins)

        demand_patterns = np.zeros(shape)
        # Sum of (rides * value) per cell; divided by demand at the end to
        # obtain a proper average instead of a sum of averages.
        revenue = np.zeros(shape)

        # getattr keeps this usable on instances built without __init__
        rng = np.random.default_rng(getattr(self, "seed", None))

        # Key areas
        hotspots = [
            {"lat_idx": 4, "lng_idx": 5, "base_demand": 300, "value": 1.91},  # city center
            {"lat_idx": 4, "lng_idx": 4, "base_demand": 150, "value": 1.94},  # secondary hub
            {"lat_idx": 3, "lng_idx": 4, "base_demand": 80, "value": 2.89},   # university area
            {"lat_idx": 3, "lng_idx": 3, "base_demand": 60, "value": 1.85},   # residential zone
            {"lat_idx": 4, "lng_idx": 6, "base_demand": 50, "value": 1.56},   # business district
        ]

        # Demand multiplier per hour of day
        hourly_factors = {
            0: 0.5, 1: 0.4, 2: 0.3, 3: 0.3, 4: 0.3, 5: 0.5,
            6: 0.8, 7: 0.9, 8: 0.7, 9: 0.6, 10: 0.6, 11: 0.6,
            12: 0.7, 13: 0.8, 14: 0.9, 15: 1.0, 16: 1.0, 17: 0.8,
            18: 0.7, 19: 0.7, 20: 0.7, 21: 0.8, 22: 0.9, 23: 0.7,
        }
        # Value multiplier per hour of day
        value_factors = {
            0: 1.4, 1: 0.8, 2: 1.0, 3: 0.6, 4: 1.6, 5: 0.7,
            6: 0.9, 7: 1.1, 8: 1.0, 9: 0.7, 10: 0.8, 11: 1.1,
            12: 0.8, 13: 0.9, 14: 1.6, 15: 0.9, 16: 0.8, 17: 1.0,
            18: 0.8, 19: 0.7, 20: 1.1, 21: 0.8, 22: 1.0, 23: 1.2,
        }
        # Demand multiplier per day of week
        day_factors = {
            0: 0.8,   # Monday
            1: 0.9,   # Tuesday
            2: 0.9,   # Wednesday
            3: 0.85,  # Thursday
            4: 0.95,  # Friday
            5: 1.0,   # Saturday
            6: 0.8,   # Sunday
        }

        neighbor_offsets = [
            (d_lat, d_lng)
            for d_lat in (-1, 0, 1)
            for d_lng in (-1, 0, 1)
            if (d_lat, d_lng) != (0, 0)
        ]

        for day in range(days):
            for hour in range(hours):
                time_factor = hourly_factors[hour] * day_factors[day]

                # Tuesday and Thursday early morning and late night have
                # doubled values
                special_value_factor = 1.0
                if day in (1, 3) and hour in (4, 22, 23):
                    special_value_factor = 2.0

                for spot in hotspots:
                    lat_idx, lng_idx = spot["lat_idx"], spot["lng_idx"]

                    # Base demand with +/-10% jitter
                    demand = spot["base_demand"] * time_factor
                    demand *= rng.uniform(0.9, 1.1)

                    # Base value with +/-5% jitter
                    value = spot["value"] * value_factors[hour] * special_value_factor
                    value *= rng.uniform(0.95, 1.05)

                    demand_patterns[day, hour, lat_idx, lng_idx] += demand
                    revenue[day, hour, lat_idx, lng_idx] += demand * value

                    # Spillover to neighboring cells, decreasing with distance
                    for d_lat, d_lng in neighbor_offsets:
                        n_lat = lat_idx + d_lat
                        n_lng = lng_idx + d_lng
                        if not (0 <= n_lat < lat_bins and 0 <= n_lng < lng_bins):
                            continue
                        spill = demand * 0.5 / np.hypot(d_lat, d_lng)
                        demand_patterns[day, hour, n_lat, n_lng] += spill
                        # Slightly lower values in spillover areas
                        revenue[day, hour, n_lat, n_lng] += spill * value * 0.9

        # Cells without demand keep a value of 0
        value_patterns = np.divide(
            revenue,
            demand_patterns,
            out=np.zeros(shape),
            where=demand_patterns > 0,
        )

        return {
            "demand": demand_patterns,
            "value": value_patterns,
        }

    @staticmethod
    def _haversine_km(lat1, lng1, lat2, lng2):
        """Return the great-circle distance in kilometers between two points given in degrees."""
        d_lat = np.radians(lat1 - lat2)
        d_lng = np.radians(lng1 - lng2)
        a = (np.sin(d_lat / 2) ** 2 +
             np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) *
             np.sin(d_lng / 2) ** 2)
        # Clamp so floating-point rounding cannot push sqrt() out of its domain
        a = np.clip(a, 0.0, 1.0)
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return DemandPredictionModel.EARTH_RADIUS_KM * c

    def predict(self, day, hour, current_lat=None, current_lng=None, value_weight=0.5, top_n=5):
        """
        Predict high-demand areas for a given day and hour

        Parameters:
        - day: Day of week (0=Monday, 6=Sunday)
        - hour: Hour of day (0-23)
        - current_lat: Driver's current latitude (optional)
        - current_lng: Driver's current longitude (optional)
        - value_weight: Weight for balancing demand vs value (0-1)
        - top_n: Number of recommendations to return

        Returns:
        - List of at most top_n dicts, best first, with keys center_lat,
          center_lng, predicted_rides, avg_value, expected_value, score,
          adjusted_score and distance_km (None unless both coordinates
          were given). Only cells with demand > 0 are considered.
        """
        demand_matrix = self.demand_patterns["demand"][day, hour]
        value_matrix = self.demand_patterns["value"][day, hour]

        # Normalization constants are the same for every cell of the slice
        max_demand = np.max(demand_matrix)
        max_value = np.max(value_matrix)
        has_location = current_lat is not None and current_lng is not None

        # Cell centers are the midpoints of consecutive bin edges
        lat_centers = (self.lat_bins[:-1] + self.lat_bins[1:]) / 2
        lng_centers = (self.lng_bins[:-1] + self.lng_bins[1:]) / 2

        recommendations = []
        # np.nonzero yields indices in row-major order (lat outer, lng inner)
        for lat_idx, lng_idx in zip(*np.nonzero(demand_matrix > 0)):
            demand = demand_matrix[lat_idx, lng_idx]
            value = value_matrix[lat_idx, lng_idx]
            center_lat = lat_centers[lat_idx]
            center_lng = lng_centers[lng_idx]

            distance_km = None
            if has_location:
                distance_km = self._haversine_km(current_lat, current_lng, center_lat, center_lng)

            # Scale demand and value to [0, 1] for scoring
            demand_score = demand / max_demand if max_demand > 0 else 0
            value_score = value / max_value if max_value > 0 else 0
            score = (1 - value_weight) * demand_score + value_weight * value_score

            if distance_km is None:
                adjusted_score = score
            else:
                # Distance penalty: the score is halved at 5 km
                adjusted_score = score * (1.0 / (1.0 + distance_km / 5.0))

            recommendations.append({
                "center_lat": center_lat,
                "center_lng": center_lng,
                "predicted_rides": demand,
                "avg_value": value,
                "expected_value": demand * value,
                "score": score,
                "adjusted_score": adjusted_score,
                "distance_km": distance_km,
            })

        # Sort by adjusted score, best first
        recommendations.sort(key=lambda rec: rec["adjusted_score"], reverse=True)
        return recommendations[:top_n]
# Main application flow | |
def _render_sidebar(days):
    """Render the sidebar widgets and return the chosen settings as a dict.

    ``current_lat`` / ``current_lng`` are None when the driver location is
    switched off.
    """
    with st.sidebar:
        st.markdown('<div class="section-header">Driver Options</div>', unsafe_allow_html=True)

        # Time selection (defaults to the current weekday and hour)
        st.subheader("Time Selection")
        today = datetime.now()
        selected_day = st.selectbox("Day of Week", days, index=today.weekday())
        selected_hour = st.slider("Hour of Day", 0, 23, today.hour, format="%d:00")

        # Location input
        st.subheader("Driver Location")
        use_location = st.checkbox("Use Current Location", value=True)
        # Default to Tallinn center
        default_lat, default_lng = 59.436, 24.753
        current_lat, current_lng = None, None
        if use_location:
            lat_col, lng_col = st.columns(2)
            with lat_col:
                current_lat = st.number_input("Latitude", value=default_lat, format="%.5f", step=0.001)
            with lng_col:
                current_lng = st.number_input("Longitude", value=default_lng, format="%.5f", step=0.001)

        # Preference settings
        st.subheader("Preferences")
        num_recommendations = st.slider("Number of Recommendations", 3, 10, 5)
        value_weight = st.slider(
            "Optimization Balance",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.1,
            help="0 = Focus on ride count, 1 = Focus on ride value",
        )

        # Display options
        st.subheader("Display Options")
        show_heatmap = st.checkbox("Show Demand Heatmap", value=True)

    return {
        "selected_day": selected_day,
        "day_idx": days.index(selected_day),
        "selected_hour": selected_hour,
        "current_lat": current_lat,
        "current_lng": current_lng,
        "num_recommendations": num_recommendations,
        "value_weight": value_weight,
        "show_heatmap": show_heatmap,
    }


def _render_map(model, recommendations, settings):
    """Draw the folium map with the driver marker, recommendations and optional heatmap."""
    st.markdown('<div class="section-header">Demand Map</div>', unsafe_allow_html=True)
    # Map rendering depends on third-party widgets; a failure here should not
    # take down the rest of the page, so it is reported and the run continues.
    try:
        m = folium.Map(
            location=[59.436, 24.753],  # Tallinn center
            zoom_start=12,
            tiles="CartoDB positron",
        )

        # Add driver marker if location provided
        if settings["current_lat"] is not None and settings["current_lng"] is not None:
            folium.Marker(
                location=[settings["current_lat"], settings["current_lng"]],
                popup="Your Location",
                icon=folium.Icon(color="blue", icon="user", prefix="fa"),
                tooltip="Your Current Location",
            ).add_to(m)

        # Add recommendation markers
        for rank, rec in enumerate(recommendations, start=1):
            distance_line = ""
            if rec["distance_km"] is not None:
                distance_line = f"Distance: {rec['distance_km']:.2f} km"
            folium.CircleMarker(
                location=[rec["center_lat"], rec["center_lng"]],
                radius=20,
                color="red",
                fill=True,
                fill_color="red",
                fill_opacity=0.6,
                popup=f"""
                <b>Recommendation {rank}</b><br>
                Expected rides: {rec['predicted_rides']:.1f}<br>
                Avg value: €{rec['avg_value']:.2f}<br>
                Expected value: €{rec['expected_value']:.2f}<br>
                {distance_line}
                """,
            ).add_to(m)
            # Rank number drawn on top of the circle
            folium.Marker(
                location=[rec["center_lat"], rec["center_lng"]],
                icon=folium.DivIcon(
                    html=f'<div style="font-size:12pt;color:white;font-weight:bold;text-align:center;width:25px;height:25px;line-height:25px;">{rank}</div>'
                ),
            ).add_to(m)

        # Add heatmap if enabled
        if settings["show_heatmap"]:
            # Get a larger set of predictions for the heatmap
            all_predictions = model.predict(settings["day_idx"], settings["selected_hour"], top_n=100)
            heat_data = [
                [pred["center_lat"], pred["center_lng"], pred["predicted_rides"]]
                for pred in all_predictions
            ]
            HeatMap(
                heat_data,
                radius=15,
                # String keys: recent folium releases post-process option keys
                # as strings and fail on float keys with a "split" error.
                gradient={
                    "0.2": "blue",
                    "0.4": "lime",
                    "0.6": "yellow",
                    "0.8": "orange",
                    "1.0": "red",
                },
                name="Demand Heatmap",
                show=True,
            ).add_to(m)

        # Add layer control
        folium.LayerControl().add_to(m)

        # Display the map
        folium_static(m, width=700)
    except Exception as e:
        st.error(f"Error rendering map: {e}")
        st.info("Showing tabular results instead.")


def _render_recommendations(recommendations):
    """Show the top recommendation as metrics and all recommendations as a table."""
    st.markdown('<div class="section-header">Recommendations</div>', unsafe_allow_html=True)

    if recommendations:
        top_rec = recommendations[0]
        st.markdown('<div class="highlight">', unsafe_allow_html=True)
        st.subheader("Top Recommendation")
        metric_left, metric_right = st.columns(2)
        with metric_left:
            st.metric("Expected Rides", f"{top_rec['predicted_rides']:.1f}")
            st.metric("Avg Value", f"€{top_rec['avg_value']:.2f}")
        with metric_right:
            st.metric("Expected Value", f"€{top_rec['expected_value']:.2f}")
            if top_rec["distance_km"] is not None:
                st.metric("Distance", f"{top_rec['distance_km']:.2f} km")
        st.markdown(f"Location: [{top_rec['center_lat']:.4f}, {top_rec['center_lng']:.4f}]")
        st.markdown('</div>', unsafe_allow_html=True)

        # Formatted table of all recommendations
        st.subheader("All Recommendations")
        rec_df = pd.DataFrame(recommendations)
        display_df = pd.DataFrame({
            "Rank": range(1, len(rec_df) + 1),
            "Expected Rides": rec_df["predicted_rides"].round(1),
            "Avg Value (€)": rec_df["avg_value"].round(2),
            "Expected Value (€)": rec_df["expected_value"].round(2),
        })
        # Add distance if available
        if "distance_km" in rec_df.columns and rec_df["distance_km"].notna().any():
            display_df["Distance (km)"] = rec_df["distance_km"].round(2)
        st.table(display_df)
    else:
        st.info("No recommendations available for the selected day and hour.")

    # Explanation of the score calculation
    st.markdown('<div class="info-box">', unsafe_allow_html=True)
    st.markdown("**How recommendations are calculated:**")
    st.markdown("""
    - Ride count predictions based on historical patterns
    - Value based on average ride fares
    - Recommendations balanced by your preferences
    - Distance factored in when location is provided
    """)
    st.markdown('</div>', unsafe_allow_html=True)


def _summarize(recs):
    """Return (total predicted rides, ride-weighted average value) for a list of predictions."""
    total_demand = sum(rec["predicted_rides"] for rec in recs)
    if total_demand > 0:
        avg_value = sum(rec["avg_value"] * rec["predicted_rides"] for rec in recs) / total_demand
    else:
        avg_value = 0
    return total_demand, avg_value


def _render_hourly_tab(model, settings):
    """Plot total demand and average value per hour for the selected day."""
    hourly_rows = []
    for hour in range(24):
        total_demand, avg_value = _summarize(model.predict(settings["day_idx"], hour, top_n=100))
        hourly_rows.append({"hour": hour, "demand": total_demand, "value": avg_value})
    hourly_df = pd.DataFrame(hourly_rows)

    # Dual-axis chart: demand on the left axis, value on the right axis
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["demand"],
        name="Demand",
        line=dict(color="#4e8cff", width=3),
        hovertemplate="Hour: %{x}<br>Demand: %{y:.1f}<extra></extra>",
    ))
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["value"],
        name="Avg Value (€)",
        line=dict(color="#ff6b6b", width=3, dash="dot"),
        yaxis="y2",
        hovertemplate="Hour: %{x}<br>Avg Value: €%{y:.2f}<extra></extra>",
    ))
    # Highlight selected hour
    fig.add_vline(
        x=settings["selected_hour"],
        line_width=2,
        line_dash="dash",
        line_color="green",
        annotation_text="Selected Hour",
        annotation_position="top right",
    )
    fig.update_layout(
        title=f"Hourly Demand Pattern for {settings['selected_day']}",
        xaxis=dict(
            title="Hour of Day",
            tickmode="linear",
            tick0=0,
            dtick=1,
        ),
        # The axis title color is set through title.font; the legacy
        # `titlefont` shortcut is rejected by current Plotly releases.
        yaxis=dict(
            title=dict(text="Demand (Expected Rides)", font=dict(color="#4e8cff")),
            tickfont=dict(color="#4e8cff"),
        ),
        yaxis2=dict(
            title=dict(text="Average Value (€)", font=dict(color="#ff6b6b")),
            tickfont=dict(color="#ff6b6b"),
            anchor="x",
            overlaying="y",
            side="right",
        ),
        hovermode="x unified",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
        ),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("""
    **Key Observations:**
    - Peak demand typically occurs between 15:00-18:00 (3-6 PM)
    - Early morning hours (4-5 AM) often show higher average ride values
    - Morning rush hour (6-9 AM) shows moderate demand with variable values
    """)


def _render_daily_tab(model, days, settings):
    """Plot demand at an assumed peak hour for each day of the week."""
    daily_rows = []
    for day, day_name in enumerate(days):
        peak_hour = 17 if day < 5 else 22  # Weekday peak at 5pm, weekend peak at 10pm
        total_demand, avg_value = _summarize(model.predict(day, peak_hour, top_n=100))
        daily_rows.append({"day": day_name, "demand": total_demand, "value": avg_value})
    daily_df = pd.DataFrame(daily_rows)

    fig = px.bar(
        daily_df,
        x="day",
        y="demand",
        color="value",
        color_continuous_scale="Viridis",
        labels={
            "day": "Day of Week",
            "demand": "Peak Demand (Expected Rides)",
            "value": "Avg Value (€)",
        },
        title="Peak Demand by Day of Week",
    )
    # Highlight selected day. The line is positioned by the numeric category
    # index (the axis order is pinned to `days` below) because an annotated
    # vline with a string x makes Plotly try to average string coordinates.
    fig.add_vline(
        x=settings["day_idx"],
        line_width=2,
        line_dash="dash",
        line_color="red",
        annotation_text="Selected Day",
        annotation_position="top right",
    )
    fig.update_layout(
        xaxis=dict(categoryorder="array", categoryarray=days),
        coloraxis_colorbar=dict(title="Avg Value (€)"),
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("""
    **Key Observations:**
    - Weekends (especially Saturday) typically show higher demand
    - Tuesday and Thursday often have higher average ride values
    - Weekend nights show different demand patterns than weekday nights
    """)


def _render_tips():
    """Render the static "Tips for Drivers" footer cards."""
    st.markdown('<div class="section-header">Tips for Drivers</div>', unsafe_allow_html=True)
    tips_col1, tips_col2, tips_col3 = st.columns(3)
    with tips_col1:
        st.markdown('<div class="card">', unsafe_allow_html=True)
        st.subheader("Best Times")
        st.markdown("""
        - **Weekdays**: 7-9 AM, 4-6 PM
        - **Weekends**: 10 PM - 2 AM
        - **High Value**: Tuesday & Thursday early morning (4-5 AM) and late night (10 PM-12 AM)
        """)
        st.markdown('</div>', unsafe_allow_html=True)
    with tips_col2:
        st.markdown('<div class="card">', unsafe_allow_html=True)
        st.subheader("Best Areas")
        st.markdown("""
        - **City Center**: Consistent demand throughout the day
        - **University Area**: Higher value rides, especially weekdays
        - **Business District**: Good during morning rush hours
        """)
        st.markdown('</div>', unsafe_allow_html=True)
    with tips_col3:
        st.markdown('<div class="card">', unsafe_allow_html=True)
        st.subheader("Strategy Tips")
        st.markdown("""
        - Position 5-10 minutes before peak times
        - Balance high-volume vs high-value areas
        - For longer shifts, start with high-value rides then switch to high-volume
        """)
        st.markdown('</div>', unsafe_allow_html=True)


def main():
    """Run one pass of the Streamlit page: inputs, map, recommendations, charts, tips."""
    # Initialize model
    model = DemandPredictionModel()

    days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    settings = _render_sidebar(days)

    # Generate recommendations (location is None when switched off)
    recommendations = model.predict(
        day=settings["day_idx"],
        hour=settings["selected_hour"],
        current_lat=settings["current_lat"],
        current_lng=settings["current_lng"],
        value_weight=settings["value_weight"],
        top_n=settings["num_recommendations"],
    )

    # Main content area: map on the left, recommendations on the right
    map_col, rec_col = st.columns([3, 2])
    with map_col:
        _render_map(model, recommendations, settings)
    with rec_col:
        _render_recommendations(recommendations)

    # Time series visualization
    st.markdown('<div class="section-header">Demand Patterns Analysis</div>', unsafe_allow_html=True)
    tab1, tab2 = st.tabs(["Hourly Patterns", "Daily Patterns"])
    with tab1:
        _render_hourly_tab(model, settings)
    with tab2:
        _render_daily_tab(model, days, settings)

    # Footer section with additional information
    _render_tips()
# Script entry point: build and render the page when the file is executed.
if __name__ == "__main__":
    main()