# streamlit_app.py - Bolt Driver Recommendation System
"""Streamlit dashboard that recommends positioning areas to Bolt drivers.

The page is built from a synthetic demand/value model over a 10 x 10 grid
covering Tallinn.  The sidebar collects the day, hour, driver location and
preferences; the main area shows a map, a ranked table and two charts.

NOTE(review): several ``st.markdown(..., unsafe_allow_html=True)`` calls carry
no markup at all (only text or a bare newline).  The HTML/CSS they were meant
to inject appears to be missing from this file -- confirm against the intended
design.  The visible text of every such string is kept as-is.
"""

import os
import pickle
from datetime import datetime, timedelta

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import streamlit as st
from folium.plugins import HeatMap, MarkerCluster
from streamlit_folium import folium_static

# Set page configuration
st.set_page_config(
    page_title="Bolt Driver Recommendation System",
    page_icon="🚖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS styling
st.markdown(""" """, unsafe_allow_html=True)

# Header and app description
st.markdown("\nBolt Driver Recommendation System\n", unsafe_allow_html=True)

with st.container():
    st.markdown(
        "\nThis application helps Bolt drivers find optimal areas to position "
        "themselves based on predicted ride demand and value. The "
        "recommendations are personalized based on time, location, and driver "
        "preferences.\n",
        unsafe_allow_html=True,
    )

# Day names indexed by ``datetime.weekday()`` (0 = Monday).
DAYS = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

# Default map centre and default driver position (Tallinn center).
TALLINN_CENTER = (59.436, 24.753)

# The demo grid is 10 x 10, so asking for 100 results returns every cell
# that has any demand.
ALL_CELLS = 100


class DemandPredictionModel:
    """Synthetic ride-demand model over a regular lat/lng grid of Tallinn."""

    def __init__(self, seed=42):
        """Initialize the demand prediction model.

        Parameters:
        - seed: Seed for the random jitter applied to the demo data.  The
          default keeps the generated numbers identical between Streamlit
          reruns (the script re-executes, and rebuilds the model, on every
          widget interaction).  Pass ``None`` for fresh randomness each time.
        """
        # In a real app, we would load the model from a file
        # Here we'll create a dummy version for demonstration
        self._rng = np.random.default_rng(seed)
        self.setup_demo_data()

    def setup_demo_data(self):
        """Set up the grid geometry and the synthetic demand/value arrays."""
        # Define geographic boundaries (Tallinn)
        self.min_lat, self.max_lat = 59.32, 59.57
        self.min_lng, self.max_lng = 24.51, 24.97

        # Create grid
        grid_size = 10
        self.lat_step = (self.max_lat - self.min_lat) / grid_size
        self.lng_step = (self.max_lng - self.min_lng) / grid_size

        # Bin edges: grid_size + 1 edges delimit grid_size cells per axis
        self.lat_bins = np.linspace(self.min_lat, self.max_lat, grid_size + 1)
        self.lng_bins = np.linspace(self.min_lng, self.max_lng, grid_size + 1)

        self.demand_patterns = self.create_demand_patterns()

    def create_demand_patterns(self):
        """Build the synthetic demand and average-value arrays.

        Each hotspot contributes rides to its own cell and, at a reduced
        rate, to its eight neighbours.  Rides and revenue are accumulated
        per cell and the average value is derived as revenue / rides, so a
        cell fed by several hotspots gets a demand-weighted average fare
        instead of a sum of fares, and the result does not depend on the
        order in which hotspots are processed.

        Returns:
        - dict with keys "demand" and "value", each an array indexed as
          [day_of_week][hour][lat_bin][lng_bin]
        """
        days = 7
        hours = 24
        lat_bins = len(self.lat_bins) - 1
        lng_bins = len(self.lng_bins) - 1
        shape = (days, hours, lat_bins, lng_bins)

        demand_patterns = np.zeros(shape)
        # Sum of (rides * fare) per cell; divided by rides at the end.
        revenue_patterns = np.zeros(shape)

        # Key areas
        city_center = {"lat_idx": 4, "lng_idx": 5, "base_demand": 300, "value": 1.91}
        secondary_hub = {"lat_idx": 4, "lng_idx": 4, "base_demand": 150, "value": 1.94}
        university_area = {"lat_idx": 3, "lng_idx": 4, "base_demand": 80, "value": 2.89}
        residential_zone = {"lat_idx": 3, "lng_idx": 3, "base_demand": 60, "value": 1.85}
        business_district = {"lat_idx": 4, "lng_idx": 6, "base_demand": 50, "value": 1.56}
        hotspots = [
            city_center,
            secondary_hub,
            university_area,
            residential_zone,
            business_district,
        ]

        # Time patterns
        hourly_factors = {
            0: 0.5, 1: 0.4, 2: 0.3, 3: 0.3, 4: 0.3, 5: 0.5,
            6: 0.8, 7: 0.9, 8: 0.7, 9: 0.6, 10: 0.6, 11: 0.6,
            12: 0.7, 13: 0.8, 14: 0.9, 15: 1.0, 16: 1.0, 17: 0.8,
            18: 0.7, 19: 0.7, 20: 0.7, 21: 0.8, 22: 0.9, 23: 0.7,
        }

        # Value patterns - certain times have higher values
        value_factors = {
            0: 1.4, 1: 0.8, 2: 1.0, 3: 0.6, 4: 1.6, 5: 0.7,
            6: 0.9, 7: 1.1, 8: 1.0, 9: 0.7, 10: 0.8, 11: 1.1,
            12: 0.8, 13: 0.9, 14: 1.6, 15: 0.9, 16: 0.8, 17: 1.0,
            18: 0.8, 19: 0.7, 20: 1.1, 21: 0.8, 22: 1.0, 23: 1.2,
        }

        # Day patterns
        day_factors = {
            0: 0.8,   # Monday
            1: 0.9,   # Tuesday
            2: 0.9,   # Wednesday
            3: 0.85,  # Thursday
            4: 0.95,  # Friday
            5: 1.0,   # Saturday
            6: 0.8,   # Sunday
        }

        # The eight surrounding cells, paired with their grid distance.
        neighbours = [
            (d_lat, d_lng, np.sqrt(d_lat**2 + d_lng**2))
            for d_lat in (-1, 0, 1)
            for d_lng in (-1, 0, 1)
            if not (d_lat == 0 and d_lng == 0)
        ]

        rng = self._rng
        for day in range(days):
            for hour in range(hours):
                time_factor = hourly_factors[hour] * day_factors[day]

                # Tuesday and Thursday early morning and late night have
                # higher values
                if day in (1, 3) and hour in (4, 22, 23):
                    special_value_factor = 2.0
                else:
                    special_value_factor = 1.0

                for spot in hotspots:
                    lat_idx, lng_idx = spot["lat_idx"], spot["lng_idx"]

                    # Base level scaled by time, with +/-10% jitter
                    demand = spot["base_demand"] * time_factor
                    demand *= rng.uniform(0.9, 1.1)

                    # Base fare scaled by hour, with +/-5% jitter
                    value = spot["value"] * value_factors[hour] * special_value_factor
                    value *= rng.uniform(0.95, 1.05)

                    demand_patterns[day, hour, lat_idx, lng_idx] += demand
                    revenue_patterns[day, hour, lat_idx, lng_idx] += demand * value

                    # Add some spillover to neighboring cells
                    for d_lat, d_lng, distance in neighbours:
                        n_lat = lat_idx + d_lat
                        n_lng = lng_idx + d_lng
                        if not (0 <= n_lat < lat_bins and 0 <= n_lng < lng_bins):
                            continue
                        # Spillover decreases with distance
                        spill_demand = demand * (0.5 / distance)
                        # Slightly lower values in spillover areas
                        spill_value = value * 0.9
                        demand_patterns[day, hour, n_lat, n_lng] += spill_demand
                        revenue_patterns[day, hour, n_lat, n_lng] += (
                            spill_demand * spill_value
                        )

        # Average fare per cell; cells without rides keep a value of 0.
        value_patterns = np.zeros(shape)
        has_rides = demand_patterns > 0
        value_patterns[has_rides] = (
            revenue_patterns[has_rides] / demand_patterns[has_rides]
        )

        return {"demand": demand_patterns, "value": value_patterns}

    @staticmethod
    def _haversine_km(lat1, lng1, lat2, lng2):
        """Return the great-circle distance in kilometres between two points."""
        earth_radius_km = 6371
        d_lat = np.radians(lat1 - lat2)
        d_lng = np.radians(lng1 - lng2)
        a = (
            np.sin(d_lat / 2) ** 2
            + np.cos(np.radians(lat1)) * np.cos(np.radians(lat2))
            * np.sin(d_lng / 2) ** 2
        )
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return float(earth_radius_km * c)

    def predict(self, day, hour, current_lat=None, current_lng=None,
                value_weight=0.5, top_n=5):
        """Predict high-demand areas for a given day and hour.

        Parameters:
        - day: Day of week (0=Monday, 6=Sunday)
        - hour: Hour of day (0-23)
        - current_lat: Driver's current latitude (optional)
        - current_lng: Driver's current longitude (optional)
        - value_weight: Weight for balancing demand vs value (0-1)
        - top_n: Number of recommendations to return

        Returns:
        - List of at most ``top_n`` dicts, best first, with keys
          "center_lat", "center_lng", "predicted_rides", "avg_value",
          "expected_value", "score", "adjusted_score" and "distance_km"
          ("distance_km" is None unless both coordinates are given).
          Only cells with positive demand are considered.
        """
        demand_matrix = self.demand_patterns["demand"][day, hour]
        value_matrix = self.demand_patterns["value"][day, hour]

        # Normalisation constants are the same for every cell, so compute
        # them once instead of once per grid cell.
        max_demand = np.max(demand_matrix)
        max_value = np.max(value_matrix)
        has_location = current_lat is not None and current_lng is not None

        recommendations = []
        for lat_idx in range(len(self.lat_bins) - 1):
            center_lat = float(
                (self.lat_bins[lat_idx] + self.lat_bins[lat_idx + 1]) / 2
            )
            for lng_idx in range(len(self.lng_bins) - 1):
                demand = float(demand_matrix[lat_idx, lng_idx])
                if not demand > 0:
                    continue
                value = float(value_matrix[lat_idx, lng_idx])
                center_lng = float(
                    (self.lng_bins[lng_idx] + self.lng_bins[lng_idx + 1]) / 2
                )

                # Scale demand and value to 0-1 for scoring
                demand_score = demand / max_demand if max_demand > 0 else 0
                value_score = value / max_value if max_value > 0 else 0
                score = float(
                    (1 - value_weight) * demand_score + value_weight * value_score
                )

                if has_location:
                    distance_km = self._haversine_km(
                        current_lat, current_lng, center_lat, center_lng
                    )
                    # Penalty shrinks the score as distance grows; the
                    # score is halved at 5 km.
                    adjusted_score = score * (1.0 / (1.0 + distance_km / 5.0))
                else:
                    distance_km = None
                    adjusted_score = score

                recommendations.append({
                    "center_lat": center_lat,
                    "center_lng": center_lng,
                    "predicted_rides": demand,
                    "avg_value": value,
                    "expected_value": demand * value,
                    "score": score,
                    "adjusted_score": adjusted_score,
                    "distance_km": distance_km,
                })

        # Sort by adjusted score, best first
        recommendations.sort(key=lambda rec: rec["adjusted_score"], reverse=True)
        return recommendations[:top_n]


def _render_sidebar():
    """Draw the sidebar widgets and return the driver's selections as a dict."""
    with st.sidebar:
        st.markdown("\nDriver Options\n", unsafe_allow_html=True)

        # Time selection (defaults to the server's current day and hour)
        st.subheader("Time Selection")
        today = datetime.now()
        selected_day = st.selectbox("Day of Week", DAYS, index=today.weekday())
        selected_hour = st.slider("Hour of Day", 0, 23, today.hour, format="%d:00")

        # Location input
        st.subheader("Driver Location")
        use_location = st.checkbox("Use Current Location", value=True)

        # Default to Tallinn center
        default_lat, default_lng = TALLINN_CENTER
        current_lat, current_lng = None, None
        if use_location:
            lat_col, lng_col = st.columns(2)
            with lat_col:
                current_lat = st.number_input(
                    "Latitude", value=default_lat, format="%.5f", step=0.001
                )
            with lng_col:
                current_lng = st.number_input(
                    "Longitude", value=default_lng, format="%.5f", step=0.001
                )

        # Preference settings
        st.subheader("Preferences")
        num_recommendations = st.slider("Number of Recommendations", 3, 10, 5)
        value_weight = st.slider(
            "Optimization Balance",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.1,
            help="0 = Focus on ride count, 1 = Focus on ride value",
        )

        # Advanced options for visual
        st.subheader("Display Options")
        show_heatmap = st.checkbox("Show Demand Heatmap", value=True)

    return {
        "selected_day": selected_day,
        "day_idx": DAYS.index(selected_day),
        "selected_hour": selected_hour,
        "current_lat": current_lat,
        "current_lng": current_lng,
        "num_recommendations": num_recommendations,
        "value_weight": value_weight,
        "show_heatmap": show_heatmap,
    }


def _render_map(model, recommendations, settings):
    """Draw the folium map with driver, recommendation and heatmap layers."""
    st.markdown("\nDemand Map\n", unsafe_allow_html=True)

    # Map rendering depends on third-party JS/HTML generation; a failure
    # here is reported in the page instead of aborting the whole app.
    try:
        m = folium.Map(
            location=list(TALLINN_CENTER),
            zoom_start=12,
            tiles="CartoDB positron",
        )

        # Add driver marker if location provided
        if settings["current_lat"] is not None and settings["current_lng"] is not None:
            folium.Marker(
                location=[settings["current_lat"], settings["current_lng"]],
                popup="Your Location",
                icon=folium.Icon(color="blue", icon="user", prefix="fa"),
                tooltip="Your Current Location",
            ).add_to(m)

        # Add recommendation markers
        for rank, rec in enumerate(recommendations, start=1):
            popup_lines = [
                f"Recommendation {rank}",
                f"Expected rides: {rec['predicted_rides']:.1f}",
                f"Avg value: €{rec['avg_value']:.2f}",
                f"Expected value: €{rec['expected_value']:.2f}",
            ]
            if rec["distance_km"] is not None:
                popup_lines.append(f"Distance: {rec['distance_km']:.2f} km")

            folium.CircleMarker(
                location=[rec["center_lat"], rec["center_lng"]],
                radius=20,
                color="red",
                fill=True,
                fill_color="red",
                fill_opacity=0.6,
                # Popups are rendered as HTML, where a plain newline
                # collapses to a space; <br> keeps one fact per line.
                popup="<br>".join(popup_lines),
            ).add_to(m)

            # Add number label
            folium.Marker(
                location=[rec["center_lat"], rec["center_lng"]],
                icon=folium.DivIcon(html=f"\n{rank}\n"),
            ).add_to(m)

        # Add heatmap if enabled
        if settings["show_heatmap"]:
            all_predictions = model.predict(
                settings["day_idx"], settings["selected_hour"], top_n=ALL_CELLS
            )
            heat_data = [
                [pred["center_lat"], pred["center_lng"], pred["predicted_rides"]]
                for pred in all_predictions
            ]
            HeatMap(
                heat_data,
                radius=15,
                # String keys serialise to exactly the same JSON as float
                # keys would.  NOTE(review): some folium releases are
                # reported to fail on non-string gradient keys -- confirm
                # against the pinned folium version.
                gradient={
                    "0.2": "blue",
                    "0.4": "lime",
                    "0.6": "yellow",
                    "0.8": "orange",
                    "1.0": "red",
                },
                name="Demand Heatmap",
                show=True,
            ).add_to(m)

        # Add layer control
        folium.LayerControl().add_to(m)

        # Display the map
        folium_static(m, width=700)
    except Exception as e:
        st.error(f"Error rendering map: {e}")
        st.info("Showing tabular results instead.")


def _render_recommendations(recommendations):
    """Draw the top-recommendation metrics and the ranked table."""
    st.markdown("\nRecommendations\n", unsafe_allow_html=True)

    if recommendations:
        top_rec = recommendations[0]

        st.markdown("\n", unsafe_allow_html=True)
        st.subheader("Top Recommendation")
        left_col, right_col = st.columns(2)
        with left_col:
            st.metric("Expected Rides", f"{top_rec['predicted_rides']:.1f}")
            st.metric("Avg Value", f"€{top_rec['avg_value']:.2f}")
        with right_col:
            st.metric("Expected Value", f"€{top_rec['expected_value']:.2f}")
            if top_rec["distance_km"] is not None:
                st.metric("Distance", f"{top_rec['distance_km']:.2f} km")
        st.markdown(
            f"Location: [{top_rec['center_lat']:.4f}, {top_rec['center_lng']:.4f}]"
        )
        st.markdown("\n", unsafe_allow_html=True)

        # Create formatted table of all recommendations
        st.subheader("All Recommendations")
        rec_df = pd.DataFrame(recommendations)
        display_df = pd.DataFrame({
            "Rank": range(1, len(rec_df) + 1),
            "Expected Rides": rec_df["predicted_rides"].round(1),
            "Avg Value (€)": rec_df["avg_value"].round(2),
            "Expected Value (€)": rec_df["expected_value"].round(2),
        })

        # Add distance if available
        if "distance_km" in rec_df.columns and rec_df["distance_km"].notna().any():
            display_df["Distance (km)"] = rec_df["distance_km"].round(2)

        st.table(display_df)
    else:
        # An empty list has no columns to build the table from.
        st.info("No recommendations available for the selected time.")

    # Add explanation for score calculation
    st.markdown("\n", unsafe_allow_html=True)
    st.markdown("**How recommendations are calculated:**")
    st.markdown("""
    - Ride count predictions based on historical patterns
    - Value based on average ride fares
    - Recommendations balanced by your preferences
    - Distance factored in when location is provided
    """)
    st.markdown("\n", unsafe_allow_html=True)


def _summarize_demand(model, day, hour):
    """Return (total rides, ride-weighted average value) over all grid cells."""
    cells = model.predict(day, hour, top_n=ALL_CELLS)
    total_demand = sum(rec["predicted_rides"] for rec in cells)
    if total_demand > 0:
        weighted = sum(rec["avg_value"] * rec["predicted_rides"] for rec in cells)
        avg_value = weighted / total_demand
    else:
        avg_value = 0
    return total_demand, avg_value


def _render_hourly_tab(model, settings):
    """Draw the dual-axis demand/value chart for the selected day."""
    hourly_data = []
    for hour in range(24):
        total_demand, avg_value = _summarize_demand(model, settings["day_idx"], hour)
        hourly_data.append({"hour": hour, "demand": total_demand, "value": avg_value})
    hourly_df = pd.DataFrame(hourly_data)

    # Create dual-axis chart
    fig = go.Figure()

    # Add demand line.  Plotly hover text needs <br> for a line break.
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["demand"],
        name="Demand",
        line=dict(color="#4e8cff", width=3),
        hovertemplate="Hour: %{x}<br>Demand: %{y:.1f}",
    ))

    # Add value line on secondary axis
    fig.add_trace(go.Scatter(
        x=hourly_df["hour"],
        y=hourly_df["value"],
        name="Avg Value (€)",
        line=dict(color="#ff6b6b", width=3, dash="dot"),
        yaxis="y2",
        hovertemplate="Hour: %{x}<br>Avg Value: €%{y:.2f}",
    ))

    # Highlight selected hour
    fig.add_vline(
        x=settings["selected_hour"],
        line_width=2,
        line_dash="dash",
        line_color="green",
        annotation_text="Selected Hour",
        annotation_position="top right",
    )

    # Axis title colours go through ``title.font``; the older ``titlefont``
    # shortcut is no longer accepted by current Plotly releases.
    fig.update_layout(
        title=f"Hourly Demand Pattern for {settings['selected_day']}",
        xaxis=dict(title="Hour of Day", tickmode="linear", tick0=0, dtick=1),
        yaxis=dict(
            title=dict(text="Demand (Expected Rides)", font=dict(color="#4e8cff")),
            tickfont=dict(color="#4e8cff"),
        ),
        yaxis2=dict(
            title=dict(text="Average Value (€)", font=dict(color="#ff6b6b")),
            tickfont=dict(color="#ff6b6b"),
            anchor="x",
            overlaying="y",
            side="right",
        ),
        hovermode="x unified",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5,
        ),
    )

    st.plotly_chart(fig, use_container_width=True)

    # Add observations
    st.markdown("""
    **Key Observations:**
    - Peak demand typically occurs between 15:00-18:00 (3-6 PM)
    - Early morning hours (4-5 AM) often show higher average ride values
    - Morning rush hour (6-9 AM) shows moderate demand with variable values
    """)


def _render_daily_tab(model, settings):
    """Draw the per-day peak-demand bar chart."""
    daily_data = []
    for day in range(7):
        # Weekday peak at 5pm, weekend peak at 10pm
        peak_hour = 17 if day < 5 else 22
        total_demand, avg_value = _summarize_demand(model, day, peak_hour)
        daily_data.append({"day": DAYS[day], "demand": total_demand, "value": avg_value})
    daily_df = pd.DataFrame(daily_data)

    # Create bar chart
    fig = px.bar(
        daily_df,
        x="day",
        y="demand",
        color="value",
        color_continuous_scale="Viridis",
        labels={
            "day": "Day of Week",
            "demand": "Peak Demand (Expected Rides)",
            "value": "Avg Value (€)",
        },
        title="Peak Demand by Day of Week",
    )

    # Highlight selected day.  The line is placed by category index rather
    # than by name: positioning the annotation averages the x values, which
    # fails for strings.  The index matches because the category order is
    # pinned to DAYS below.
    fig.add_vline(
        x=settings["day_idx"],
        line_width=2,
        line_dash="dash",
        line_color="red",
        annotation_text="Selected Day",
        annotation_position="top right",
    )

    fig.update_layout(
        xaxis=dict(categoryorder="array", categoryarray=DAYS),
        coloraxis_colorbar=dict(title="Avg Value (€)"),
    )

    st.plotly_chart(fig, use_container_width=True)

    # Add observations
    st.markdown("""
    **Key Observations:**
    - Weekends (especially Saturday) typically show higher demand
    - Tuesday and Thursday often have higher average ride values
    - Weekend nights show different demand patterns than weekday nights
    """)


def _render_tips():
    """Draw the static three-column "Tips for Drivers" footer."""
    st.markdown("\nTips for Drivers\n", unsafe_allow_html=True)

    tips_col1, tips_col2, tips_col3 = st.columns(3)

    with tips_col1:
        st.markdown("\n", unsafe_allow_html=True)
        st.subheader("Best Times")
        st.markdown("""
        - **Weekdays**: 7-9 AM, 4-6 PM
        - **Weekends**: 10 PM - 2 AM
        - **High Value**: Tuesday & Thursday early morning (4-5 AM) and late night (10 PM-12 AM)
        """)
        st.markdown("\n", unsafe_allow_html=True)

    with tips_col2:
        st.markdown("\n", unsafe_allow_html=True)
        st.subheader("Best Areas")
        st.markdown("""
        - **City Center**: Consistent demand throughout the day
        - **University Area**: Higher value rides, especially weekdays
        - **Business District**: Good during morning rush hours
        """)
        st.markdown("\n", unsafe_allow_html=True)

    with tips_col3:
        st.markdown("\n", unsafe_allow_html=True)
        st.subheader("Strategy Tips")
        st.markdown("""
        - Position 5-10 minutes before peak times
        - Balance high-volume vs high-value areas
        - For longer shifts, start with high-value rides then switch to high-volume
        """)
        st.markdown("\n", unsafe_allow_html=True)


# Main application flow
def main():
    """Build the page: sidebar inputs, map, recommendations, charts, tips."""
    # Initialize model
    model = DemandPredictionModel()

    settings = _render_sidebar()

    # Generate recommendations (coordinates are None when the driver
    # location is switched off, which disables the distance penalty)
    recommendations = model.predict(
        day=settings["day_idx"],
        hour=settings["selected_hour"],
        current_lat=settings["current_lat"],
        current_lng=settings["current_lng"],
        value_weight=settings["value_weight"],
        top_n=settings["num_recommendations"],
    )

    # Main content area
    map_col, rec_col = st.columns([3, 2])
    with map_col:
        _render_map(model, recommendations, settings)
    with rec_col:
        _render_recommendations(recommendations)

    # Time series visualization
    st.markdown("\nDemand Patterns Analysis\n", unsafe_allow_html=True)
    hourly_tab, daily_tab = st.tabs(["Hourly Patterns", "Daily Patterns"])
    with hourly_tab:
        _render_hourly_tab(model, settings)
    with daily_tab:
        _render_daily_tab(model, settings)

    # Footer section with additional information
    _render_tips()


if __name__ == "__main__":
    main()