Spaces:

Shreneek
/

cabpriceanalysis

Sleeping

App Files Files Community

cabpriceanalysis / app.py

Shreneek

Update app.py

9a9f2ab verified about 2 months ago

raw

history blame contribute delete

26.3 kB

	# streamlit_app.py - Bolt Driver Recommendation System
	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import plotly.express as px
	import plotly.graph_objects as go
	from datetime import datetime, timedelta
	import folium
	from folium.plugins import HeatMap, MarkerCluster
	from streamlit_folium import folium_static
	import pickle
	import os

	# Page-level setup: browser tab metadata, shared stylesheet and the page banner.
	APP_TITLE = "Bolt Driver Recommendation System"

	st.set_page_config(
	    page_title=APP_TITLE,
	    page_icon="🚖",
	    layout="wide",
	    initial_sidebar_state="expanded",
	)

	# Stylesheet for the CSS classes referenced by the raw-HTML snippets that the
	# app emits through st.markdown(..., unsafe_allow_html=True).
	CUSTOM_CSS = """
	<style>
	.main-header {
	font-size: 2.5rem;
	color: #272D37;
	text-align: center;
	margin-bottom: 1rem;
	font-weight: bold;
	}
	.sub-header {
	font-size: 1.8rem;
	color: #272D37;
	margin-top: 1.5rem;
	margin-bottom: 1rem;
	}
	.section-header {
	font-size: 1.3rem;
	color: #272D37;
	margin-top: 1rem;
	margin-bottom: 0.5rem;
	font-weight: bold;
	}
	.highlight {
	background-color: #F0F2F6;
	padding: 1rem;
	border-radius: 0.5rem;
	margin-bottom: 1rem;
	}
	.card {
	background-color: white;
	border-radius: 0.5rem;
	padding: 1.5rem;
	box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
	margin-bottom: 1rem;
	}
	.info-box {
	background-color: #e8f4f8;
	border-left: 5px solid #4e8cff;
	padding: 0.8rem;
	border-radius: 0.3rem;
	margin-bottom: 1rem;
	}
	.metric-container {
	display: flex;
	justify-content: space-between;
	gap: 1rem;
	}
	.metric-card {
	background-color: white;
	border-radius: 0.5rem;
	padding: 1rem;
	text-align: center;
	box-shadow: 0 0.15rem 1.75rem 0 rgba(58, 59, 69, 0.15);
	flex: 1;
	}
	.metric-value {
	font-size: 1.8rem;
	font-weight: bold;
	color: #272D37;
	}
	.metric-label {
	font-size: 0.9rem;
	color: #6e707e;
	}
	</style>
	"""
	st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

	# Page banner followed by a short description of what the app does.
	st.markdown(f'<div class="main-header">{APP_TITLE}</div>', unsafe_allow_html=True)

	with st.container():
	    st.markdown(
	        '<div class="info-box">'
	        'This application helps Bolt drivers find optimal areas to position '
	        'themselves based on predicted ride demand and value. '
	        'The recommendations are personalized based on time, location, and '
	        'driver preferences.'
	        '</div>',
	        unsafe_allow_html=True,
	    )

	class DemandPredictionModel:
	    """Demonstration demand model over a 10x10 latitude/longitude grid.

	    No trained model is loaded: the demand and ride-value tables are generated
	    from hard-coded hotspots and hourly/daily factors, with random jitter,
	    every time an instance is created.
	    """

	    def __init__(self):
	        """Initialize the demand prediction model."""
	        # In a real app the model would be loaded from a file; here a dummy
	        # version is generated for demonstration.
	        self.setup_demo_data()

	    def setup_demo_data(self):
	        """Set up the grid geometry and the synthetic demand/value tables."""
	        # Geographic boundaries (Tallinn)
	        self.min_lat, self.max_lat = 59.32, 59.57
	        self.min_lng, self.max_lng = 24.51, 24.97

	        grid_size = 10
	        self.lat_step = (self.max_lat - self.min_lat) / grid_size
	        self.lng_step = (self.max_lng - self.min_lng) / grid_size

	        # grid_size + 1 edges delimit grid_size cells along each axis.
	        self.lat_bins = np.linspace(self.min_lat, self.max_lat, grid_size + 1)
	        self.lng_bins = np.linspace(self.min_lng, self.max_lng, grid_size + 1)

	        self.demand_patterns = self.create_demand_patterns()

	    def create_demand_patterns(self):
	        """Build the synthetic demand and value tables.

	        Returns:
	            dict with keys "demand" and "value", each an array indexed as
	            [day_of_week][hour][lat_bin][lng_bin].
	        """
	        days = 7
	        hours = 24
	        lat_bins = len(self.lat_bins) - 1
	        lng_bins = len(self.lng_bins) - 1

	        demand_patterns = np.zeros((days, hours, lat_bins, lng_bins))
	        value_patterns = np.zeros((days, hours, lat_bins, lng_bins))

	        # Hotspot cells with their base ride count and base ride value.
	        hotspots = [
	            {"lat_idx": 4, "lng_idx": 5, "base_demand": 300, "value": 1.91},  # city center
	            {"lat_idx": 4, "lng_idx": 4, "base_demand": 150, "value": 1.94},  # secondary hub
	            {"lat_idx": 3, "lng_idx": 4, "base_demand": 80, "value": 2.89},  # university area
	            {"lat_idx": 3, "lng_idx": 3, "base_demand": 60, "value": 1.85},  # residential zone
	            {"lat_idx": 4, "lng_idx": 6, "base_demand": 50, "value": 1.56},  # business district
	        ]

	        # Demand multiplier per hour of day.
	        hourly_factors = {
	            0: 0.5, 1: 0.4, 2: 0.3, 3: 0.3, 4: 0.3, 5: 0.5,
	            6: 0.8, 7: 0.9, 8: 0.7, 9: 0.6, 10: 0.6, 11: 0.6,
	            12: 0.7, 13: 0.8, 14: 0.9, 15: 1.0, 16: 1.0, 17: 0.8,
	            18: 0.7, 19: 0.7, 20: 0.7, 21: 0.8, 22: 0.9, 23: 0.7,
	        }

	        # Ride-value multiplier per hour of day.
	        value_factors = {
	            0: 1.4, 1: 0.8, 2: 1.0, 3: 0.6, 4: 1.6, 5: 0.7,
	            6: 0.9, 7: 1.1, 8: 1.0, 9: 0.7, 10: 0.8, 11: 1.1,
	            12: 0.8, 13: 0.9, 14: 1.6, 15: 0.9, 16: 0.8, 17: 1.0,
	            18: 0.8, 19: 0.7, 20: 1.1, 21: 0.8, 22: 1.0, 23: 1.2,
	        }

	        # Demand multiplier per day of week.
	        day_factors = {
	            0: 0.8,   # Monday
	            1: 0.9,   # Tuesday
	            2: 0.9,   # Wednesday
	            3: 0.85,  # Thursday
	            4: 0.95,  # Friday
	            5: 1.0,   # Saturday
	            6: 0.8,   # Sunday
	        }

	        neighbor_offsets = [
	            (d_lat, d_lng)
	            for d_lat in (-1, 0, 1)
	            for d_lng in (-1, 0, 1)
	            if (d_lat, d_lng) != (0, 0)
	        ]

	        for day in range(days):
	            for hour in range(hours):
	                time_factor = hourly_factors[hour] * day_factors[day]

	                # Tuesday and Thursday early morning / late night get doubled values.
	                special_value_factor = 2.0 if day in (1, 3) and hour in (4, 22, 23) else 1.0

	                for spot in hotspots:
	                    lat_idx, lng_idx = spot["lat_idx"], spot["lng_idx"]

	                    # NOTE(review): the hotspot cell is assigned with "=" while
	                    # spillover uses "+=", so results for overlapping hotspots
	                    # depend on the order of `hotspots`, and spillover values
	                    # are summed rather than averaged. Kept as-is; confirm intent.
	                    demand = spot["base_demand"] * time_factor
	                    demand *= np.random.uniform(0.9, 1.1)  # +/-10% jitter
	                    demand_patterns[day, hour, lat_idx, lng_idx] = demand

	                    value = spot["value"] * value_factors[hour] * special_value_factor
	                    value *= np.random.uniform(0.95, 1.05)  # +/-5% jitter
	                    value_patterns[day, hour, lat_idx, lng_idx] = value

	                    # Spill part of the demand over to the 8 surrounding cells.
	                    for d_lat, d_lng in neighbor_offsets:
	                        n_lat = lat_idx + d_lat
	                        n_lng = lng_idx + d_lng
	                        if not (0 <= n_lat < lat_bins and 0 <= n_lng < lng_bins):
	                            continue

	                        # Spillover decreases with distance: 0.5 for edge
	                        # neighbours, 0.5 / sqrt(2) for diagonal ones.
	                        distance = np.sqrt(d_lat**2 + d_lng**2)
	                        spillover_factor = 0.5 / distance

	                        demand_patterns[day, hour, n_lat, n_lng] += demand * spillover_factor
	                        # Slightly lower values in spillover areas
	                        value_patterns[day, hour, n_lat, n_lng] += value * 0.9

	        return {
	            "demand": demand_patterns,
	            "value": value_patterns,
	        }

	    @staticmethod
	    def _haversine_km(lat1, lng1, lat2, lng2):
	        """Return the great-circle distance in kilometers between two points given in degrees."""
	        R = 6371  # Earth radius in kilometers
	        d_lat = np.radians(lat1 - lat2)
	        d_lng = np.radians(lng1 - lng2)
	        a = (np.sin(d_lat / 2) * np.sin(d_lat / 2) +
	             np.cos(np.radians(lat1)) * np.cos(np.radians(lat2)) *
	             np.sin(d_lng / 2) * np.sin(d_lng / 2))
	        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
	        return R * c

	    def predict(self, day, hour, current_lat=None, current_lng=None, value_weight=0.5, top_n=5):
	        """
	        Predict high-demand areas for a given day and hour

	        Parameters:
	        - day: Day of week (0=Monday, 6=Sunday)
	        - hour: Hour of day (0-23)
	        - current_lat: Driver's current latitude (optional)
	        - current_lng: Driver's current longitude (optional)
	        - value_weight: Weight for balancing demand vs value (0-1)
	        - top_n: Number of recommendations to return

	        Returns:
	        - List of at most top_n dicts, sorted by "adjusted_score" descending,
	          with keys center_lat, center_lng, predicted_rides, avg_value,
	          expected_value, score, adjusted_score and distance_km (None when no
	          driver location is given). Only cells with demand > 0 are included.
	        """
	        demand_matrix = self.demand_patterns["demand"][day, hour]
	        value_matrix = self.demand_patterns["value"][day, hour]

	        # Normalisation constants are the same for every cell, so compute them once.
	        max_demand = np.max(demand_matrix)
	        max_value = np.max(value_matrix)
	        has_location = current_lat is not None and current_lng is not None

	        recommendations = []

	        for lat_idx in range(len(self.lat_bins) - 1):
	            for lng_idx in range(len(self.lng_bins) - 1):
	                demand = demand_matrix[lat_idx, lng_idx]
	                value = value_matrix[lat_idx, lng_idx]

	                if not demand > 0:
	                    continue

	                # Cell centre is the midpoint of its two bounding edges.
	                center_lat = (self.lat_bins[lat_idx] + self.lat_bins[lat_idx + 1]) / 2
	                center_lng = (self.lng_bins[lng_idx] + self.lng_bins[lng_idx + 1]) / 2

	                distance_km = None
	                if has_location:
	                    distance_km = self._haversine_km(current_lat, current_lng, center_lat, center_lng)

	                # Scale demand and value to the busiest / most valuable cell.
	                demand_score = demand / max_demand if max_demand > 0 else 0
	                value_score = value / max_value if max_value > 0 else 0

	                # Combined score based on value weight
	                score = (1 - value_weight) * demand_score + value_weight * value_score

	                if distance_km is not None:
	                    # Distance penalty: 1.0 at the driver's position, 0.5 at 5 km.
	                    distance_penalty = 1.0 / (1.0 + distance_km / 5.0)
	                    adjusted_score = score * distance_penalty
	                else:
	                    adjusted_score = score

	                recommendations.append({
	                    "center_lat": center_lat,
	                    "center_lng": center_lng,
	                    "predicted_rides": demand,
	                    "avg_value": value,
	                    "expected_value": demand * value,
	                    "score": score,
	                    "adjusted_score": adjusted_score,
	                    "distance_km": distance_km,
	                })

	        recommendations.sort(key=lambda rec: rec["adjusted_score"], reverse=True)
	        return recommendations[:top_n]

	# Main application flow
	_DAY_NAMES = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
	_TALLINN_CENTER = (59.436, 24.753)  # default driver position and map centre


	def _section_header(title):
	    """Render a heading styled with the "section-header" CSS class."""
	    st.markdown(f'<div class="section-header">{title}</div>', unsafe_allow_html=True)


	def _render_sidebar():
	    """Draw the sidebar inputs and return the driver's selections as a dict."""
	    with st.sidebar:
	        _section_header("Driver Options")

	        st.subheader("Time Selection")
	        now = datetime.now()
	        selected_day = st.selectbox("Day of Week", _DAY_NAMES, index=now.weekday())
	        selected_hour = st.slider("Hour of Day", 0, 23, now.hour, format="%d:00")

	        st.subheader("Driver Location")
	        use_location = st.checkbox("Use Current Location", value=True)

	        current_lat = current_lng = None
	        if use_location:
	            default_lat, default_lng = _TALLINN_CENTER
	            lat_col, lng_col = st.columns(2)
	            with lat_col:
	                current_lat = st.number_input("Latitude", value=default_lat, format="%.5f", step=0.001)
	            with lng_col:
	                current_lng = st.number_input("Longitude", value=default_lng, format="%.5f", step=0.001)

	        st.subheader("Preferences")
	        num_recommendations = st.slider("Number of Recommendations", 3, 10, 5)
	        value_weight = st.slider(
	            "Optimization Balance",
	            min_value=0.0,
	            max_value=1.0,
	            value=0.5,
	            step=0.1,
	            help="0 = Focus on ride count, 1 = Focus on ride value",
	        )

	        st.subheader("Display Options")
	        show_heatmap = st.checkbox("Show Demand Heatmap", value=True)

	    return {
	        "selected_day": selected_day,
	        "day_idx": _DAY_NAMES.index(selected_day),
	        "selected_hour": selected_hour,
	        "use_location": use_location,
	        "current_lat": current_lat,
	        "current_lng": current_lng,
	        "num_recommendations": num_recommendations,
	        "value_weight": value_weight,
	        "show_heatmap": show_heatmap,
	    }


	def _render_demand_map(model, recommendations, settings):
	    """Draw the folium map: driver marker, ranked recommendation markers and optional heatmap."""
	    try:
	        demand_map = folium.Map(
	            location=list(_TALLINN_CENTER),
	            zoom_start=12,
	            tiles="CartoDB positron",
	        )

	        if settings["use_location"]:
	            folium.Marker(
	                location=[settings["current_lat"], settings["current_lng"]],
	                popup="Your Location",
	                icon=folium.Icon(color="blue", icon="user", prefix="fa"),
	                tooltip="Your Current Location",
	            ).add_to(demand_map)

	        for rank, rec in enumerate(recommendations, start=1):
	            location = [rec["center_lat"], rec["center_lng"]]
	            distance_line = (
	                f'Distance: {rec["distance_km"]:.2f} km'
	                if rec["distance_km"] is not None
	                else ''
	            )

	            folium.CircleMarker(
	                location=location,
	                radius=20,
	                color="red",
	                fill=True,
	                fill_color="red",
	                fill_opacity=0.6,
	                popup=f"""
	<b>Recommendation {rank}</b><br>
	Expected rides: {rec['predicted_rides']:.1f}<br>
	Avg value: €{rec['avg_value']:.2f}<br>
	Expected value: €{rec['expected_value']:.2f}<br>
	{distance_line}
	""",
	            ).add_to(demand_map)

	            # Rank number drawn on top of the circle as a plain HTML label.
	            folium.Marker(
	                location=location,
	                icon=folium.DivIcon(
	                    html=f'<div style="font-size:12pt;color:white;font-weight:bold;text-align:center;width:25px;height:25px;line-height:25px;">{rank}</div>'
	                ),
	            ).add_to(demand_map)

	        if settings["show_heatmap"]:
	            # The heatmap uses every cell with demand, not only the top picks.
	            all_predictions = model.predict(settings["day_idx"], settings["selected_hour"], top_n=100)
	            heat_data = [
	                [pred["center_lat"], pred["center_lng"], pred["predicted_rides"]]
	                for pred in all_predictions
	            ]

	            # Gradient stops are keyed by strings: they serialise to the same
	            # JSON as float keys, but some folium releases camel-case option
	            # keys and fail on floats ("'float' object has no attribute 'split'").
	            HeatMap(
	                heat_data,
	                radius=15,
	                gradient={
	                    "0.2": 'blue',
	                    "0.4": 'lime',
	                    "0.6": 'yellow',
	                    "0.8": 'orange',
	                    "1.0": 'red',
	                },
	                name="Demand Heatmap",
	                show=True,
	            ).add_to(demand_map)

	        folium.LayerControl().add_to(demand_map)
	        folium_static(demand_map, width=700)

	    except Exception as exc:
	        # UI boundary: a map failure must not take down the rest of the page.
	        st.error(f"Error rendering map: {exc}")
	        st.info("Showing tabular results instead.")


	def _render_recommendations(recommendations):
	    """Show the top recommendation as metrics, then the full ranked table and an explanation."""
	    _section_header("Recommendations")

	    if not recommendations:
	        # Building the table from an empty list would raise KeyError.
	        st.info("No recommendations available for the selected time.")
	    else:
	        top_rec = recommendations[0]

	        st.markdown('<div class="highlight">', unsafe_allow_html=True)
	        st.subheader("Top Recommendation")

	        left_col, right_col = st.columns(2)
	        with left_col:
	            st.metric("Expected Rides", f"{top_rec['predicted_rides']:.1f}")
	            st.metric("Avg Value", f"€{top_rec['avg_value']:.2f}")
	        with right_col:
	            st.metric("Expected Value", f"€{top_rec['expected_value']:.2f}")
	            if top_rec["distance_km"] is not None:
	                st.metric("Distance", f"{top_rec['distance_km']:.2f} km")

	        st.markdown(f"Location: [{top_rec['center_lat']:.4f}, {top_rec['center_lng']:.4f}]")
	        st.markdown('</div>', unsafe_allow_html=True)

	        st.subheader("All Recommendations")
	        rec_df = pd.DataFrame(recommendations)
	        display_df = pd.DataFrame({
	            "Rank": range(1, len(rec_df) + 1),
	            "Expected Rides": rec_df["predicted_rides"].round(1),
	            "Avg Value (€)": rec_df["avg_value"].round(2),
	            "Expected Value (€)": rec_df["expected_value"].round(2),
	        })

	        # The distance column only exists in a useful form when a location was given.
	        if "distance_km" in rec_df.columns and rec_df["distance_km"].notna().any():
	            display_df["Distance (km)"] = rec_df["distance_km"].round(2)

	        st.table(display_df)

	    st.markdown('<div class="info-box">', unsafe_allow_html=True)
	    st.markdown("How recommendations are calculated:")
	    st.markdown("""
	- Ride count predictions based on historical patterns
	- Value based on average ride fares
	- Recommendations balanced by your preferences
	- Distance factored in when location is provided
	""")
	    st.markdown('</div>', unsafe_allow_html=True)


	def _summarize_predictions(predictions):
	    """Return (total predicted rides, ride-weighted average value); the average is 0 without rides."""
	    total_demand = sum(rec["predicted_rides"] for rec in predictions)
	    if total_demand > 0:
	        weighted_value = sum(rec["avg_value"] * rec["predicted_rides"] for rec in predictions)
	        return total_demand, weighted_value / total_demand
	    return total_demand, 0


	def _render_hourly_tab(model, day_idx, selected_day, selected_hour):
	    """Plot total demand and average value for each hour of the selected day."""
	    hourly_rows = []
	    for hour in range(24):
	        total_demand, avg_value = _summarize_predictions(model.predict(day_idx, hour, top_n=100))
	        hourly_rows.append({"hour": hour, "demand": total_demand, "value": avg_value})
	    hourly_df = pd.DataFrame(hourly_rows)

	    fig = go.Figure()

	    # Demand on the primary (left) axis.
	    fig.add_trace(go.Scatter(
	        x=hourly_df["hour"],
	        y=hourly_df["demand"],
	        name="Demand",
	        line=dict(color="#4e8cff", width=3),
	        hovertemplate="Hour: %{x}<br>Demand: %{y:.1f}<extra></extra>",
	    ))

	    # Average value on the secondary (right) axis.
	    fig.add_trace(go.Scatter(
	        x=hourly_df["hour"],
	        y=hourly_df["value"],
	        name="Avg Value (€)",
	        line=dict(color="#ff6b6b", width=3, dash="dot"),
	        yaxis="y2",
	        hovertemplate="Hour: %{x}<br>Avg Value: €%{y:.2f}<extra></extra>",
	    ))

	    fig.add_vline(
	        x=selected_hour,
	        line_width=2,
	        line_dash="dash",
	        line_color="green",
	        annotation_text="Selected Hour",
	        annotation_position="top right",
	    )

	    # Axis title colours use the nested title.font form; the legacy
	    # `titlefont` shorthand is rejected by recent Plotly releases.
	    fig.update_layout(
	        title=f"Hourly Demand Pattern for {selected_day}",
	        xaxis=dict(
	            title="Hour of Day",
	            tickmode="linear",
	            tick0=0,
	            dtick=1,
	        ),
	        yaxis=dict(
	            title=dict(text="Demand (Expected Rides)", font=dict(color="#4e8cff")),
	            tickfont=dict(color="#4e8cff"),
	        ),
	        yaxis2=dict(
	            title=dict(text="Average Value (€)", font=dict(color="#ff6b6b")),
	            tickfont=dict(color="#ff6b6b"),
	            anchor="x",
	            overlaying="y",
	            side="right",
	        ),
	        hovermode="x unified",
	        legend=dict(
	            orientation="h",
	            yanchor="bottom",
	            y=1.02,
	            xanchor="center",
	            x=0.5,
	        ),
	    )

	    st.plotly_chart(fig, use_container_width=True)

	    st.markdown("""
	Key Observations:
	- Peak demand typically occurs between 15:00-18:00 (3-6 PM)
	- Early morning hours (4-5 AM) often show higher average ride values
	- Morning rush hour (6-9 AM) shows moderate demand with variable values
	""")


	def _render_daily_tab(model, day_idx):
	    """Plot demand at each day's nominal peak hour, coloured by average value."""
	    daily_rows = []
	    for day, day_name in enumerate(_DAY_NAMES):
	        peak_hour = 17 if day < 5 else 22  # Weekday peak at 5pm, weekend peak at 10pm
	        total_demand, avg_value = _summarize_predictions(model.predict(day, peak_hour, top_n=100))
	        daily_rows.append({"day": day_name, "demand": total_demand, "value": avg_value})
	    daily_df = pd.DataFrame(daily_rows)

	    fig = px.bar(
	        daily_df,
	        x="day",
	        y="demand",
	        color="value",
	        color_continuous_scale="Viridis",
	        labels={
	            "day": "Day of Week",
	            "demand": "Peak Demand (Expected Rides)",
	            "value": "Avg Value (€)",
	        },
	        title="Peak Demand by Day of Week",
	    )

	    # The line is placed by category index rather than by label: the annotation
	    # placement averages the x coordinates, which fails for string values.
	    # The index matches because the axis categories are pinned to _DAY_NAMES below.
	    fig.add_vline(
	        x=day_idx,
	        line_width=2,
	        line_dash="dash",
	        line_color="red",
	        annotation_text="Selected Day",
	        annotation_position="top right",
	    )

	    fig.update_layout(
	        xaxis=dict(categoryorder="array", categoryarray=_DAY_NAMES),
	        coloraxis_colorbar=dict(title="Avg Value (€)"),
	    )

	    st.plotly_chart(fig, use_container_width=True)

	    st.markdown("""
	Key Observations:
	- Weekends (especially Saturday) typically show higher demand
	- Tuesday and Thursday often have higher average ride values
	- Weekend nights show different demand patterns than weekday nights
	""")


	def _render_tip_card(column, title, body):
	    """Render one titled tip list inside the given column."""
	    with column:
	        st.markdown('<div class="card">', unsafe_allow_html=True)
	        st.subheader(title)
	        st.markdown(body)
	        st.markdown('</div>', unsafe_allow_html=True)


	def _render_tips():
	    """Render the three static "Tips for Drivers" cards at the bottom of the page."""
	    _section_header("Tips for Drivers")

	    tips_col1, tips_col2, tips_col3 = st.columns(3)

	    _render_tip_card(tips_col1, "Best Times", """
	- Weekdays: 7-9 AM, 4-6 PM
	- Weekends: 10 PM - 2 AM
	- High Value: Tuesday & Thursday early morning (4-5 AM) and late night (10 PM-12 AM)
	""")

	    _render_tip_card(tips_col2, "Best Areas", """
	- City Center: Consistent demand throughout the day
	- University Area: Higher value rides, especially weekdays
	- Business District: Good during morning rush hours
	""")

	    _render_tip_card(tips_col3, "Strategy Tips", """
	- Position 5-10 minutes before peak times
	- Balance high-volume vs high-value areas
	- For longer shifts, start with high-value rides then switch to high-volume
	""")


	def main():
	    """Build the page: sidebar inputs, demand map, recommendations, pattern charts and tips.

	    A new DemandPredictionModel is created on every call, so its randomly
	    jittered tables differ between runs.
	    """
	    model = DemandPredictionModel()
	    settings = _render_sidebar()

	    # Location values are None when "Use Current Location" is unchecked.
	    recommendations = model.predict(
	        day=settings["day_idx"],
	        hour=settings["selected_hour"],
	        current_lat=settings["current_lat"],
	        current_lng=settings["current_lng"],
	        value_weight=settings["value_weight"],
	        top_n=settings["num_recommendations"],
	    )

	    map_col, rec_col = st.columns([3, 2])

	    with map_col:
	        _section_header("Demand Map")
	        _render_demand_map(model, recommendations, settings)

	    with rec_col:
	        _render_recommendations(recommendations)

	    _section_header("Demand Patterns Analysis")
	    hourly_tab, daily_tab = st.tabs(["Hourly Patterns", "Daily Patterns"])

	    with hourly_tab:
	        _render_hourly_tab(
	            model,
	            settings["day_idx"],
	            settings["selected_day"],
	            settings["selected_hour"],
	        )

	    with daily_tab:
	        _render_daily_tab(model, settings["day_idx"])

	    _render_tips()

	# Script entry point: build the page only when this file is executed as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	    main()