Spaces:

helvia
/

sessions-observatory

Running

App Files Files Community

sessions-observatory / app.py

eloukas

Add files for HF deployment

1b75011 5 days ago

raw

history blame contribute delete

55 kB

	import base64
	import io
	import random
	from collections import Counter

	import dash
	import numpy as np
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from dash import Input, Output, State, callback, dcc, html

	# Initialize the Dash app.
	# suppress_callback_exceptions=True is passed through to Dash unchanged
	# (presumably so callbacks may reference component ids that Dash cannot see
	# when it validates the layout -- confirm against the Dash documentation).
	app = dash.Dash(__name__, suppress_callback_exceptions=True)
	# Module-level handle on the app's underlying server object
	# (presumably what an external WSGI host imports -- confirm against the deployment).
	server = app.server


	# Define app layout.
	#
	# Structure of the component tree:
	#   * header            -- title and one-line description
	#   * upload container  -- dcc.Upload ("upload-data") plus a status line ("upload-status")
	#   * main content      -- "main-content", hidden via style until an upload succeeds:
	#       - left:  bubble chart ("bubble-chart") and the "color-metric" radio group
	#       - right: topic-details sidebar ("topic-title", "topic-metadata",
	#                "topic-metrics", "important-tags", "sample-dialogs") with a
	#                "no-topic-selected" placeholder panel
	#   * dcc.Store "stored-data" -- holds the per-topic statistics between callbacks
	#
	# The ids above are the ones referenced by the callbacks defined later in this file.
	app.layout = html.Div(
	    [
	        # Header
	        html.Div(
	            [
	                html.H1(
	                    "Sessions Observatory by helvia.ai 🔭📊",
	                    className="app-header",
	                ),
	                html.P(
	                    "Upload a CSV/Excel file to visualize the chatbot's dialog topics.",
	                    className="app-description",
	                ),
	            ],
	            className="header-container",
	        ),
	        # File Upload Component
	        html.Div(
	            [
	                dcc.Upload(
	                    id="upload-data",
	                    children=html.Div(
	                        [
	                            html.Div("Drag and Drop", className="upload-text"),
	                            html.Div("or", className="upload-divider"),
	                            html.Div(
	                                html.Button("Select a File", className="upload-button")
	                            ),
	                        ],
	                        className="upload-content",
	                    ),
	                    style={
	                        "width": "100%",
	                        "height": "120px",
	                        "lineHeight": "60px",
	                        "borderWidth": "1px",
	                        "borderStyle": "dashed",
	                        "borderRadius": "0.5rem",
	                        "textAlign": "center",
	                        "margin": "10px 0",
	                        "backgroundColor": "hsl(210, 40%, 98%)",
	                        "borderColor": "hsl(214.3, 31.8%, 91.4%)",
	                        "cursor": "pointer",
	                    },
	                    multiple=False,
	                ),
	                # Status message (success / error text written by process_upload)
	                html.Div(
	                    id="upload-status",
	                    className="upload-status-message",
	                    style={"display": "none"},  # Initially hidden
	                ),
	            ],
	            className="upload-container",
	        ),
	        # Main Content Area (hidden until file is uploaded)
	        html.Div(
	            [
	                # Dashboard layout with flexible grid
	                html.Div(
	                    [
	                        # Left side: Bubble chart
	                        html.Div(
	                            [
	                                html.H3(
	                                    id="topic-distribution-header",
	                                    children="Sessions Observatory",
	                                    className="section-header",
	                                ),
	                                # NOTE: per the original author's remark, the calc()-based
	                                # height below "does not work for some reason"; an earlier
	                                # variant of this graph used a fixed height of 80vh.
	                                dcc.Graph(
	                                    id="bubble-chart",
	                                    style={"height": "calc(100% - 154px)"},
	                                ),
	                                html.Div(
	                                    [
	                                        # Label row: only the "Color by" control is kept
	                                        html.Div(
	                                            [
	                                                html.Div(
	                                                    html.Label(
	                                                        "Color by:",
	                                                        className="control-label",
	                                                    ),
	                                                    className="control-label-container",
	                                                ),
	                                            ],
	                                            className="control-labels-row",
	                                        ),
	                                        # Options row: the option values are the rate column
	                                        # names consumed by update_bubble_chart
	                                        html.Div(
	                                            [
	                                                html.Div(
	                                                    dcc.RadioItems(
	                                                        id="color-metric",
	                                                        options=[
	                                                            {
	                                                                "label": "Sentiment",
	                                                                "value": "negative_rate",
	                                                            },
	                                                            {
	                                                                "label": "Resolution",
	                                                                "value": "unresolved_rate",
	                                                            },
	                                                            {
	                                                                "label": "Urgency",
	                                                                "value": "urgent_rate",
	                                                            },
	                                                        ],
	                                                        value="negative_rate",
	                                                        inline=True,
	                                                        className="radio-group",
	                                                        inputClassName="radio-input",
	                                                        labelClassName="radio-label",
	                                                    ),
	                                                    className="radio-container",
	                                                ),
	                                            ],
	                                            className="control-options-row",
	                                        ),
	                                    ],
	                                    className="chart-controls",
	                                ),
	                            ],
	                            className="chart-container",
	                        ),
	                        # Right side: Interactive sidebar with topic details
	                        html.Div(
	                            [
	                                html.Div(
	                                    [
	                                        html.H3(
	                                            "Topic Details", className="section-header"
	                                        ),
	                                        html.Div(
	                                            id="topic-title", className="topic-title"
	                                        ),
	                                        html.Div(
	                                            [
	                                                html.Div(
	                                                    [
	                                                        html.H4(
	                                                            "Metadata",
	                                                            className="subsection-header",
	                                                        ),
	                                                        html.Div(
	                                                            id="topic-metadata",
	                                                            className="metadata-container",
	                                                        ),
	                                                    ],
	                                                    className="metadata-section",
	                                                ),
	                                                html.Div(
	                                                    [
	                                                        html.H4(
	                                                            "Key Metrics",
	                                                            className="subsection-header",
	                                                        ),
	                                                        html.Div(
	                                                            id="topic-metrics",
	                                                            className="metrics-container",
	                                                        ),
	                                                    ],
	                                                    className="metrics-section",
	                                                ),
	                                                # Tags section (filled from the uploaded
	                                                # "consolidated_tags" column)
	                                                html.Div(
	                                                    [
	                                                        html.H4(
	                                                            "Tags",
	                                                            className="subsection-header",
	                                                        ),
	                                                        html.Div(
	                                                            id="important-tags",
	                                                            className="tags-container",
	                                                        ),
	                                                    ]
	                                                ),
	                                            ],
	                                            className="details-section",
	                                        ),
	                                        html.Div(
	                                            [
	                                                html.H4(
	                                                    "Sample Dialogs (Summary)",
	                                                    className="subsection-header",
	                                                ),
	                                                html.Div(
	                                                    id="sample-dialogs",
	                                                    className="sample-dialogs-container",
	                                                ),
	                                            ],
	                                            className="samples-section",
	                                        ),
	                                    ],
	                                    className="topic-details-content",
	                                ),
	                                # Placeholder panel; update_topic_details toggles its
	                                # "display" style between "flex" and "none"
	                                html.Div(
	                                    id="no-topic-selected",
	                                    children=[
	                                        html.Div(
	                                            [
	                                                html.I(
	                                                    className="fas fa-info-circle info-icon"
	                                                ),
	                                                html.H3("No topic selected"),
	                                                html.P(
	                                                    "Click or hover on a bubble to view topic details."
	                                                ),
	                                            ],
	                                            className="no-selection-message",
	                                        )
	                                    ],
	                                    className="no-selection-container",
	                                ),
	                            ],
	                            className="sidebar-container",
	                        ),
	                    ],
	                    className="dashboard-container",
	                )
	            ],
	            id="main-content",
	            style={"display": "none"},
	        ),
	        # Store the processed data
	        dcc.Store(id="stored-data"),
	    ],
	    className="app-container",
	)

	# Define the HTML page template and CSS for the app.
	#
	# The template keeps the {%metas%}, {%favicon%}, {%css%}, {%app_entry%},
	# {%config%}, {%scripts%} and {%renderer%} placeholders for Dash to fill in.
	# It pulls Font Awesome (cdnjs) and the Inter font (Google Fonts) from external
	# URLs, then defines the colour variables and the classes used by the layout
	# and by the components built in the callbacks.
	#
	# NOTE(review): `.topic-tag` is declared twice below; the second rule re-declares
	# every property of the first, so only the second one has any visible effect.
	# NOTE(review): `.sidebar-container` sets `height` twice (100vh, then 100%);
	# the later declaration wins.
	app.index_string = """
	<!DOCTYPE html>
	<html>
	<head>
	{%metas%}
	<title>Sessions Observatory by helvia.ai 🔭📊</title>
	{%favicon%}
	{%css%}
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
	<style>
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

	:root {
	--background: hsl(210, 20%, 95%);
	--foreground: hsl(222.2, 84%, 4.9%);
	--card: hsl(0, 0%, 100%);
	--card-foreground: hsl(222.2, 84%, 4.9%);
	--popover: hsl(0, 0%, 100%);
	--popover-foreground: hsl(222.2, 84%, 4.9%);
	--primary: hsl(222.2, 47.4%, 11.2%);
	--primary-foreground: hsl(210, 40%, 98%);
	--secondary: hsl(210, 40%, 96.1%);
	--secondary-foreground: hsl(222.2, 47.4%, 11.2%);
	--muted: hsl(210, 40%, 96.1%);
	--muted-foreground: hsl(215.4, 16.3%, 46.9%);
	--accent: hsl(210, 40%, 96.1%);
	--accent-foreground: hsl(222.2, 47.4%, 11.2%);
	--destructive: hsl(0, 84.2%, 60.2%);
	--destructive-foreground: hsl(210, 40%, 98%);
	--border: hsl(214.3, 31.8%, 91.4%);
	--input: hsl(214.3, 31.8%, 91.4%);
	--ring: hsl(222.2, 84%, 4.9%);
	--radius: 0.5rem;
	}

	* {
	margin: 0;
	padding: 0;
	box-sizing: border-box;
	font-family: 'Inter', sans-serif;
	}

	body {
	background-color: var(--background);
	color: var(--foreground);
	font-feature-settings: "rlig" 1, "calt" 1;
	}

	.app-container {
	max-width: 2500px;
	margin: 0 auto;
	padding: 1.5rem;
	background-color: var(--background);
	min-height: 100vh;
	display: flex;
	flex-direction: column;
	}

	.header-container {
	margin-bottom: 2rem;
	text-align: center;
	}

	.app-header {
	color: var(--foreground);
	margin-bottom: 0.75rem;
	font-weight: 600;
	font-size: 2rem;
	line-height: 1.2;
	}

	.app-description {
	color: var(--muted-foreground);
	font-size: 1rem;
	line-height: 1.5;
	}

	.upload-container {
	margin-bottom: 2rem;
	max-width: 800px;
	margin-left: auto;
	margin-right: auto;
	}

	.upload-content {
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	height: 80%;
	padding: 1.5rem;
	position: relative;
	}

	.upload-text {
	font-size: 1rem;
	color: var(--primary);
	font-weight: 500;
	}

	.upload-divider {
	color: var(--muted-foreground);
	margin: 0.5rem 0;
	font-size: 0.875rem;
	}

	.upload-button {
	background-color: var(--primary);
	color: var(--primary-foreground);
	border: none;
	padding: 0.5rem 1rem;
	border-radius: var(--radius);
	font-size: 0.875rem;
	cursor: pointer;
	transition: opacity 0.2s;
	font-weight: 500;
	box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
	height: 2.5rem;
	}

	.upload-button:hover {
	opacity: 0.9;
	}

	/* Status message styling */
	.upload-status-message {
	margin-top: 1rem;
	padding: 0.75rem;
	font-weight: 500;
	text-align: center;
	border-radius: var(--radius);
	font-size: 0.875rem;
	transition: all 0.3s ease;
	background-color: var(--secondary);
	color: var(--secondary-foreground);
	}

	/* Chart controls styling */
	.chart-controls {
	margin-top: 1rem;
	display: flex;
	flex-direction: column;
	gap: 0.75rem;
	padding: 1rem;
	background-color: var(--card);
	border-radius: var(--radius);
	border: 1px solid var(--border);
	box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
	}

	.control-labels-row {
	display: flex;
	width: 100%;
	}

	.control-options-row {
	display: flex;
	width: 100%;
	}

	.control-label-container {
	padding: 0 0.5rem;
	text-align: left;
	}

	.control-label {
	font-weight: 500;
	color: var(--foreground);
	font-size: 0.875rem;
	line-height: 1.25rem;
	}

	.radio-container {
	padding: 0 0.5rem;
	width: 100%;
	}

	.radio-group {
	display: flex;
	gap: 1rem;
	}

	.radio-input {
	margin-right: 0.375rem;
	cursor: pointer;
	height: 1rem;
	width: 1rem;
	border-radius: 9999px;
	border: 1px solid var(--border);
	appearance: none;
	-webkit-appearance: none;
	background-color: var(--background);
	transition: border-color 0.2s;
	}

	.radio-input:checked {
	border-color: var(--primary);
	background-color: var(--primary);
	background-image: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
	background-size: 100% 100%;
	background-position: center;
	background-repeat: no-repeat;
	}

	.radio-label {
	font-weight: 400;
	color: var(--foreground);
	display: flex;
	align-items: center;
	cursor: pointer;
	font-size: 0.875rem;
	line-height: 1.25rem;
	}

	/* Dashboard container */
	.dashboard-container {
	display: flex;
	flex-wrap: wrap;
	gap: 1.5rem;
	flex: 1;
	height: 100%;
	}

	.chart-container {
	flex: 2.75;
	min-width: 400px;
	background: var(--card);
	border-radius: var(--radius);
	box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
	padding: 1rem;
	border: 0.75px solid var(--border);
	height: 100%;
	}

	.sidebar-container {
	flex: 1;
	min-width: 300px;
	background: var(--card);
	border-radius: var(--radius);
	box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
	padding: 1rem;
	position: relative;
	height: 100vh;
	overflow-y: auto;
	border: 1px solid var(--border);
	height: 100%;
	}

	.section-header {
	margin-bottom: 1rem;
	color: var(--foreground);
	border-bottom: 1px solid var(--border);
	padding-bottom: 0.75rem;
	font-weight: 600;
	font-size: 1.25rem;
	}

	.subsection-header {
	margin: 1rem 0 0.75rem;
	color: var(--foreground);
	font-size: 1rem;
	font-weight: 600;
	}

	.topic-title {
	font-size: 1.25rem;
	font-weight: 600;
	color: var(--foreground);
	margin-bottom: 1rem;
	padding: 0.5rem 0.75rem;
	background-color: var(--secondary);
	border-radius: var(--radius);
	}

	.metadata-container {
	display: flex;
	flex-wrap: wrap;
	gap: 0.75rem;
	margin-bottom: 1rem;
	}

	.metadata-item {
	background-color: var(--secondary);
	padding: 0.5rem 0.75rem;
	border-radius: var(--radius);
	font-size: 0.875rem;
	display: flex;
	align-items: center;
	color: var(--secondary-foreground);
	}

	.metadata-icon {
	margin-right: 0.5rem;
	color: var(--primary);
	}

	.metrics-container {
	display: flex;
	justify-content: space-between;
	gap: 0.75rem;
	margin-bottom: 0.75rem;
	}

	.metric-box {
	background-color: var(--card);
	border-radius: var(--radius);
	padding: 0.75rem;
	text-align: center;
	flex: 1;
	border: 1px solid var(--border);
	}

	.metric-box.negative {
	border-left: 3px solid var(--destructive);
	}

	.metric-box.unresolved {
	border-left: 3px solid hsl(47.9, 95.8%, 53.1%);
	}

	.metric-box.urgent {
	border-left: 3px solid hsl(217.2, 91.2%, 59.8%);
	}

	.metric-value {
	font-size: 1.5rem;
	font-weight: 600;
	margin-bottom: 0.25rem;
	color: var(--foreground);
	line-height: 1;
	}

	.metric-label {
	font-size: 0.75rem;
	color: var(--muted-foreground);
	}

	.sample-dialogs-container {
	margin-top: 0.75rem;
	}

	.dialog-item {
	background-color: var(--secondary);
	border-radius: var(--radius);
	padding: 1rem;
	margin-bottom: 0.75rem;
	border-left: 3px solid var(--primary);
	}

	.dialog-summary {
	font-size: 0.875rem;
	line-height: 1.5;
	margin-bottom: 0.5rem;
	color: var(--foreground);
	}

	.dialog-metadata {
	display: flex;
	flex-wrap: wrap;
	gap: 0.5rem;
	margin-top: 0.5rem;
	font-size: 0.75rem;
	}

	.dialog-tag {
	padding: 0.25rem 0.5rem;
	border-radius: var(--radius);
	font-size: 0.7rem;
	font-weight: 500;
	}

	.tag-sentiment {
	background-color: var(--destructive);
	color: var(--destructive-foreground);
	}

	.tag-resolution {
	background-color: hsl(47.9, 95.8%, 53.1%);
	color: hsl(222.2, 84%, 4.9%);
	}

	.tag-urgency {
	background-color: hsl(217.2, 91.2%, 59.8%);
	color: hsl(210, 40%, 98%);
	}

	.tag-chat-id {
	background-color: hsl(215.4, 16.3%, 46.9%);
	color: hsl(210, 40%, 98%);
	font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
	font-weight: 500;
	}

	.no-selection-container {
	position: absolute;
	top: 0;
	left: 0;
	right: 0;
	bottom: 0;
	display: flex;
	align-items: center;
	justify-content: center;
	background-color: hsla(0, 0%, 100%, 0.95);
	z-index: 10;
	border-radius: var(--radius);
	}

	.no-selection-message {
	text-align: center;
	color: var(--muted-foreground);
	padding: 1.5rem;
	}

	.info-icon {
	font-size: 2rem;
	margin-bottom: 0.75rem;
	color: var(--muted);
	}

	/* Tags container */
	.tags-container {
	display: flex;
	flex-wrap: wrap;
	gap: 8px;
	margin-top: 5px;
	margin-bottom: 15px;
	padding: 6px;
	border-radius: 8px;
	background-color: #f8f9fa;
	}


	.topic-tag {
	padding: 0.375rem 0.75rem;
	border-radius: var(--radius);
	font-size: 0.75rem;
	display: inline-flex;
	align-items: center;
	transition: all 0.2s ease;
	font-weight: 500;
	margin-bottom: 0.25rem;
	cursor: default;
	background-color: var(--muted);
	color: var(--muted-foreground);
	border: 1px solid var(--border);
	}

	.topic-tag {
	padding: 6px 12px;
	border-radius: 15px;
	font-size: 0.8rem;
	display: inline-flex;
	align-items: center;
	box-shadow: 0 1px 3px rgba(0,0,0,0.12);
	transition: all 0.2s ease;
	font-weight: 500;
	margin-bottom: 5px;
	cursor: default;
	border: 1px solid rgba(0,0,0,0.08);
	background-color: #6c757d; /* Consistent medium gray color */
	color: white;
	}

	.topic-tag:hover {
	transform: translateY(-1px);
	box-shadow: 0 3px 5px rgba(0,0,0,0.15);
	background-color: #5a6268; /* Slightly darker on hover */
	}

	.topic-tag-icon {
	margin-right: 5px;
	font-size: 0.7rem;
	opacity: 0.8;
	color: rgba(255, 255, 255, 0.9);
	}

	.no-tags-message {
	color: var(--muted-foreground);
	font-style: italic;
	padding: 0.75rem;
	text-align: center;
	width: 100%;
	}

	/* Responsive adjustments */
	@media (max-width: 768px) {
	.dashboard-container {
	flex-direction: column;
	}
	.chart-container, .sidebar-container {
	width: 100%;
	}
	.app-header {
	font-size: 1.5rem;
	}
	}
	</style>
	</head>
	<body>
	{%app_entry%}
	<footer>
	{%config%}
	{%scripts%}
	{%renderer%}
	</footer>
	</body>
	</html>
	"""


	@callback(
	    Output("topic-distribution-header", "children"),
	    Input("stored-data", "data"),
	)
	def update_topic_distribution_header(data):
	    """Return the chart heading, with the total dialog count once data is stored.

	    Args:
	        data: Contents of the "stored-data" store (a list of per-topic
	            records carrying a "count" field), or a falsy value when
	            nothing has been uploaded yet.

	    Returns:
	        The plain title when there is no data, otherwise the title followed
	        by the sum of every record's "count".
	    """
	    if data:
	        # Add up the per-topic counts to get the number of dialogs overall.
	        dialog_total = pd.DataFrame(data)["count"].sum()
	        return f"Sessions Observatory ({dialog_total} dialogs)"

	    # Nothing stored yet: fall back to the default heading.
	    return "Sessions Observatory"


	def _upload_status(icon_class, message, color):
	    """Build the status line shown under the upload box: an icon followed by text, both in ``color``."""
	    return html.Div(
	        [
	            html.I(
	                className=icon_class,
	                style={"color": color, "marginRight": "8px"},
	            ),
	            message,
	        ],
	        style={"color": color},
	    )


	# Define callback to process uploaded file
	@callback(
	    [
	        Output("stored-data", "data"),
	        Output("upload-status", "children"),
	        Output("upload-status", "style"),  # Controls visibility of the status line
	        Output("main-content", "style"),
	    ],
	    [Input("upload-data", "contents")],
	    [State("upload-data", "filename")],
	)
	def process_upload(contents, filename):
	    """Parse an uploaded CSV/Excel file and store its per-topic statistics.

	    Args:
	        contents: The upload's ``contents`` value, a string of the form
	            ``"<header>,<base64 payload>"``, or ``None`` before any upload.
	        filename: Name of the uploaded file; its extension selects the parser.

	    Returns:
	        A 4-tuple ``(stored_data, status_children, status_style, main_style)``:
	        the topic records (or ``None`` on failure), the status message
	        component, and the ``style`` dicts that show/hide the status line and
	        the main dashboard.
	    """
	    hidden = {"display": "none"}
	    visible = {"display": "block"}

	    if contents is None:
	        return None, "", hidden, hidden  # Keep everything hidden

	    try:
	        # Only the first comma separates the header from the payload.
	        _, content_string = contents.split(",", 1)
	        decoded = base64.b64decode(content_string)

	        # Decide on the parser from the file *extension*. A substring test
	        # ("csv" in name) misroutes names such as "my_csv_export.xlsx".
	        lowered_name = (filename or "").lower()
	        if lowered_name.endswith(".csv"):
	            df = pd.read_csv(io.StringIO(decoded.decode("utf-8")))
	        elif lowered_name.endswith((".xls", ".xlsx", ".xlsm")):
	            df = pd.read_excel(io.BytesIO(decoded))
	        else:
	            return (
	                None,
	                _upload_status(
	                    "fas fa-exclamation-circle",
	                    "Please upload a CSV or Excel file.",
	                    "var(--destructive)",
	                ),
	                visible,  # Make the status visible after an error
	                hidden,
	            )

	        # Process the dataframe to get topic statistics
	        records = analyze_topics(df).to_dict("records")

	    except Exception as e:
	        # Callback boundary: any parsing/processing failure is reported to the
	        # user in the status line instead of surfacing as a server error.
	        return (
	            None,
	            _upload_status(
	                "fas fa-exclamation-triangle",
	                f"Error processing file: {str(e)}",
	                "var(--destructive)",
	            ),
	            visible,
	            hidden,
	        )

	    return (
	        records,
	        _upload_status(
	            "fas fa-check-circle",
	            f'Successfully uploaded "{filename}"',
	            "hsl(142.1, 76.2%, 36.3%)",
	        ),
	        visible,
	        # Reveal the dashboard after a successful upload
	        {
	            "display": "block",
	            "height": "calc(100vh - 40px)",
	        },
	    )


	# Function to analyze the topics and create statistics
	def analyze_topics(df):
	    """Aggregate per-dialog rows into per-topic statistics and lay them out.

	    Args:
	        df: DataFrame with one row per dialog. It must contain the columns
	            "deduplicated_topic_name", "id", "Sentiment", "Resolution" and
	            "Urgency".

	    Returns:
	        The result of ``apply_binned_layout`` applied to a frame with one row
	        per topic holding ``count`` (non-null ids), the ``negative_count`` /
	        ``unresolved_count`` / ``urgent_count`` tallies and the matching
	        ``*_rate`` percentages rounded to one decimal.

	    Raises:
	        KeyError: If any required column is missing; the message lists all
	            of them rather than only the first one pandas trips over.
	    """
	    required_columns = (
	        "deduplicated_topic_name",
	        "id",
	        "Sentiment",
	        "Resolution",
	        "Urgency",
	    )
	    missing = [column for column in required_columns if column not in df.columns]
	    if missing:
	        raise KeyError("Missing required column(s): " + ", ".join(missing))

	    # Group by topic name and calculate metrics
	    topic_stats = (
	        df.groupby("deduplicated_topic_name")
	        .agg(
	            count=("id", "count"),
	            negative_count=("Sentiment", lambda x: (x == "negative").sum()),
	            unresolved_count=("Resolution", lambda x: (x == "unresolved").sum()),
	            urgent_count=("Urgency", lambda x: (x == "urgent").sum()),
	        )
	        .reset_index()
	    )

	    # Calculate rates (percentage of the topic's dialogs, one decimal place)
	    for prefix in ("negative", "unresolved", "urgent"):
	        topic_stats[f"{prefix}_rate"] = (
	            topic_stats[f"{prefix}_count"] / topic_stats["count"] * 100
	        ).round(1)

	    # Apply binned layout
	    return apply_binned_layout(topic_stats)


	# New binned layout function


	def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
	    """
	    Apply a binned layout where bubbles are grouped into rows based on dialog count.
	    Bubbles in each row will be centered horizontally.

	    Rows follow the order of ``bin_config`` (first bin on the first row). A bin
	    holding more than ``max_items_per_row`` topics is split into consecutive
	    sub-rows named ``"<bin>_<k>"`` that stay in numeric order.

	    Args:
	        df: DataFrame containing the topic data; must have a "count" column.
	        padding: Padding from the top and bottom edges, in units of the 0-100 scale
	        bin_config: List of ``(lower, upper, description)`` tuples defining
	            inclusive bin ranges; ``upper=None`` means no upper limit.
	            Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
	            Counts matching no range fall into the last bin.
	        max_items_per_row: Maximum number of items to display in a single row

	    Returns:
	        A sorted copy of ``df`` with "bin", "bin_description", "y", "x" and
	        "size_rank" columns set. An empty input yields an empty frame that
	        still carries those columns.

	    Raises:
	        ValueError: If ``bin_config`` is an empty sequence.
	    """
	    # Work on a copy so the caller's frame is never modified
	    df_sorted = df.copy()

	    # Default bin configuration if none is provided
	    # 8 rows x 6 bubbles is usually good
	    if bin_config is None:
	        bin_config = [
	            (100, None, "100+ dialogs"),
	            (50, 99, "50-99 dialogs"),
	            (25, 49, "25-49 dialogs"),
	            (9, 24, "9-24 dialogs"),
	            (7, 8, "7-8 dialogs"),
	            (5, 6, "5-6 dialogs"),
	            (4, 4, "4 dialogs"),
	            (0, 3, "0-3 dialogs"),
	        ]
	    if not bin_config:
	        raise ValueError("bin_config must define at least one bin")

	    # Generate bin names, descriptions and membership conditions
	    bin_descriptions = {}
	    conditions = []
	    bin_values = []
	    for i, (lower, upper, description) in enumerate(bin_config):
	        bin_name = f"Bin {i + 1}"
	        bin_descriptions[bin_name] = description
	        bin_values.append(bin_name)

	        if upper is None:  # No upper limit
	            conditions.append(df_sorted["count"] >= lower)
	        else:
	            conditions.append(
	                (df_sorted["count"] >= lower) & (df_sorted["count"] <= upper)
	            )

	    # The first matching range wins; anything unmatched goes to the last bin
	    # (with the default configuration that is "Bin 8").
	    df_sorted["bin"] = np.select(conditions, bin_values, default=bin_values[-1])
	    df_sorted["bin_description"] = df_sorted["bin"].map(bin_descriptions)

	    # Nothing to lay out: return the (empty) frame with the output columns
	    # present instead of dividing by a row count of zero further down.
	    if df_sorted.empty:
	        df_sorted["y"] = pd.Series(dtype=float)
	        df_sorted["x"] = pd.Series(dtype=float)
	        df_sorted["size_rank"] = pd.Series(dtype=int)
	        return df_sorted

	    # Sort by bin position (not by bin *name*: "Bin 10" sorts before "Bin 2"
	    # as text) and by count, descending, within each bin.
	    bin_rank = {name: rank for rank, name in enumerate(bin_values)}
	    df_sorted = df_sorted.sort_values(
	        by=["bin", "count"],
	        ascending=[True, False],
	        key=lambda col: col.map(bin_rank) if col.name == "bin" else col,
	    )

	    # Build the rows top to bottom, splitting over-full bins into sub-rows.
	    row_names = []
	    row_frames = []
	    for bin_name in bin_values:
	        bin_group = df_sorted[df_sorted["bin"] == bin_name]
	        bin_size = len(bin_group)
	        if bin_size == 0:
	            continue

	        if bin_size <= max_items_per_row:
	            row_names.append(bin_name)
	            row_frames.append(bin_group)
	            continue

	        # Number of sub-rows needed (ceiling division), filled as evenly as
	        # possible: the first `remainder` sub-rows get one extra item.
	        num_sub_bins = (bin_size + max_items_per_row - 1) // max_items_per_row
	        base_size, remainder = divmod(bin_size, num_sub_bins)
	        original_description = bin_descriptions[bin_name]

	        start_idx = 0
	        for i in range(num_sub_bins):
	            end_idx = start_idx + base_size + (1 if i < remainder else 0)
	            new_bin_name = f"{bin_name}_{i + 1}"
	            new_description = f"{original_description} ({i + 1}/{num_sub_bins})"

	            sub_bin_rows = bin_group.iloc[start_idx:end_idx].copy()
	            sub_bin_rows["bin"] = new_bin_name
	            sub_bin_rows["bin_description"] = new_description

	            row_names.append(new_bin_name)
	            row_frames.append(sub_bin_rows)
	            start_idx = end_idx

	    # The pieces were produced in final display order already.
	    df_sorted = pd.concat(row_frames)

	    # Vertical positions: each row is centered within its share of the height
	    available_height = 100 - (2 * padding)
	    row_height = available_height / len(row_names)
	    row_positions = {
	        name: padding + i * row_height + (row_height / 2)
	        for i, name in enumerate(row_names)
	    }
	    df_sorted["y"] = df_sorted["bin"].map(row_positions)

	    # Horizontal positions: center each row's bubbles around the middle
	    center_point = 50  # Middle of the chart (0-100 scale)
	    df_sorted["x"] = np.nan
	    for name in row_names:
	        bin_mask = df_sorted["bin"] == name
	        num_topics_in_bin = int(bin_mask.sum())

	        # Rows that are not full get a wider gap (17.5 units) than full rows
	        # (15 units). A single bubble lands exactly on the center point.
	        spacing = 17.5 if num_topics_in_bin < max_items_per_row else 15
	        start_pos = center_point - ((num_topics_in_bin - 1) * spacing / 2)
	        df_sorted.loc[bin_mask, "x"] = [
	            start_pos + (i * spacing) for i in range(num_topics_in_bin)
	        ]

	    # Rank of each topic in display order (1 = first bubble of the first row)
	    df_sorted["size_rank"] = range(1, len(df_sorted) + 1)

	    return df_sorted


	# Thin wrapper around apply_binned_layout: it takes no metric argument and
	# always delegates with the layout's default settings.
	def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
	    """Return ``df`` positioned by ``apply_binned_layout`` with its default arguments."""
	    # For the main chart, we always use the binned layout
	    return apply_binned_layout(df)


	# Callback to update the bubble chart
	@callback(
	    Output("bubble-chart", "figure"),
	    [
	        Input("stored-data", "data"),
	        Input("color-metric", "value"),
	    ],
	)
	def update_bubble_chart(data, color_metric):
	    """Build the bubble chart figure from the stored topic statistics.

	    Each topic is one bubble: positioned by the binned layout, sized by a
	    log-scaled dialog count and colored by the selected rate column. Topic
	    names are drawn as annotations, and every row gets one dotted separator
	    line plus a label on its left edge.

	    Args:
	        data: Contents of the "stored-data" store (list of per-topic records),
	            or a falsy value when nothing has been uploaded.
	        color_metric: "negative_rate", "unresolved_rate" or "urgent_rate";
	            any other value is treated as "urgent_rate".

	    Returns:
	        A plotly figure; an empty ``go.Figure()`` when there is no data.
	    """
	    if not data:
	        return go.Figure()

	    # Update positions using binned layout
	    df = update_bubble_positions(pd.DataFrame(data))

	    # Always use count for sizing
	    raw_sizes = df["count"]
	    size_title = "Dialog Count"

	    # Log-scale the sizes so a few huge topics do not dwarf the rest.
	    # Raise min_size to make the smallest bubble bigger; lower size_span to
	    # make the biggest bubble smaller.
	    min_size = 1
	    size_span = 50
	    if raw_sizes.max() > raw_sizes.min():
	        log_sizes = np.log1p(raw_sizes)
	        size_values = (
	            min_size
	            + (log_sizes - log_sizes.min())
	            / (log_sizes.max() - log_sizes.min())
	            * size_span
	        )
	    else:
	        # If all values are the same, use a default size
	        size_values = np.full(len(df), 12.5)

	    # Determine color column and colorbar title from the selected metric.
	    # All three metrics share the same color scale.
	    color_titles = {
	        "negative_rate": "Negativity (%)",
	        "unresolved_rate": "Unresolved (%)",
	        "urgent_rate": "Urgency (%)",
	    }
	    color_column = color_metric if color_metric in color_titles else "urgent_rate"
	    color_values = df[color_column]
	    color_title = color_titles[color_column]
	    color_scale = "Teal"

	    # Create enhanced hover text that includes bin information
	    hover_text = [
	        f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
	        for topic, raw, color, bin_desc in zip(
	            df["deduplicated_topic_name"],
	            raw_sizes,
	            color_values,
	            df["bin_description"],
	        )
	    ]

	    # Create bubble chart
	    fig = px.scatter(
	        df,
	        x="x",
	        y="y",
	        size=size_values,
	        color=color_values,
	        hover_name="deduplicated_topic_name",
	        hover_data={
	            "x": False,
	            "y": False,
	            "bin_description": True,
	        },
	        size_max=42.5,  # Maximum size of the bubbles, change this to adjust the size
	        color_continuous_scale=color_scale,
	        custom_data=[
	            "deduplicated_topic_name",
	            "count",
	            "negative_rate",
	            "unresolved_rate",
	            "urgent_rate",
	            "bin_description",
	        ],
	    )

	    # Markers only; topic names are drawn as annotations below
	    fig.update_traces(
	        mode="markers",
	        marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
	        hovertemplate="%{hovertext}<extra></extra>",
	        hovertext=hover_text,
	    )

	    # Create annotations for the bubbles. Rows of df and entries of
	    # size_values are in the same order, so they are paired positionally.
	    annotations = []
	    for x_pos, y_pos, topic_name, bubble_size in zip(
	        df["x"], df["y"], df["deduplicated_topic_name"], size_values
	    ):
	        # Wrap text every 4 words
	        words = topic_name.split()
	        wrapped_text = "<br>".join(
	            " ".join(words[start : start + 4]) for start in range(0, len(words), 4)
	        )

	        # Vertical offset derived from the bubble size so the text sits below
	        # the bubble (the y axis is reversed); adjust the divisor for spacing.
	        marker_size = bubble_size / 20

	        annotations.append(
	            dict(
	                x=x_pos,
	                y=y_pos
	                + 0.125  # Keeps the left-most label of a full row clear of the bin label
	                + marker_size,
	                text=wrapped_text,
	                showarrow=False,
	                textangle=0,
	                font=dict(
	                    size=10,
	                    color="var(--foreground)",
	                    family="Arial, sans-serif",
	                    weight="bold",
	                ),
	                xanchor="center",
	                yanchor="top",  # Anchor to top of text box so it hangs below the bubble
	                bgcolor="rgba(255,255,255,0.7)",  # Semi-transparent background for readability
	                bordercolor="rgba(0,0,0,0.1)",  # Subtle border
	                borderwidth=1,
	                borderpad=1,
	                # TODO: Radius for rounded corners
	            )
	        )

	    # One dotted separator line and one label per row (bin)
	    bin_y_positions = df.groupby("bin")["y"].mean()
	    bin_descriptions = df.set_index("bin")["bin_description"].to_dict()

	    for bin_name, bin_y in bin_y_positions.items():
	        fig.add_shape(
	            type="line",
	            x0=0,
	            y0=bin_y,
	            x1=100,
	            y1=bin_y,
	            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
	            layer="below",
	        )

	        annotations.append(
	            dict(
	                x=0,  # Position the label on the left side
	                y=bin_y,
	                xref="x",
	                yref="y",
	                text=bin_descriptions[bin_name],
	                showarrow=False,
	                font=dict(size=8.25, color="var(--muted-foreground)"),
	                align="left",
	                xanchor="left",
	                yanchor="middle",
	                bgcolor="rgba(255,255,255,0.7)",
	                borderpad=1,
	            )
	        )

	    fig.update_layout(
	        title=None,
	        xaxis=dict(
	            showgrid=False,
	            zeroline=False,
	            showticklabels=False,
	            title=None,
	            range=[0, 100],
	        ),
	        yaxis=dict(
	            showgrid=False,
	            zeroline=False,
	            showticklabels=False,
	            title=None,
	            range=[0, 100],
	            autorange="reversed",  # Keep largest at top
	        ),
	        hovermode="closest",
	        margin=dict(l=0, r=0, t=10, b=10),
	        coloraxis_colorbar=dict(
	            title=color_title,
	            title_font=dict(size=9),
	            tickfont=dict(size=8),
	            thickness=10,
	            len=0.6,
	            yanchor="middle",
	            y=0.5,
	            xpad=0,
	        ),
	        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
	        paper_bgcolor="rgba(0,0,0,0)",
	        plot_bgcolor="rgba(0,0,0,0)",
	        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
	        annotations=annotations,  # Topic names and bin labels
	    )

	    return fig


	# Callback that fills the topic-details sidebar for the hovered or clicked bubble
	@callback(
	    [
	        Output("topic-title", "children"),
	        Output("topic-metadata", "children"),
	        Output("topic-metrics", "children"),
	        Output("important-tags", "children"),
	        Output("sample-dialogs", "children"),
	        Output("no-topic-selected", "style"),
	    ],
	    [Input("bubble-chart", "hoverData"), Input("bubble-chart", "clickData")],
	    [
	        State("stored-data", "data"),
	        State("upload-data", "contents"),
	        State("upload-data", "filename"),
	    ],
	)
	def update_topic_details(
	    hover_data, click_data, stored_data, file_contents, filename=None
	):
	    """Render the sidebar (title, metadata, metrics, tags, samples) for one topic.

	    Args:
	        hover_data: ``hoverData`` of the bubble chart, if any.
	        click_data: ``clickData`` of the bubble chart, if any.
	        stored_data: Per-topic records from the "stored-data" store.
	        file_contents: Raw upload string ``"<header>,<base64 payload>"``; it is
	            re-parsed here to get the individual dialogs of the topic.
	        filename: Name of the uploaded file, used to pick the parser. When it
	            is missing or has no recognised extension, the upload header is
	            compared against the xlsx MIME type instead.

	    Returns:
	        A 6-tuple matching the callback outputs. With no usable selection or
	        no data, the detail outputs are empty and the "no topic selected"
	        panel is shown (``display: flex``); otherwise that panel is hidden.
	    """
	    # Hover data takes precedence; click data is only used when no hover
	    # data is present.
	    hover_info = hover_data or click_data

	    if not hover_info or not stored_data or not file_contents:
	        return "", [], [], "", [], {"display": "flex"}

	    # Extract topic name from the first custom-data entry of the point
	    topic_name = hover_info["points"][0]["customdata"][0]

	    # Get stored statistics for this topic
	    df_stored = pd.DataFrame(stored_data)
	    topic_data = df_stored[df_stored["deduplicated_topic_name"] == topic_name].iloc[0]

	    # Re-read the original upload to get the individual conversations.
	    # NOTE(review): this decodes and parses the whole file on every hover
	    # event; consider caching the parsed frame if uploads are large.
	    content_type, content_string = file_contents.split(",", 1)
	    decoded = base64.b64decode(content_string)

	    # Prefer the file extension: the exact-MIME test alone sends ".xls"
	    # uploads (accepted by process_upload) down the CSV path.
	    lowered_name = (filename or "").lower()
	    if lowered_name.endswith(".csv"):
	        is_excel = False
	    elif lowered_name.endswith((".xls", ".xlsx", ".xlsm")):
	        is_excel = True
	    else:
	        is_excel = (
	            content_type
	            == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
	        )

	    if is_excel:
	        df_full = pd.read_excel(io.BytesIO(decoded))
	    else:  # Assume CSV
	        df_full = pd.read_csv(io.StringIO(decoded.decode("utf-8")))

	    # Filter to this topic
	    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

	    # Create the title
	    title = html.Div([html.Span(topic_name)])

	    # Create metadata items
	    metadata_items = [
	        html.Div(
	            [
	                html.I(className="fas fa-comments metadata-icon"),
	                html.Span(f"{int(topic_data['count'])} dialogs"),
	            ],
	            className="metadata-item",
	        ),
	    ]

	    # Create metrics boxes: (rate column, label, CSS modifier class)
	    metric_specs = (
	        ("negative_rate", "Negative Sentiment", "negative"),
	        ("unresolved_rate", "Unresolved", "unresolved"),
	        ("urgent_rate", "Urgent", "urgent"),
	    )
	    metrics_boxes = [
	        html.Div(
	            [
	                html.Div(f"{topic_data[column]}%", className="metric-value"),
	                html.Div(label, className="metric-label"),
	            ],
	            className=f"metric-box {modifier}",
	        )
	        for column, label, modifier in metric_specs
	    ]

	    # Count the comma-separated tags of every conversation in this topic.
	    # str() guards against non-text cells (e.g. a purely numeric tag).
	    tag_counts = Counter()
	    if "consolidated_tags" in topic_conversations.columns:
	        for raw_tags in topic_conversations["consolidated_tags"].dropna():
	            tag_counts.update(
	                tag.strip() for tag in str(raw_tags).split(",") if tag.strip()
	            )

	    # Most common first, alphabetical for ties; keep only the top K
	    TOP_K = 15
	    sorted_tags = sorted(tag_counts.items(), key=lambda item: (-item[1], item[0]))[
	        :TOP_K
	    ]

	    if sorted_tags:
	        tags_output = html.Div(
	            [
	                html.Div(
	                    [
	                        html.I(className="fas fa-tag topic-tag-icon"),
	                        html.Span(f"{tag} ({count})"),
	                    ],
	                    className="topic-tag",
	                )
	                for tag, count in sorted_tags
	            ],
	            className="tags-container",
	        )
	    else:
	        tags_output = html.Div(
	            [
	                html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
	                "No tags found for this topic",
	            ],
	            className="no-tags-message",
	        )

	    # Sample up to 5 random dialogs
	    sample_size = min(5, len(topic_conversations))
	    if sample_size > 0:
	        sample_indices = random.sample(range(len(topic_conversations)), sample_size)
	        samples = topic_conversations.iloc[sample_indices]

	        sample_dialogs = []
	        for _, row in samples.iterrows():
	            tags = [
	                html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
	                html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
	                html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
	            ]

	            # Add Chat ID tag if 'id' column exists
	            if "id" in row:
	                tags.append(
	                    html.Span(
	                        f"Chat ID: {row['id']}", className="dialog-tag tag-chat-id"
	                    )
	                )

	            sample_dialogs.append(
	                html.Div(
	                    [
	                        html.Div(row["Summary"], className="dialog-summary"),
	                        html.Div(
	                            tags,
	                            className="dialog-metadata",
	                        ),
	                    ],
	                    className="dialog-item",
	                )
	            )
	    else:
	        sample_dialogs = [
	            html.Div(
	                "No sample dialogs available for this topic.",
	                style={"color": "var(--muted-foreground)"},
	            )
	        ]

	    return (
	        title,
	        metadata_items,
	        metrics_boxes,
	        tags_output,
	        sample_dialogs,
	        {"display": "none"},
	    )


	if __name__ == "__main__":
	    # Start the development server when the file is executed directly.
	    # `app.run` is the current Dash entry point and `run_server` the legacy
	    # one, so prefer `run` when the installed Dash provides it and fall back
	    # to `run_server` on older releases.
	    run_app = getattr(app, "run", None) or app.run_server
	    run_app(debug=False)