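# Page residue from the Hugging Face file view (not part of the program):
# uploader "eloukas"; commit message "Add files for HF deployment"; commit 1b75011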
import base64
import io
import random
import dash
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Input, Output, State, callback, dcc, html
# Initialize the Dash app. suppress_callback_exceptions=True turns off Dash's
# check that every callback ID exists in the initial layout.
app = dash.Dash(__name__, suppress_callback_exceptions=True)
# Module-level handle on the underlying server object
# (presumably the entry point used by the hosting platform — confirm).
server = app.server

# Define app layout: header, upload box, the (initially hidden) dashboard with
# the bubble chart on the left and the topic-details sidebar on the right, and
# a dcc.Store that holds the per-topic statistics between callbacks.
app.layout = html.Div(
    [
        # Header
        html.Div(
            [
                html.H1(
                    "Sessions Observatory by helvia.ai 🔭📊",
                    className="app-header",
                ),
                html.P(
                    "Upload a CSV/Excel file to visualize the chatbot's dialog topics.",
                    className="app-description",
                ),
            ],
            className="header-container",
        ),
        # File Upload Component
        html.Div(
            [
                dcc.Upload(
                    id="upload-data",
                    children=html.Div(
                        [
                            html.Div("Drag and Drop", className="upload-text"),
                            html.Div("or", className="upload-divider"),
                            html.Div(
                                html.Button("Select a File", className="upload-button")
                            ),
                        ],
                        className="upload-content",
                    ),
                    style={
                        "width": "100%",
                        "height": "120px",
                        "lineHeight": "60px",
                        "borderWidth": "1px",
                        "borderStyle": "dashed",
                        "borderRadius": "0.5rem",
                        "textAlign": "center",
                        "margin": "10px 0",
                        "backgroundColor": "hsl(210, 40%, 98%)",
                        "borderColor": "hsl(214.3, 31.8%, 91.4%)",
                        "cursor": "pointer",
                    },
                    multiple=False,
                ),
                # Status message; hidden until the upload callback has something to say
                html.Div(
                    id="upload-status",
                    className="upload-status-message",
                    style={"display": "none"},
                ),
            ],
            className="upload-container",
        ),
        # Main Content Area (hidden until file is uploaded)
        html.Div(
            [
                # Dashboard layout with flexible grid
                html.Div(
                    [
                        # Left side: Bubble chart
                        html.Div(
                            [
                                html.H3(
                                    id="topic-distribution-header",
                                    children="Sessions Observatory",
                                    className="section-header",
                                ),
                                # NOTE: the original author reports that this calc()
                                # height does not take effect; the earlier variant
                                # used style={"height": "80vh"}.
                                dcc.Graph(
                                    id="bubble-chart",
                                    style={"height": "calc(100% - 154px)"},
                                ),
                                html.Div(
                                    [
                                        # "Color by" label row
                                        html.Div(
                                            [
                                                html.Div(
                                                    html.Label(
                                                        "Color by:",
                                                        className="control-label",
                                                    ),
                                                    className="control-label-container",
                                                ),
                                            ],
                                            className="control-labels-row",
                                        ),
                                        # "Color by" options row
                                        html.Div(
                                            [
                                                html.Div(
                                                    dcc.RadioItems(
                                                        id="color-metric",
                                                        options=[
                                                            {
                                                                "label": "Sentiment",
                                                                "value": "negative_rate",
                                                            },
                                                            {
                                                                "label": "Resolution",
                                                                "value": "unresolved_rate",
                                                            },
                                                            {
                                                                "label": "Urgency",
                                                                "value": "urgent_rate",
                                                            },
                                                        ],
                                                        value="negative_rate",
                                                        inline=True,
                                                        className="radio-group",
                                                        inputClassName="radio-input",
                                                        labelClassName="radio-label",
                                                    ),
                                                    className="radio-container",
                                                ),
                                            ],
                                            className="control-options-row",
                                        ),
                                    ],
                                    className="chart-controls",
                                ),
                            ],
                            className="chart-container",
                        ),
                        # Right side: Interactive sidebar with topic details
                        html.Div(
                            [
                                html.Div(
                                    [
                                        html.H3(
                                            "Topic Details", className="section-header"
                                        ),
                                        html.Div(
                                            id="topic-title", className="topic-title"
                                        ),
                                        html.Div(
                                            [
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Metadata",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="topic-metadata",
                                                            className="metadata-container",
                                                        ),
                                                    ],
                                                    className="metadata-section",
                                                ),
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Key Metrics",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="topic-metrics",
                                                            className="metrics-container",
                                                        ),
                                                    ],
                                                    className="metrics-section",
                                                ),
                                                # Tags section
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Tags",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="important-tags",
                                                            className="tags-container",
                                                        ),
                                                    ]
                                                ),
                                            ],
                                            className="details-section",
                                        ),
                                        html.Div(
                                            [
                                                html.H4(
                                                    "Sample Dialogs (Summary)",
                                                    className="subsection-header",
                                                ),
                                                html.Div(
                                                    id="sample-dialogs",
                                                    className="sample-dialogs-container",
                                                ),
                                            ],
                                            className="samples-section",
                                        ),
                                    ],
                                    className="topic-details-content",
                                ),
                                # Overlay shown while no bubble has been hovered/clicked
                                html.Div(
                                    id="no-topic-selected",
                                    children=[
                                        html.Div(
                                            [
                                                html.I(
                                                    className="fas fa-info-circle info-icon"
                                                ),
                                                html.H3("No topic selected"),
                                                html.P(
                                                    "Click or hover on a bubble to view topic details."
                                                ),
                                            ],
                                            className="no-selection-message",
                                        )
                                    ],
                                    className="no-selection-container",
                                ),
                            ],
                            className="sidebar-container",
                        ),
                    ],
                    className="dashboard-container",
                )
            ],
            id="main-content",
            style={"display": "none"},
        ),
        # Store the processed data
        dcc.Store(id="stored-data"),
    ],
    className="app-container",
)
# Define the HTML shell and CSS for the app. The {%...%} placeholders are
# filled in by Dash when it renders the index page; the <style> block holds
# every class referenced by the layout and callbacks.
app.index_string = """
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>Sessions Observatory by helvia.ai 🔭📊</title>
{%favicon%}
{%css%}
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
:root {
--background: hsl(210, 20%, 95%);
--foreground: hsl(222.2, 84%, 4.9%);
--card: hsl(0, 0%, 100%);
--card-foreground: hsl(222.2, 84%, 4.9%);
--popover: hsl(0, 0%, 100%);
--popover-foreground: hsl(222.2, 84%, 4.9%);
--primary: hsl(222.2, 47.4%, 11.2%);
--primary-foreground: hsl(210, 40%, 98%);
--secondary: hsl(210, 40%, 96.1%);
--secondary-foreground: hsl(222.2, 47.4%, 11.2%);
--muted: hsl(210, 40%, 96.1%);
--muted-foreground: hsl(215.4, 16.3%, 46.9%);
--accent: hsl(210, 40%, 96.1%);
--accent-foreground: hsl(222.2, 47.4%, 11.2%);
--destructive: hsl(0, 84.2%, 60.2%);
--destructive-foreground: hsl(210, 40%, 98%);
--border: hsl(214.3, 31.8%, 91.4%);
--input: hsl(214.3, 31.8%, 91.4%);
--ring: hsl(222.2, 84%, 4.9%);
--radius: 0.5rem;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: 'Inter', sans-serif;
}
body {
background-color: var(--background);
color: var(--foreground);
font-feature-settings: "rlig" 1, "calt" 1;
}
.app-container {
max-width: 2500px;
margin: 0 auto;
padding: 1.5rem;
background-color: var(--background);
min-height: 100vh;
display: flex;
flex-direction: column;
}
.header-container {
margin-bottom: 2rem;
text-align: center;
}
.app-header {
color: var(--foreground);
margin-bottom: 0.75rem;
font-weight: 600;
font-size: 2rem;
line-height: 1.2;
}
.app-description {
color: var(--muted-foreground);
font-size: 1rem;
line-height: 1.5;
}
.upload-container {
margin-bottom: 2rem;
max-width: 800px;
margin-left: auto;
margin-right: auto;
}
.upload-content {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 80%;
padding: 1.5rem;
position: relative;
}
.upload-text {
font-size: 1rem;
color: var(--primary);
font-weight: 500;
}
.upload-divider {
color: var(--muted-foreground);
margin: 0.5rem 0;
font-size: 0.875rem;
}
.upload-button {
background-color: var(--primary);
color: var(--primary-foreground);
border: none;
padding: 0.5rem 1rem;
border-radius: var(--radius);
font-size: 0.875rem;
cursor: pointer;
transition: opacity 0.2s;
font-weight: 500;
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
height: 2.5rem;
}
.upload-button:hover {
opacity: 0.9;
}
/* Status message styling */
.upload-status-message {
margin-top: 1rem;
padding: 0.75rem;
font-weight: 500;
text-align: center;
border-radius: var(--radius);
font-size: 0.875rem;
transition: all 0.3s ease;
background-color: var(--secondary);
color: var(--secondary-foreground);
}
/* Chart controls styling */
.chart-controls {
margin-top: 1rem;
display: flex;
flex-direction: column;
gap: 0.75rem;
padding: 1rem;
background-color: var(--card);
border-radius: var(--radius);
border: 1px solid var(--border);
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
}
.control-labels-row {
display: flex;
width: 100%;
}
.control-options-row {
display: flex;
width: 100%;
}
.control-label-container {
padding: 0 0.5rem;
text-align: left;
}
.control-label {
font-weight: 500;
color: var(--foreground);
font-size: 0.875rem;
line-height: 1.25rem;
}
.radio-container {
padding: 0 0.5rem;
width: 100%;
}
.radio-group {
display: flex;
gap: 1rem;
}
.radio-input {
margin-right: 0.375rem;
cursor: pointer;
height: 1rem;
width: 1rem;
border-radius: 9999px;
border: 1px solid var(--border);
appearance: none;
-webkit-appearance: none;
background-color: var(--background);
transition: border-color 0.2s;
}
.radio-input:checked {
border-color: var(--primary);
background-color: var(--primary);
background-image: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
background-size: 100% 100%;
background-position: center;
background-repeat: no-repeat;
}
.radio-label {
font-weight: 400;
color: var(--foreground);
display: flex;
align-items: center;
cursor: pointer;
font-size: 0.875rem;
line-height: 1.25rem;
}
/* Dashboard container */
.dashboard-container {
display: flex;
flex-wrap: wrap;
gap: 1.5rem;
flex: 1;
height: 100%;
}
.chart-container {
flex: 2.75;
min-width: 400px;
background: var(--card);
border-radius: var(--radius);
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
padding: 1rem;
border: 0.75px solid var(--border);
height: 100%;
}
.sidebar-container {
flex: 1;
min-width: 300px;
background: var(--card);
border-radius: var(--radius);
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
padding: 1rem;
position: relative;
height: 100vh;
overflow-y: auto;
border: 1px solid var(--border);
height: 100%;
}
.section-header {
margin-bottom: 1rem;
color: var(--foreground);
border-bottom: 1px solid var(--border);
padding-bottom: 0.75rem;
font-weight: 600;
font-size: 1.25rem;
}
.subsection-header {
margin: 1rem 0 0.75rem;
color: var(--foreground);
font-size: 1rem;
font-weight: 600;
}
.topic-title {
font-size: 1.25rem;
font-weight: 600;
color: var(--foreground);
margin-bottom: 1rem;
padding: 0.5rem 0.75rem;
background-color: var(--secondary);
border-radius: var(--radius);
}
.metadata-container {
display: flex;
flex-wrap: wrap;
gap: 0.75rem;
margin-bottom: 1rem;
}
.metadata-item {
background-color: var(--secondary);
padding: 0.5rem 0.75rem;
border-radius: var(--radius);
font-size: 0.875rem;
display: flex;
align-items: center;
color: var(--secondary-foreground);
}
.metadata-icon {
margin-right: 0.5rem;
color: var(--primary);
}
.metrics-container {
display: flex;
justify-content: space-between;
gap: 0.75rem;
margin-bottom: 0.75rem;
}
.metric-box {
background-color: var(--card);
border-radius: var(--radius);
padding: 0.75rem;
text-align: center;
flex: 1;
border: 1px solid var(--border);
}
.metric-box.negative {
border-left: 3px solid var(--destructive);
}
.metric-box.unresolved {
border-left: 3px solid hsl(47.9, 95.8%, 53.1%);
}
.metric-box.urgent {
border-left: 3px solid hsl(217.2, 91.2%, 59.8%);
}
.metric-value {
font-size: 1.5rem;
font-weight: 600;
margin-bottom: 0.25rem;
color: var(--foreground);
line-height: 1;
}
.metric-label {
font-size: 0.75rem;
color: var(--muted-foreground);
}
.sample-dialogs-container {
margin-top: 0.75rem;
}
.dialog-item {
background-color: var(--secondary);
border-radius: var(--radius);
padding: 1rem;
margin-bottom: 0.75rem;
border-left: 3px solid var(--primary);
}
.dialog-summary {
font-size: 0.875rem;
line-height: 1.5;
margin-bottom: 0.5rem;
color: var(--foreground);
}
.dialog-metadata {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin-top: 0.5rem;
font-size: 0.75rem;
}
.dialog-tag {
padding: 0.25rem 0.5rem;
border-radius: var(--radius);
font-size: 0.7rem;
font-weight: 500;
}
.tag-sentiment {
background-color: var(--destructive);
color: var(--destructive-foreground);
}
.tag-resolution {
background-color: hsl(47.9, 95.8%, 53.1%);
color: hsl(222.2, 84%, 4.9%);
}
.tag-urgency {
background-color: hsl(217.2, 91.2%, 59.8%);
color: hsl(210, 40%, 98%);
}
.tag-chat-id {
background-color: hsl(215.4, 16.3%, 46.9%);
color: hsl(210, 40%, 98%);
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-weight: 500;
}
.no-selection-container {
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
display: flex;
align-items: center;
justify-content: center;
background-color: hsla(0, 0%, 100%, 0.95);
z-index: 10;
border-radius: var(--radius);
}
.no-selection-message {
text-align: center;
color: var(--muted-foreground);
padding: 1.5rem;
}
.info-icon {
font-size: 2rem;
margin-bottom: 0.75rem;
color: var(--muted);
}
/* Tags container */
.tags-container {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 5px;
margin-bottom: 15px;
padding: 6px;
border-radius: 8px;
background-color: #f8f9fa;
}
.topic-tag {
padding: 0.375rem 0.75rem;
border-radius: var(--radius);
font-size: 0.75rem;
display: inline-flex;
align-items: center;
transition: all 0.2s ease;
font-weight: 500;
margin-bottom: 0.25rem;
cursor: default;
background-color: var(--muted);
color: var(--muted-foreground);
border: 1px solid var(--border);
}
.topic-tag {
padding: 6px 12px;
border-radius: 15px;
font-size: 0.8rem;
display: inline-flex;
align-items: center;
box-shadow: 0 1px 3px rgba(0,0,0,0.12);
transition: all 0.2s ease;
font-weight: 500;
margin-bottom: 5px;
cursor: default;
border: 1px solid rgba(0,0,0,0.08);
background-color: #6c757d; /* Consistent medium gray color */
color: white;
}
.topic-tag:hover {
transform: translateY(-1px);
box-shadow: 0 3px 5px rgba(0,0,0,0.15);
background-color: #5a6268; /* Slightly darker on hover */
}
.topic-tag-icon {
margin-right: 5px;
font-size: 0.7rem;
opacity: 0.8;
color: rgba(255, 255, 255, 0.9);
}
.no-tags-message {
color: var(--muted-foreground);
font-style: italic;
padding: 0.75rem;
text-align: center;
width: 100%;
}
/* Responsive adjustments */
@media (max-width: 768px) {
.dashboard-container {
flex-direction: column;
}
.chart-container, .sidebar-container {
width: 100%;
}
.app-header {
font-size: 1.5rem;
}
}
</style>
</head>
<body>
{%app_entry%}
<footer>
{%config%}
{%scripts%}
{%renderer%}
</footer>
</body>
</html>
"""
@callback(
    Output("topic-distribution-header", "children"),
    Input("stored-data", "data"),
)
def update_topic_distribution_header(data):
    """Return the chart header text.

    With stored topic records, the header includes the sum of their "count"
    values; without data it is the plain default title.
    """
    if data:
        # Total dialogs = sum of the per-topic 'count' column
        total_dialogs = pd.DataFrame(data)["count"].sum()
        return f"Sessions Observatory ({total_dialogs} dialogs)"
    # Default when no data is available
    return "Sessions Observatory"
# Define callback to process uploaded file
def _upload_status(icon_class, color, message):
    """Build the icon + text element shown in the upload status banner."""
    return html.Div(
        [
            html.I(
                className=icon_class,
                style={"color": color, "marginRight": "8px"},
            ),
            message,
        ],
        style={"color": color},
    )


@callback(
    [
        Output("stored-data", "data"),
        Output("upload-status", "children"),
        Output("upload-status", "style"),  # Style output controls banner visibility
        Output("main-content", "style"),
    ],
    [Input("upload-data", "contents")],
    [State("upload-data", "filename")],
)
def process_upload(contents, filename):
    """Parse the uploaded file and publish per-topic statistics.

    Args:
        contents: The upload as a "<content type>,<base64 payload>" string, or
            None when nothing has been uploaded.
        filename: Name of the uploaded file; its extension selects the parser
            (".csv" -> read_csv, ".xls"/".xlsx"/".xlsm" -> read_excel).

    Returns:
        A 4-tuple matching the callback outputs: the topic statistics as a
        list of records (None on failure), the status banner content, the
        banner style, and the main-content style.
    """
    hidden = {"display": "none"}
    visible = {"display": "block"}
    error_color = "var(--destructive)"
    success_color = "hsl(142.1, 76.2%, 36.3%)"

    if contents is None:
        return None, "", hidden, hidden  # Keep everything hidden

    try:
        # Split off the content-type prefix once; the rest is the base64 payload
        _content_type, content_string = contents.split(",", 1)
        decoded = base64.b64decode(content_string)

        # Decide by extension: a substring test would treat e.g.
        # "csv_export.xlsx" as CSV.
        lowered_name = (filename or "").lower()
        if lowered_name.endswith(".csv"):
            # utf-8-sig also strips a leading byte-order mark, if present
            df = pd.read_csv(io.StringIO(decoded.decode("utf-8-sig")))
        elif lowered_name.endswith((".xls", ".xlsx", ".xlsm")):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return (
                None,
                _upload_status(
                    "fas fa-exclamation-circle",
                    error_color,
                    "Please upload a CSV or Excel file.",
                ),
                visible,  # Make banner visible after error
                hidden,
            )

        # Process the dataframe to get topic statistics
        topic_stats = analyze_topics(df)
        return (
            topic_stats.to_dict("records"),
            _upload_status(
                "fas fa-check-circle",
                success_color,
                f'Successfully uploaded "{filename}"',
            ),
            visible,
            # Make the dashboard visible after a successful upload
            {"display": "block", "height": "calc(100vh - 40px)"},
        )
    except Exception as e:
        # Callback boundary: report any parsing/analysis failure to the user
        # instead of letting the callback crash.
        return (
            None,
            _upload_status(
                "fas fa-exclamation-triangle",
                error_color,
                f"Error processing file: {str(e)}",
            ),
            visible,  # Make banner visible after error
            hidden,
        )
# Function to analyze the topics and create statistics
def analyze_topics(df):
    """Aggregate dialogs per topic and attach the binned chart layout.

    Groups ``df`` by "deduplicated_topic_name", counts the "id" values and the
    rows labelled "negative" / "unresolved" / "urgent" in the "Sentiment",
    "Resolution" and "Urgency" columns, converts those counts to percentages
    rounded to one decimal, and passes the result to ``apply_binned_layout``.
    """

    def matches(label):
        # Aggregator: number of values in a group equal to `label`
        return lambda x: (x == label).sum()

    per_topic = df.groupby("deduplicated_topic_name")
    topic_stats = per_topic.agg(
        count=("id", "count"),
        negative_count=("Sentiment", matches("negative")),
        unresolved_count=("Resolution", matches("unresolved")),
        urgent_count=("Urgency", matches("urgent")),
    )
    topic_stats = topic_stats.reset_index()

    # Rates are percentages of the topic's dialog count
    totals = topic_stats["count"]
    negative_share = topic_stats["negative_count"] / totals * 100
    unresolved_share = topic_stats["unresolved_count"] / totals * 100
    urgent_share = topic_stats["urgent_count"] / totals * 100
    topic_stats["negative_rate"] = negative_share.round(1)
    topic_stats["unresolved_rate"] = unresolved_share.round(1)
    topic_stats["urgent_rate"] = urgent_share.round(1)

    # Apply binned layout
    return apply_binned_layout(topic_stats)
# Binned layout: one chart row per dialog-count bin
def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
    """
    Apply a binned layout where bubbles are grouped into rows based on dialog count.
    Bubbles in each row will be centered horizontally.

    Args:
        df: DataFrame containing the topic data; must have a "count" column.
        padding: Padding from the top and bottom edges, on the 0-100 chart scale.
        bin_config: List of (lower, upper, description) tuples defining bin ranges
            (inclusive; upper=None means no upper limit). The first matching bin
            wins; counts matching no bin fall into the last one.
            Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
        max_items_per_row: Maximum number of items to display in a single row.
            Larger bins are split evenly into sub-rows named "Bin N_k".

    Returns:
        A sorted copy of ``df`` (bins in configuration order, counts descending
        within a bin) with "bin", "bin_description", "y", "x" and "size_rank"
        columns set. The original index labels are kept.
    """
    # Default bin configuration if none is provided
    # 8 rows x 6 bubbles is usually good
    if bin_config is None:
        bin_config = [
            (100, None, "100+ dialogs"),
            (50, 99, "50-99 dialogs"),
            (25, 49, "25-49 dialogs"),
            (9, 24, "9-24 dialogs"),
            (7, 8, "7-8 dialogs"),
            (5, 6, "5-6 dialogs"),
            (4, 4, "4 dialogs"),
            (0, 3, "0-3 dialogs"),
        ]

    # Work on a copy to avoid modifying the caller's frame
    layout = df.copy()

    # Classify every row by the position of its bin in bin_config. Using the
    # numeric position (not the "Bin N" string) keeps rows in configuration
    # order even with 10+ bins or 10+ sub-rows.
    counts = layout["count"].to_numpy()
    conditions = [
        counts >= lower if upper is None else (counts >= lower) & (counts <= upper)
        for lower, upper, _ in bin_config
    ]
    last_bin = len(bin_config) - 1
    layout["_bin_order"] = np.select(
        conditions, list(range(len(bin_config))), default=last_bin
    )

    # Bin order ascending (first configured bin on top), biggest topics first
    layout = layout.sort_values(by=["_bin_order", "count"], ascending=[True, False])
    bin_order = layout.pop("_bin_order").to_numpy()

    center_point = 50  # Middle of the chart (0-100 scale)
    total = len(layout)
    bin_names = np.empty(total, dtype=object)
    bin_texts = np.empty(total, dtype=object)
    row_numbers = np.zeros(total, dtype=int)
    x_positions = np.full(total, float(center_point))

    num_rows = 0
    for order in pd.unique(bin_order):
        members = np.flatnonzero(bin_order == order)
        base_name = f"Bin {int(order) + 1}"
        base_text = bin_config[int(order)][2]

        # Ceil-divide into sub-rows; array_split balances them, handing the
        # remainder to the leading sub-rows one item at a time.
        num_sub_bins = -(-len(members) // max_items_per_row)
        for sub_no, chunk in enumerate(np.array_split(members, num_sub_bins), start=1):
            if num_sub_bins > 1:
                name = f"{base_name}_{sub_no}"
                text = f"{base_text} ({sub_no}/{num_sub_bins})"
            else:
                name, text = base_name, base_text
            bin_names[chunk] = name
            bin_texts[chunk] = text
            row_numbers[chunk] = num_rows

            # Center the row horizontally: 17.5 units between bubbles in a
            # partially filled row, 15 units when the row is full.
            spacing = 17.5 if len(chunk) < max_items_per_row else 15
            start_pos = center_point - (len(chunk) - 1) * spacing / 2
            x_positions[chunk] = start_pos + np.arange(len(chunk)) * spacing
            num_rows += 1

    # Each row gets an equal slice of the available height and sits at its middle.
    # An empty frame has no rows, so skip the division.
    row_height = (100 - (2 * padding)) / num_rows if num_rows else 0

    layout["bin"] = bin_names
    layout["bin_description"] = bin_texts
    layout["y"] = padding + row_numbers * row_height + (row_height / 2)
    layout["x"] = x_positions
    # Rank in final display order, for reference
    layout["size_rank"] = range(1, total + 1)
    return layout
# Position helper used by the bubble chart
def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` laid out for the main chart, which always uses the binned layout."""
    positioned = apply_binned_layout(df)
    return positioned
# Callback to update the bubble chart
@callback(
    Output("bubble-chart", "figure"),
    [
        Input("stored-data", "data"),
        Input("color-metric", "value"),
    ],
)
def update_bubble_chart(data, color_metric):
    """Render the topic bubble chart from the stored topic statistics.

    Args:
        data: Topic records from the "stored-data" store; falsy before an upload.
        color_metric: Column that drives bubble color. "negative_rate" and
            "unresolved_rate" are used as given; any other value falls back to
            "urgent_rate".

    Returns:
        The plotly figure, or an empty ``go.Figure()`` when there is no data.
    """
    if not data:
        return go.Figure()

    # Recompute positions with the binned layout
    df = update_bubble_positions(pd.DataFrame(data))

    # Always use count for sizing
    raw_sizes = df["count"]
    size_title = "Dialog Count"

    # Log-scale the sizes so that a few huge topics do not dwarf the rest.
    # Raise min_size to make the smallest bubble bigger; lower size_span to
    # make the biggest bubble smaller.
    min_size = 1
    size_span = 50
    if raw_sizes.max() > raw_sizes.min():
        log_sizes = np.log1p(raw_sizes)
        size_values = (
            min_size
            + (log_sizes - log_sizes.min())
            / (log_sizes.max() - log_sizes.min())
            * size_span
        )
    else:
        # If all values are the same, use a default size
        size_values = np.ones(len(df)) * 12.5
    # Positional view of the sizes: df keeps its pre-sort index labels, so the
    # per-row loop below must not index the sizes by label.
    marker_sizes = np.asarray(size_values, dtype=float)

    # Determine color column and colorbar title from the selected metric
    color_titles = {
        "negative_rate": "Negativity (%)",
        "unresolved_rate": "Unresolved (%)",
        "urgent_rate": "Urgency (%)",
    }
    color_column = (
        color_metric
        if color_metric in ("negative_rate", "unresolved_rate")
        else "urgent_rate"
    )
    color_values = df[color_column]
    color_title = color_titles[color_column]
    color_scale = "Teal"  # same scale for every metric

    # Hover text that includes bin information
    hover_text = [
        f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
        for topic, raw, color, bin_desc in zip(
            df["deduplicated_topic_name"],
            raw_sizes,
            color_values,
            df["bin_description"],
        )
    ]

    # Create bubble chart
    fig = px.scatter(
        df,
        x="x",
        y="y",
        size=size_values,
        color=color_values,
        hover_name="deduplicated_topic_name",
        hover_data={
            "x": False,
            "y": False,
            "bin_description": True,
        },
        size_max=42.5,  # Maximum size of the bubbles, change this to adjust the size
        color_continuous_scale=color_scale,
        custom_data=[
            "deduplicated_topic_name",
            "count",
            "negative_rate",
            "unresolved_rate",
            "urgent_rate",
            "bin_description",
        ],
    )

    # Markers only; topic names are drawn as annotations below
    fig.update_traces(
        mode="markers",
        marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
        hovertemplate="%{hovertext}<extra></extra>",
        hovertext=hover_text,
    )

    # Create annotations (topic names) for the bubbles
    annotations = []
    for (_, row), marker_size in zip(df.iterrows(), marker_sizes):
        # Wrap text every 4 words
        words = str(row["deduplicated_topic_name"]).split()
        wrapped_text = "<br>".join(
            " ".join(words[start : start + 4]) for start in range(0, len(words), 4)
        )
        annotations.append(
            dict(
                x=row["x"],
                # Offset grows with the bubble size (adjust the divisor to tune
                # spacing). The extra 0.125 keeps the leftmost label of a full
                # row from overlapping the bin label.
                y=row["y"] + 0.125 + marker_size / 20,
                text=wrapped_text,
                showarrow=False,
                textangle=0,
                font=dict(
                    size=10,
                    color="var(--foreground)",
                    family="Arial, sans-serif",
                    weight="bold",
                ),
                xanchor="center",
                yanchor="top",  # Anchor to top of the text box
                bgcolor="rgba(255,255,255,0.7)",  # Semi-transparent background for readability
                bordercolor="rgba(0,0,0,0.1)",  # Subtle border
                borderwidth=1,
                borderpad=1,
                # TODO: Radius for rounded corners
            )
        )

    # Add one subtle separator line and one label per bin row
    bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
    for bin_name in sorted(bin_descriptions):
        bin_y = df.loc[df["bin"] == bin_name, "y"].mean()
        fig.add_shape(
            type="line",
            x0=0,
            y0=bin_y,
            x1=100,
            y1=bin_y,
            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
            layer="below",
        )
        annotations.append(
            dict(
                x=0,  # Position the label on the left side
                y=bin_y,
                xref="x",
                yref="y",
                text=bin_descriptions[bin_name],
                showarrow=False,
                font=dict(size=8.25, color="var(--muted-foreground)"),
                align="left",
                xanchor="left",
                yanchor="middle",
                bgcolor="rgba(255,255,255,0.7)",
                borderpad=1,
            )
        )

    fig.update_layout(
        title=None,
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
            autorange="reversed",  # Keep largest at top
        ),
        hovermode="closest",
        margin=dict(l=0, r=0, t=10, b=10),
        coloraxis_colorbar=dict(
            title=color_title,
            title_font=dict(size=9),
            tickfont=dict(size=8),
            thickness=10,
            len=0.6,
            yanchor="middle",
            y=0.5,
            xpad=0,
        ),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
        annotations=annotations,  # Topic names and bin labels
    )
    return fig
# Callback to fill the topic-details sidebar for the hovered/clicked bubble
@callback(
    [
        Output("topic-title", "children"),
        Output("topic-metadata", "children"),
        Output("topic-metrics", "children"),
        Output("important-tags", "children"),
        Output("sample-dialogs", "children"),
        Output("no-topic-selected", "style"),
    ],
    [Input("bubble-chart", "hoverData"), Input("bubble-chart", "clickData")],
    [
        State("stored-data", "data"),
        State("upload-data", "contents"),
        State("upload-data", "filename"),
    ],
)
def update_topic_details(
    hover_data, click_data, stored_data, file_contents, filename=None
):
    """Build the sidebar content for the topic under the cursor.

    Args:
        hover_data: ``hoverData`` of the bubble chart (used first when present).
        click_data: ``clickData`` of the bubble chart (used when there is no hover data).
        stored_data: Topic statistics records from the "stored-data" store.
        file_contents: The raw upload ("<content type>,<base64 payload>"), re-read
            here to list tags and sample dialogs for the topic.
        filename: Name of the uploaded file; its extension selects the parser,
            mirroring the upload callback. Without a recognised extension the
            content type decides.

    Returns:
        A 6-tuple matching the callback outputs: title, metadata items, metric
        boxes, tags, sample dialogs, and the style of the "no topic selected"
        overlay (shown when nothing can be displayed).
    """
    empty_state = ("", [], [], "", [], {"display": "flex"})

    # Hover data takes precedence; click data is the fallback
    hover_info = hover_data or click_data
    if not hover_info or not stored_data or not file_contents:
        return empty_state

    # Extract topic name from the point's custom data
    topic_name = hover_info["points"][0]["customdata"][0]

    # Get stored statistics for this topic
    df_stored = pd.DataFrame(stored_data)
    topic_rows = df_stored[df_stored["deduplicated_topic_name"] == topic_name]
    if topic_rows.empty:
        return empty_state
    topic_data = topic_rows.iloc[0]

    # Re-read the original upload to sample conversations.
    # NOTE(review): this parses the whole file on every hover event; caching
    # the parsed frame may be worthwhile for large uploads.
    content_type, content_string = file_contents.split(",", 1)
    decoded = base64.b64decode(content_string)
    lowered_name = (filename or "").lower()
    xlsx_content_type = (
        "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    )
    if lowered_name.endswith(".csv"):
        is_excel = False
    elif lowered_name.endswith((".xls", ".xlsx", ".xlsm")):
        is_excel = True
    else:
        is_excel = content_type == xlsx_content_type
    if is_excel:
        df_full = pd.read_excel(io.BytesIO(decoded))
    else:  # Assume CSV; utf-8-sig also strips a leading byte-order mark
        df_full = pd.read_csv(io.StringIO(decoded.decode("utf-8-sig")))

    # Filter to this topic
    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

    # Create the title
    title = html.Div([html.Span(topic_name)])

    # Create metadata items
    metadata_items = [
        html.Div(
            [
                html.I(className="fas fa-comments metadata-icon"),
                html.Span(f"{int(topic_data['count'])} dialogs"),
            ],
            className="metadata-item",
        ),
    ]

    # Create metrics boxes
    metrics_boxes = [
        html.Div(
            [
                html.Div(f"{topic_data['negative_rate']}%", className="metric-value"),
                html.Div("Negative Sentiment", className="metric-label"),
            ],
            className="metric-box negative",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['unresolved_rate']}%", className="metric-value"),
                html.Div("Unresolved", className="metric-label"),
            ],
            className="metric-box unresolved",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['urgent_rate']}%", className="metric-value"),
                html.Div("Urgent", className="metric-label"),
            ],
            className="metric-box urgent",
        ),
    ]

    # Count comma-separated tags from the optional "consolidated_tags" column
    tag_counts = {}
    if "consolidated_tags" in topic_conversations.columns:
        for tags_str in topic_conversations["consolidated_tags"].dropna():
            # str() guards against non-text cells (e.g. a purely numeric tag)
            for tag in str(tags_str).split(","):
                tag = tag.strip()
                if tag:
                    tag_counts[tag] = tag_counts.get(tag, 0) + 1

    # Most common first, alphabetical for ties; keep only the top K
    TOP_K = 15
    sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))[:TOP_K]

    if sorted_tags:
        tags_output = html.Div(
            [
                html.Div(
                    [
                        html.I(className="fas fa-tag topic-tag-icon"),
                        html.Span(f"{tag} ({count})"),
                    ],
                    className="topic-tag",
                )
                for tag, count in sorted_tags
            ],
            className="tags-container",
        )
    else:
        tags_output = html.Div(
            [
                html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
                "No tags found for this topic",
            ],
            className="no-tags-message",
        )

    # Sample up to 5 random dialogs
    sample_size = min(5, len(topic_conversations))
    if sample_size > 0:
        sample_indices = random.sample(range(len(topic_conversations)), sample_size)
        samples = topic_conversations.iloc[sample_indices]
        sample_dialogs = []
        for _, row in samples.iterrows():
            tags = [
                html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
                html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
                html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
            ]
            # Add Chat ID tag if 'id' column exists
            if "id" in row:
                tags.append(
                    html.Span(
                        f"Chat ID: {row['id']}", className="dialog-tag tag-chat-id"
                    )
                )
            sample_dialogs.append(
                html.Div(
                    [
                        html.Div(row["Summary"], className="dialog-summary"),
                        html.Div(
                            tags,
                            className="dialog-metadata",
                        ),
                    ],
                    className="dialog-item",
                )
            )
    else:
        sample_dialogs = [
            html.Div(
                "No sample dialogs available for this topic.",
                style={"color": "var(--muted-foreground)"},
            )
        ]

    return (
        title,
        metadata_items,
        metrics_boxes,
        tags_output,
        sample_dialogs,
        {"display": "none"},  # Hide the "no topic selected" overlay
    )
if __name__ == "__main__":
    # Script entry point: start the app's built-in server with debug mode off.
    # NOTE(review): newer Dash releases favor `app.run(...)` over `run_server`;
    # confirm against the Dash version pinned for this deployment.
    app.run_server(debug=False)