Spaces:
Running
Running
import base64 | |
import io | |
import random | |
import dash | |
import numpy as np | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from dash import Input, Output, State, callback, dcc, html | |
# Initialize the Dash app.
# suppress_callback_exceptions=True is passed so that callbacks may reference
# component IDs without Dash raising at startup (presumably because some targets
# are only populated after an upload; confirm against the callback definitions).
app = dash.Dash(__name__, suppress_callback_exceptions=True)
# Module-level handle to the app's underlying server object
# (presumably so a WSGI host can import it; confirm against the deployment).
server = app.server
# Define app layout: a header, the upload area with its status line, the main
# dashboard (kept hidden via style until an upload succeeds), and a dcc.Store
# that holds the processed data.
app.layout = html.Div(
    [
        # Header
        html.Div(
            [
                html.H1(
                    "Sessions Observatory by helvia.ai ππ",
                    className="app-header",
                ),
                html.P(
                    "Upload a CSV/Excel file to visualize the chatbot's dialog topics.",
                    className="app-description",
                ),
            ],
            className="header-container",
        ),
        # File Upload Component
        html.Div(
            [
                dcc.Upload(
                    id="upload-data",
                    children=html.Div(
                        [
                            html.Div("Drag and Drop", className="upload-text"),
                            html.Div("or", className="upload-divider"),
                            html.Div(
                                html.Button("Select a File", className="upload-button")
                            ),
                        ],
                        className="upload-content",
                    ),
                    style={
                        "width": "100%",
                        "height": "120px",
                        "lineHeight": "60px",
                        "borderWidth": "1px",
                        "borderStyle": "dashed",
                        "borderRadius": "0.5rem",
                        "textAlign": "center",
                        "margin": "10px 0",
                        "backgroundColor": "hsl(210, 40%, 98%)",
                        "borderColor": "hsl(214.3, 31.8%, 91.4%)",
                        "cursor": "pointer",
                    },
                    # Single-file upload only
                    multiple=False,
                ),
                # Status message (success / error text for the last upload)
                html.Div(
                    id="upload-status",
                    className="upload-status-message",
                    style={"display": "none"},  # Initially hidden
                ),
            ],
            className="upload-container",
        ),
        # Main Content Area (hidden until file is uploaded)
        html.Div(
            [
                # Dashboard layout with flexible grid
                html.Div(
                    [
                        # Left side: Bubble chart
                        html.Div(
                            [
                                html.H3(
                                    id="topic-distribution-header",
                                    children="Sessions Observatory",
                                    className="section-header",
                                ),
                                # NOTE: an earlier version used a fixed height,
                                # style={"height": "80vh"}; per the original author
                                # the calc() height below "does not work for some reason".
                                dcc.Graph(
                                    id="bubble-chart",
                                    style={"height": "calc(100% - 154px)"},
                                ),
                                html.Div(
                                    [
                                        # "Color by" label row (the only remaining control)
                                        html.Div(
                                            [
                                                html.Div(
                                                    html.Label(
                                                        "Color by:",
                                                        className="control-label",
                                                    ),
                                                    className="control-label-container",
                                                ),
                                            ],
                                            className="control-labels-row",
                                        ),
                                        # "Color by" options row; each value names a
                                        # rate column of the stored topic statistics
                                        html.Div(
                                            [
                                                html.Div(
                                                    dcc.RadioItems(
                                                        id="color-metric",
                                                        options=[
                                                            {
                                                                "label": "Sentiment",
                                                                "value": "negative_rate",
                                                            },
                                                            {
                                                                "label": "Resolution",
                                                                "value": "unresolved_rate",
                                                            },
                                                            {
                                                                "label": "Urgency",
                                                                "value": "urgent_rate",
                                                            },
                                                        ],
                                                        value="negative_rate",
                                                        inline=True,
                                                        className="radio-group",
                                                        inputClassName="radio-input",
                                                        labelClassName="radio-label",
                                                    ),
                                                    className="radio-container",
                                                ),
                                            ],
                                            className="control-options-row",
                                        ),
                                    ],
                                    className="chart-controls",
                                ),
                            ],
                            className="chart-container",
                        ),
                        # Right side: Interactive sidebar with topic details
                        html.Div(
                            [
                                html.Div(
                                    [
                                        html.H3(
                                            "Topic Details", className="section-header"
                                        ),
                                        html.Div(
                                            id="topic-title", className="topic-title"
                                        ),
                                        html.Div(
                                            [
                                                # Metadata section
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Metadata",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="topic-metadata",
                                                            className="metadata-container",
                                                        ),
                                                    ],
                                                    className="metadata-section",
                                                ),
                                                # Key metrics section
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Key Metrics",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="topic-metrics",
                                                            className="metrics-container",
                                                        ),
                                                    ],
                                                    className="metrics-section",
                                                ),
                                                # Tags section
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Tags",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="important-tags",
                                                            className="tags-container",
                                                        ),
                                                    ]
                                                ),
                                            ],
                                            className="details-section",
                                        ),
                                        # Sample dialogs section
                                        html.Div(
                                            [
                                                html.H4(
                                                    "Sample Dialogs (Summary)",
                                                    className="subsection-header",
                                                ),
                                                html.Div(
                                                    id="sample-dialogs",
                                                    className="sample-dialogs-container",
                                                ),
                                            ],
                                            className="samples-section",
                                        ),
                                    ],
                                    className="topic-details-content",
                                ),
                                # Placeholder shown while no topic is selected; the
                                # .no-selection-container CSS positions it absolutely
                                # over the sidebar.
                                html.Div(
                                    id="no-topic-selected",
                                    children=[
                                        html.Div(
                                            [
                                                html.I(
                                                    className="fas fa-info-circle info-icon"
                                                ),
                                                html.H3("No topic selected"),
                                                html.P(
                                                    "Click or hover on a bubble to view topic details."
                                                ),
                                            ],
                                            className="no-selection-message",
                                        )
                                    ],
                                    className="no-selection-container",
                                ),
                            ],
                            className="sidebar-container",
                        ),
                    ],
                    className="dashboard-container",
                )
            ],
            id="main-content",
            style={"display": "none"},
        ),
        # Store the processed data
        dcc.Store(id="stored-data"),
    ],
    className="app-container",
)
# Custom HTML page template for the app: sets the page title, loads Font Awesome
# from a CDN, and inlines all of the app's CSS. The {%...%} placeholders are left
# for Dash to fill in.
# NOTE(review): `.topic-tag` is declared twice and `.sidebar-container` sets
# `height` twice; in both cases the later declaration is the one that applies.
app.index_string = """
<!DOCTYPE html>
<html>
<head>
{%metas%}
<title>Sessions Observatory by helvia.ai ππ</title>
{%favicon%}
{%css%}
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
:root {
--background: hsl(210, 20%, 95%);
--foreground: hsl(222.2, 84%, 4.9%);
--card: hsl(0, 0%, 100%);
--card-foreground: hsl(222.2, 84%, 4.9%);
--popover: hsl(0, 0%, 100%);
--popover-foreground: hsl(222.2, 84%, 4.9%);
--primary: hsl(222.2, 47.4%, 11.2%);
--primary-foreground: hsl(210, 40%, 98%);
--secondary: hsl(210, 40%, 96.1%);
--secondary-foreground: hsl(222.2, 47.4%, 11.2%);
--muted: hsl(210, 40%, 96.1%);
--muted-foreground: hsl(215.4, 16.3%, 46.9%);
--accent: hsl(210, 40%, 96.1%);
--accent-foreground: hsl(222.2, 47.4%, 11.2%);
--destructive: hsl(0, 84.2%, 60.2%);
--destructive-foreground: hsl(210, 40%, 98%);
--border: hsl(214.3, 31.8%, 91.4%);
--input: hsl(214.3, 31.8%, 91.4%);
--ring: hsl(222.2, 84%, 4.9%);
--radius: 0.5rem;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: 'Inter', sans-serif;
}
body {
background-color: var(--background);
color: var(--foreground);
font-feature-settings: "rlig" 1, "calt" 1;
}
.app-container {
max-width: 2500px;
margin: 0 auto;
padding: 1.5rem;
background-color: var(--background);
min-height: 100vh;
display: flex;
flex-direction: column;
}
.header-container {
margin-bottom: 2rem;
text-align: center;
}
.app-header {
color: var(--foreground);
margin-bottom: 0.75rem;
font-weight: 600;
font-size: 2rem;
line-height: 1.2;
}
.app-description {
color: var(--muted-foreground);
font-size: 1rem;
line-height: 1.5;
}
.upload-container {
margin-bottom: 2rem;
max-width: 800px;
margin-left: auto;
margin-right: auto;
}
.upload-content {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 80%;
padding: 1.5rem;
position: relative;
}
.upload-text {
font-size: 1rem;
color: var(--primary);
font-weight: 500;
}
.upload-divider {
color: var(--muted-foreground);
margin: 0.5rem 0;
font-size: 0.875rem;
}
.upload-button {
background-color: var(--primary);
color: var(--primary-foreground);
border: none;
padding: 0.5rem 1rem;
border-radius: var(--radius);
font-size: 0.875rem;
cursor: pointer;
transition: opacity 0.2s;
font-weight: 500;
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
height: 2.5rem;
}
.upload-button:hover {
opacity: 0.9;
}
/* Status message styling */
.upload-status-message {
margin-top: 1rem;
padding: 0.75rem;
font-weight: 500;
text-align: center;
border-radius: var(--radius);
font-size: 0.875rem;
transition: all 0.3s ease;
background-color: var(--secondary);
color: var(--secondary-foreground);
}
/* Chart controls styling */
.chart-controls {
margin-top: 1rem;
display: flex;
flex-direction: column;
gap: 0.75rem;
padding: 1rem;
background-color: var(--card);
border-radius: var(--radius);
border: 1px solid var(--border);
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
}
.control-labels-row {
display: flex;
width: 100%;
}
.control-options-row {
display: flex;
width: 100%;
}
.control-label-container {
padding: 0 0.5rem;
text-align: left;
}
.control-label {
font-weight: 500;
color: var(--foreground);
font-size: 0.875rem;
line-height: 1.25rem;
}
.radio-container {
padding: 0 0.5rem;
width: 100%;
}
.radio-group {
display: flex;
gap: 1rem;
}
.radio-input {
margin-right: 0.375rem;
cursor: pointer;
height: 1rem;
width: 1rem;
border-radius: 9999px;
border: 1px solid var(--border);
appearance: none;
-webkit-appearance: none;
background-color: var(--background);
transition: border-color 0.2s;
}
.radio-input:checked {
border-color: var(--primary);
background-color: var(--primary);
background-image: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
background-size: 100% 100%;
background-position: center;
background-repeat: no-repeat;
}
.radio-label {
font-weight: 400;
color: var(--foreground);
display: flex;
align-items: center;
cursor: pointer;
font-size: 0.875rem;
line-height: 1.25rem;
}
/* Dashboard container */
.dashboard-container {
display: flex;
flex-wrap: wrap;
gap: 1.5rem;
flex: 1;
height: 100%;
}
.chart-container {
flex: 2.75;
min-width: 400px;
background: var(--card);
border-radius: var(--radius);
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
padding: 1rem;
border: 0.75px solid var(--border);
height: 100%;
}
.sidebar-container {
flex: 1;
min-width: 300px;
background: var(--card);
border-radius: var(--radius);
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
padding: 1rem;
position: relative;
height: 100vh;
overflow-y: auto;
border: 1px solid var(--border);
height: 100%;
}
.section-header {
margin-bottom: 1rem;
color: var(--foreground);
border-bottom: 1px solid var(--border);
padding-bottom: 0.75rem;
font-weight: 600;
font-size: 1.25rem;
}
.subsection-header {
margin: 1rem 0 0.75rem;
color: var(--foreground);
font-size: 1rem;
font-weight: 600;
}
.topic-title {
font-size: 1.25rem;
font-weight: 600;
color: var(--foreground);
margin-bottom: 1rem;
padding: 0.5rem 0.75rem;
background-color: var(--secondary);
border-radius: var(--radius);
}
.metadata-container {
display: flex;
flex-wrap: wrap;
gap: 0.75rem;
margin-bottom: 1rem;
}
.metadata-item {
background-color: var(--secondary);
padding: 0.5rem 0.75rem;
border-radius: var(--radius);
font-size: 0.875rem;
display: flex;
align-items: center;
color: var(--secondary-foreground);
}
.metadata-icon {
margin-right: 0.5rem;
color: var(--primary);
}
.metrics-container {
display: flex;
justify-content: space-between;
gap: 0.75rem;
margin-bottom: 0.75rem;
}
.metric-box {
background-color: var(--card);
border-radius: var(--radius);
padding: 0.75rem;
text-align: center;
flex: 1;
border: 1px solid var(--border);
}
.metric-box.negative {
border-left: 3px solid var(--destructive);
}
.metric-box.unresolved {
border-left: 3px solid hsl(47.9, 95.8%, 53.1%);
}
.metric-box.urgent {
border-left: 3px solid hsl(217.2, 91.2%, 59.8%);
}
.metric-value {
font-size: 1.5rem;
font-weight: 600;
margin-bottom: 0.25rem;
color: var(--foreground);
line-height: 1;
}
.metric-label {
font-size: 0.75rem;
color: var(--muted-foreground);
}
.sample-dialogs-container {
margin-top: 0.75rem;
}
.dialog-item {
background-color: var(--secondary);
border-radius: var(--radius);
padding: 1rem;
margin-bottom: 0.75rem;
border-left: 3px solid var(--primary);
}
.dialog-summary {
font-size: 0.875rem;
line-height: 1.5;
margin-bottom: 0.5rem;
color: var(--foreground);
}
.dialog-metadata {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin-top: 0.5rem;
font-size: 0.75rem;
}
.dialog-tag {
padding: 0.25rem 0.5rem;
border-radius: var(--radius);
font-size: 0.7rem;
font-weight: 500;
}
.tag-sentiment {
background-color: var(--destructive);
color: var(--destructive-foreground);
}
.tag-resolution {
background-color: hsl(47.9, 95.8%, 53.1%);
color: hsl(222.2, 84%, 4.9%);
}
.tag-urgency {
background-color: hsl(217.2, 91.2%, 59.8%);
color: hsl(210, 40%, 98%);
}
.tag-chat-id {
background-color: hsl(215.4, 16.3%, 46.9%);
color: hsl(210, 40%, 98%);
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
font-weight: 500;
}
.no-selection-container {
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
display: flex;
align-items: center;
justify-content: center;
background-color: hsla(0, 0%, 100%, 0.95);
z-index: 10;
border-radius: var(--radius);
}
.no-selection-message {
text-align: center;
color: var(--muted-foreground);
padding: 1.5rem;
}
.info-icon {
font-size: 2rem;
margin-bottom: 0.75rem;
color: var(--muted);
}
/* Tags container */
.tags-container {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 5px;
margin-bottom: 15px;
padding: 6px;
border-radius: 8px;
background-color: #f8f9fa;
}
.topic-tag {
padding: 0.375rem 0.75rem;
border-radius: var(--radius);
font-size: 0.75rem;
display: inline-flex;
align-items: center;
transition: all 0.2s ease;
font-weight: 500;
margin-bottom: 0.25rem;
cursor: default;
background-color: var(--muted);
color: var(--muted-foreground);
border: 1px solid var(--border);
}
.topic-tag {
padding: 6px 12px;
border-radius: 15px;
font-size: 0.8rem;
display: inline-flex;
align-items: center;
box-shadow: 0 1px 3px rgba(0,0,0,0.12);
transition: all 0.2s ease;
font-weight: 500;
margin-bottom: 5px;
cursor: default;
border: 1px solid rgba(0,0,0,0.08);
background-color: #6c757d; /* Consistent medium gray color */
color: white;
}
.topic-tag:hover {
transform: translateY(-1px);
box-shadow: 0 3px 5px rgba(0,0,0,0.15);
background-color: #5a6268; /* Slightly darker on hover */
}
.topic-tag-icon {
margin-right: 5px;
font-size: 0.7rem;
opacity: 0.8;
color: rgba(255, 255, 255, 0.9);
}
.no-tags-message {
color: var(--muted-foreground);
font-style: italic;
padding: 0.75rem;
text-align: center;
width: 100%;
}
/* Responsive adjustments */
@media (max-width: 768px) {
.dashboard-container {
flex-direction: column;
}
.chart-container, .sidebar-container {
width: 100%;
}
.app-header {
font-size: 1.5rem;
}
}
</style>
</head>
<body>
{%app_entry%}
<footer>
{%config%}
{%scripts%}
{%renderer%}
</footer>
</body>
</html>
"""
def update_topic_distribution_header(data):
    """Return the chart header text, with the total dialog count when data exists.

    Args:
        data: Records convertible to a DataFrame with a ``count`` column
            (e.g. a list of dicts), or a falsy value when nothing is loaded.

    Returns:
        ``"Sessions Observatory"`` when *data* is falsy; otherwise the same
        title followed by the summed ``count`` column in parentheses.
    """
    if data:
        # The header reports the sum of the per-topic dialog counts.
        dialog_total = pd.DataFrame(data)["count"].sum()
        return f"Sessions Observatory ({dialog_total} dialogs)"
    # Default title when no data is available.
    return "Sessions Observatory"
# Process an uploaded file: parse it, compute topic statistics, and report status
def _upload_status(icon_class, message, color):
    """Build the icon + text line shown in the upload status area."""
    return html.Div(
        [
            html.I(
                className=icon_class,
                style={"color": color, "marginRight": "8px"},
            ),
            message,
        ],
        style={"color": color},
    )


def _read_uploaded_table(decoded, filename):
    """Parse decoded upload bytes by file extension; return None if unsupported."""
    lowered = filename.lower()
    # Decide by extension rather than by substring, so a name such as
    # "csv_report.xlsx" is read as Excel instead of being mis-parsed as CSV.
    if lowered.endswith(".csv"):
        return pd.read_csv(io.StringIO(decoded.decode("utf-8")))
    if lowered.endswith((".xls", ".xlsx", ".xlsm", ".xlsb")):
        return pd.read_excel(io.BytesIO(decoded))
    return None


def process_upload(contents, filename):
    """Parse an uploaded CSV/Excel file and compute per-topic statistics.

    Args:
        contents: Upload payload as a ``"<content type>,<base64 data>"`` string,
            or None when nothing has been uploaded.
        filename: Name of the uploaded file; its extension selects the parser.

    Returns:
        A 4-tuple ``(records, status, status_style, main_style)``:
        the topic statistics as a list of dicts (None on failure or when there
        is no upload), the status message component (``""`` when there is no
        upload), the style dict for the status area, and the style dict for the
        main content area.
    """
    if contents is None:
        return None, "", {"display": "none"}, {"display": "none"}  # Keep hidden

    error_color = "var(--destructive)"
    success_color = "hsl(142.1, 76.2%, 36.3%)"

    try:
        # The payload is "<header>,<base64>"; split once so only the first
        # comma is treated as the separator.
        _content_type, content_string = contents.split(",", 1)
        decoded = base64.b64decode(content_string)
        df = _read_uploaded_table(decoded, filename)
        if df is None:
            return (
                None,
                _upload_status(
                    "fas fa-exclamation-circle",
                    "Please upload a CSV or Excel file.",
                    error_color,
                ),
                {"display": "block"},  # Make visible after error
                {"display": "none"},
            )
        # Process the dataframe to get topic statistics
        records = analyze_topics(df).to_dict("records")
    except Exception as e:
        # This is the boundary to the UI: any parsing/analysis failure is
        # reported to the user instead of propagating.
        return (
            None,
            _upload_status(
                "fas fa-exclamation-triangle",
                f"Error processing file: {str(e)}",
                error_color,
            ),
            {"display": "block"},  # Make visible after error
            {"display": "none"},
        )

    return (
        records,
        _upload_status(
            "fas fa-check-circle",
            f'Successfully uploaded "{filename}"',
            success_color,
        ),
        {"display": "block"},
        {
            "display": "block",
            "height": "calc(100vh - 40px)",
        },  # Make visible after successful upload
    )
# Aggregate the raw dialog rows into per-topic statistics
def analyze_topics(df):
    """Compute per-topic counts and rates, then attach chart layout columns.

    Args:
        df: DataFrame read by this function through the columns
            ``deduplicated_topic_name``, ``id``, ``Sentiment``, ``Resolution``
            and ``Urgency``.

    Returns:
        The result of ``apply_binned_layout`` on a frame with one row per
        topic, holding ``count``, the three ``*_count`` columns and the three
        ``*_rate`` columns (percentages rounded to one decimal).
    """

    def occurrences_of(label):
        # Aggregator counting how many values in a group equal *label*.
        return lambda values: (values == label).sum()

    per_topic = df.groupby("deduplicated_topic_name")
    topic_stats = per_topic.agg(
        count=("id", "count"),
        negative_count=("Sentiment", occurrences_of("negative")),
        unresolved_count=("Resolution", occurrences_of("unresolved")),
        urgent_count=("Urgency", occurrences_of("urgent")),
    ).reset_index()

    # Each rate is the matching count as a percentage of the topic's dialogs.
    rate_columns = (
        ("negative_count", "negative_rate"),
        ("unresolved_count", "unresolved_rate"),
        ("urgent_count", "urgent_rate"),
    )
    for count_column, rate_column in rate_columns:
        percentage = topic_stats[count_column] / topic_stats["count"] * 100
        topic_stats[rate_column] = percentage.round(1)

    # Apply binned layout
    return apply_binned_layout(topic_stats)
# Binned layout: one chart row per dialog-count bin; oversized bins become several rows
def _split_bin_into_rows(members, bin_name, description, max_items_per_row):
    """Split one bin's topics (already sorted) into balanced chart rows.

    Returns a list of DataFrame copies with ``bin`` and ``bin_description`` set.
    A bin that fits in one row keeps its name; otherwise rows are named
    ``"<bin>_<k>"`` and described as ``"<description> (k/total)"``.
    """
    size = len(members)
    if size <= max_items_per_row:
        row = members.copy()
        row["bin"] = bin_name
        row["bin_description"] = description
        return [row]

    # Ceiling division gives the number of rows; the remainder is handed out
    # one item at a time to the first rows so row sizes differ by at most one.
    num_rows = (size + max_items_per_row - 1) // max_items_per_row
    base_size, extra = divmod(size, num_rows)

    rows = []
    start = 0
    for part in range(num_rows):
        stop = start + base_size + (1 if part < extra else 0)
        row = members.iloc[start:stop].copy()
        row["bin"] = f"{bin_name}_{part + 1}"
        row["bin_description"] = f"{description} ({part + 1}/{num_rows})"
        rows.append(row)
        start = stop
    return rows


def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
    """
    Apply a binned layout where bubbles are grouped into rows based on dialog count.
    Bubbles in each row will be centered horizontally.

    Rows are ordered by bin position in ``bin_config`` and then by sub-row
    number (numerically, so sub-row 10 follows sub-row 9), with topics sorted
    by descending count inside each bin.

    Args:
        df: DataFrame containing the topic data; must have a ``count`` column
            unless it is empty
        padding: Padding from the top and bottom edges, on the 0-100 chart scale
        bin_config: List of ``(lower, upper, description)`` tuples with inclusive
            bounds; ``upper`` may be None for "no upper limit". The first
            matching bin wins; counts matching no bin go to the last bin.
            Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
        max_items_per_row: Maximum number of items to display in a single row

    Returns:
        A new DataFrame (the input is not modified) with ``bin``,
        ``bin_description``, ``y``, ``x`` and ``size_rank`` columns set. An
        empty input yields an empty frame that still has those columns.

    Raises:
        ValueError: If ``max_items_per_row`` is below 1 or ``bin_config`` is empty.
    """
    if max_items_per_row < 1:
        raise ValueError("max_items_per_row must be at least 1")

    # Create a copy of the dataframe to avoid modifying the original
    layout = df.copy()

    # Nothing to place: return early instead of dividing by zero rows below.
    if len(layout) == 0:
        for column in ("bin", "bin_description", "y", "x", "size_rank"):
            layout[column] = []
        return layout

    # Default bin configuration if none is provided
    # 8 rows x 6 bubbles is usually good
    if bin_config is None:
        bin_config = [
            (100, None, "100+ dialogs"),
            (50, 99, "50-99 dialogs"),
            (25, 49, "25-49 dialogs"),
            (9, 24, "9-24 dialogs"),
            (7, 8, "7-8 dialogs"),
            (5, 6, "5-6 dialogs"),
            (4, 4, "4 dialogs"),
            (0, 3, "0-3 dialogs"),
        ]
    if len(bin_config) == 0:
        raise ValueError("bin_config must contain at least one bin")

    bin_names = [f"Bin {i + 1}" for i in range(len(bin_config))]
    counts = layout["count"]
    conditions = [
        (counts >= lower) if upper is None else (counts >= lower) & (counts <= upper)
        for lower, upper, _ in bin_config
    ]

    # Numeric bin position per topic. Unmatched counts fall into the LAST
    # configured bin, so custom configs of any length get a valid bin.
    bin_order = np.select(
        conditions, list(range(len(bin_names))), default=len(bin_names) - 1
    )

    # Sort by bin position (ascending) and count (descending). Sorting a
    # throwaway frame with a fresh positional index keeps the ordering numeric
    # and independent of the input's index labels.
    sort_keys = pd.DataFrame({"bin_order": bin_order, "count": counts.to_numpy()})
    positions = sort_keys.sort_values(
        by=["bin_order", "count"], ascending=[True, False]
    ).index.to_numpy()
    layout = layout.iloc[positions]
    bin_order = bin_order[positions]

    # Build the chart rows in display order, splitting oversized bins.
    rows = []
    for order, (bin_name, (_, _, description)) in enumerate(zip(bin_names, bin_config)):
        members = layout.iloc[np.flatnonzero(bin_order == order)]
        if len(members) == 0:
            continue
        rows.extend(
            _split_bin_into_rows(members, bin_name, description, max_items_per_row)
        )

    # Vertical positions: each row is centered within its equal share of the height.
    available_height = 100 - (2 * padding)
    row_height = available_height / len(rows)
    center_point = 50  # Middle of the chart (0-100 scale)

    for row_number, row in enumerate(rows):
        row["y"] = padding + row_number * row_height + (row_height / 2)

        # Horizontal positions: bubbles are centered as a group. Rows with
        # fewer than max_items_per_row bubbles get wider spacing (17.5 units),
        # full rows use 15 units.
        num_topics = len(row)
        spacing = 17.5 if num_topics < max_items_per_row else 15
        start_pos = center_point - ((num_topics - 1) * spacing / 2)
        row["x"] = [start_pos + (k * spacing) for k in range(num_topics)]

    result = pd.concat(rows)
    # Rank in final display order (top row first, left to right)
    result["size_rank"] = range(1, len(result) + 1)
    return result
# Compute bubble positions for the main chart
def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* positioned by the binned layout with its default settings.

    The main chart always uses the binned layout, so this simply delegates to
    ``apply_binned_layout``.
    """
    return apply_binned_layout(df=df)
# Bubble chart rendering (callback body) and its private helpers

# Maps the "Color by" radio value to (data column, colorbar title).
# Any other value falls back to the urgency metric.
_COLOR_METRICS = {
    "negative_rate": ("negative_rate", "Negativity (%)"),
    "unresolved_rate": ("unresolved_rate", "Unresolved (%)"),
}
_DEFAULT_COLOR_METRIC = ("urgent_rate", "Urgency (%)")


def _scale_bubble_sizes(counts, min_size=1, spread=50, uniform_size=12.5):
    """Log-scale dialog counts into marker sizes.

    Sizes range from ``min_size`` (smallest count) to ``min_size + spread``
    (largest count). Raise ``min_size`` to enlarge the smallest bubble; lower
    ``spread`` to shrink the largest. When all counts are equal every bubble
    gets ``uniform_size``.
    """
    if counts.max() > counts.min():
        log_counts = np.log1p(counts)
        log_range = log_counts.max() - log_counts.min()
        return min_size + (log_counts - log_counts.min()) / log_range * spread
    return np.ones(len(counts)) * uniform_size


def _wrap_label(text, words_per_line=4):
    """Insert a ``<br>`` after every ``words_per_line`` words of *text*."""
    words = text.split()
    return "<br>".join(
        " ".join(words[start : start + words_per_line])
        for start in range(0, len(words), words_per_line)
    )


def update_bubble_chart(data, color_metric):
    """Build the bubble chart figure from the stored topic statistics.

    Args:
        data: Stored topic records (list of dicts); a falsy value yields an
            empty figure.
        color_metric: ``"negative_rate"``, ``"unresolved_rate"`` or any other
            value (treated as ``"urgent_rate"``); selects the column used for
            bubble color and the colorbar title.

    Returns:
        A Plotly figure with one bubble per topic (size from the log-scaled
        dialog count), a text label under each bubble, and a dotted separator
        line plus a label for each bin row.
    """
    if not data:
        return go.Figure()

    # Update positions using binned layout
    df = update_bubble_positions(pd.DataFrame(data))

    # Always use count for sizing
    raw_sizes = df["count"]
    size_title = "Dialog Count"
    size_values = _scale_bubble_sizes(raw_sizes)

    # Determine color based on selected metric (all metrics share one scale)
    color_column, color_title = _COLOR_METRICS.get(
        color_metric, _DEFAULT_COLOR_METRIC
    )
    color_values = df[color_column]
    color_scale = "Teal"

    # Create enhanced hover text that includes bin information
    hover_text = [
        f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
        for topic, raw, color, bin_desc in zip(
            df["deduplicated_topic_name"],
            raw_sizes,
            color_values,
            df["bin_description"],
        )
    ]

    # Create bubble chart
    fig = px.scatter(
        df,
        x="x",
        y="y",
        size=size_values,
        color=color_values,
        hover_name="deduplicated_topic_name",
        hover_data={
            "x": False,
            "y": False,
            "bin_description": True,
        },
        size_max=42.5,  # Maximum size of the bubbles, change this to adjust the size
        color_continuous_scale=color_scale,
        custom_data=[
            "deduplicated_topic_name",
            "count",
            "negative_rate",
            "unresolved_rate",
            "urgent_rate",
            "bin_description",
        ],
    )

    # Markers only; topic names are drawn as annotations below instead of trace text
    fig.update_traces(
        mode="markers",
        marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
        hovertemplate="%{hovertext}<extra></extra>",
        hovertext=hover_text,
    )

    # Create annotations for the bubbles. Rows and sizes are paired by position
    # (both follow df's row order), which works whether size_values is a Series
    # or a plain array.
    annotations = []
    for (_, row), bubble_size in zip(df.iterrows(), size_values):
        # Vertical offset derived from the bubble size so the text clears the
        # bubble; adjust the divisor as needed to get proper spacing
        marker_size = bubble_size / 20
        annotations.append(
            dict(
                x=row["x"],
                y=row["y"]
                + 0.125  # Adding this so in a row with maximum bubbles, the left one does not overlap with the bin label
                + marker_size,  # Add vertical offset to position text below the bubble
                text=_wrap_label(row["deduplicated_topic_name"]),
                showarrow=False,
                textangle=0,
                font=dict(
                    size=10,
                    color="var(--foreground)",
                    family="Arial, sans-serif",
                    weight="bold",
                ),
                xanchor="center",
                yanchor="top",  # Anchor to top of text box so it hangs below the bubble
                bgcolor="rgba(255,255,255,0.7)",  # Add semi-transparent background for better readability
                bordercolor="rgba(0,0,0,0.1)",  # Add a subtle border color
                borderwidth=1,
                borderpad=1,
                # TODO: Radius for rounded corners
            )
        )

    # Add one separator line and one label per bin row
    bin_descriptions = df.set_index("bin")["bin_description"].to_dict()
    bin_y_positions = df.groupby("bin")["y"].mean()
    for bin_name, bin_y in bin_y_positions.items():
        # Horizontal separator line (drawn once per row)
        fig.add_shape(
            type="line",
            x0=0,
            y0=bin_y,
            x1=100,
            y1=bin_y,
            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
            layer="below",
        )
        # Bin label annotation
        annotations.append(
            dict(
                x=0,  # Position the label on the left side
                y=bin_y,
                xref="x",
                yref="y",
                text=bin_descriptions[bin_name],
                showarrow=False,
                font=dict(size=8.25, color="var(--muted-foreground)"),
                align="left",
                xanchor="left",
                yanchor="middle",
                bgcolor="rgba(255,255,255,0.7)",
                borderpad=1,
            )
        )

    fig.update_layout(
        title=None,
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
            autorange="reversed",  # Keep largest at top
        ),
        hovermode="closest",
        margin=dict(l=0, r=0, t=10, b=10),
        coloraxis_colorbar=dict(
            title=color_title,
            title_font=dict(size=9),
            tickfont=dict(size=8),
            thickness=10,
            len=0.6,
            yanchor="middle",
            y=0.5,
            xpad=0,
        ),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
        annotations=annotations,  # Bubble labels and bin labels
    )
    return fig
# Update the update_topic_details callback to use grayscale colors for tags based on frequency | |
def update_topic_details(hover_data, click_data, stored_data, file_contents):
    """Build the contents of the topic details panel for the selected topic.

    Args:
        hover_data: Chart hover payload shaped like
            ``{"points": [{"customdata": [topic_name, ...]}]}``, or a falsy
            value when nothing is hovered.
        click_data: Chart click payload with the same shape; only used when
            ``hover_data`` is falsy.
        stored_data: Records accepted by ``pd.DataFrame``. The matching record
            must provide ``deduplicated_topic_name``, ``count``,
            ``negative_rate``, ``unresolved_rate`` and ``urgent_rate``.
        file_contents: The uploaded file as ``"<content type>,<base64 data>"``.

    Returns:
        A 6-tuple ``(title, metadata_items, metrics_boxes, tags_output,
        sample_dialogs, style)``. When there is no selection, no stored data,
        no uploaded file, or the selected topic is absent from
        ``stored_data``, the tuple is ``("", [], [], "", [], {"display":
        "flex"})``; otherwise the last element is ``{"display": "none"}``.
        NOTE(review): the element that consumes the style dict is not visible
        from this function - presumably a "nothing selected" placeholder.

    Raises:
        KeyError: If the uploaded table lacks ``deduplicated_topic_name`` or
            the sampled rows lack ``Sentiment``, ``Resolution``, ``Urgency``
            or ``Summary``.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from collections import Counter

    empty_state = ("", [], [], "", [], {"display": "flex"})

    # Hover data wins whenever it is present; click data is only the fallback.
    selection = hover_data or click_data
    if not selection or not stored_data or not file_contents:
        return empty_state

    # The topic name is carried as the first customdata entry of the point.
    topic_name = selection["points"][0]["customdata"][0]

    # Look up the aggregated row for this topic.
    stored_topics = pd.DataFrame(stored_data)
    matching_topics = stored_topics[
        stored_topics["deduplicated_topic_name"] == topic_name
    ]
    if matching_topics.empty:
        # A selection that does not match the stored data (e.g. a stale event
        # after a new upload) shows the empty panel instead of raising.
        return empty_state
    topic_data = matching_topics.iloc[0]

    # Re-read the uploaded file to get the individual conversations.
    # partition() splits on the first comma only, so it cannot fail to unpack.
    content_type, _, content_string = file_contents.partition(",")
    decoded = base64.b64decode(content_string)
    xlsx_content_type = (
        "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    )
    if content_type == xlsx_content_type:
        df_full = pd.read_excel(io.BytesIO(decoded))
    else:  # Anything else is assumed to be UTF-8 encoded CSV
        df_full = pd.read_csv(io.StringIO(decoded.decode("utf-8")))

    # Keep only the conversations of this topic.
    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

    title = html.Div([html.Span(topic_name)])

    metadata_items = [
        html.Div(
            [
                html.I(className="fas fa-comments metadata-icon"),
                html.Span(f"{int(topic_data['count'])} dialogs"),
            ],
            className="metadata-item",
        ),
    ]

    metrics_boxes = [
        html.Div(
            [
                html.Div(f"{topic_data['negative_rate']}%", className="metric-value"),
                html.Div("Negative Sentiment", className="metric-label"),
            ],
            className="metric-box negative",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['unresolved_rate']}%", className="metric-value"),
                html.Div("Unresolved", className="metric-label"),
            ],
            className="metric-box unresolved",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['urgent_rate']}%", className="metric-value"),
                html.Div("Urgent", className="metric-label"),
            ],
            className="metric-box urgent",
        ),
    ]

    # Count the comma-separated tags over all conversations of the topic.
    tag_counts = Counter()
    if "consolidated_tags" in topic_conversations.columns:
        for raw_tags in topic_conversations["consolidated_tags"].dropna():
            # str() guards against cells that were parsed as numbers.
            tag_counts.update(
                tag.strip() for tag in str(raw_tags).split(",") if tag.strip()
            )

    # Most frequent first, alphabetical on ties; keep only the top entries.
    max_tags = 15
    top_tags = sorted(tag_counts.items(), key=lambda item: (-item[1], item[0]))
    top_tags = top_tags[:max_tags]

    if top_tags:
        tags_output = html.Div(
            [
                html.Div(
                    [
                        html.I(className="fas fa-tag topic-tag-icon"),
                        html.Span(f"{tag} ({count})"),
                    ],
                    className="topic-tag",
                )
                for tag, count in top_tags
            ],
            className="tags-container",
        )
    else:
        tags_output = html.Div(
            [
                html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
                "No tags found for this topic",
            ],
            className="no-tags-message",
        )

    # Show up to 5 randomly chosen dialogs of the topic.
    sample_size = min(5, len(topic_conversations))
    if sample_size > 0:
        sample_positions = random.sample(range(len(topic_conversations)), sample_size)
        samples = topic_conversations.iloc[sample_positions]
        # The Chat ID tag is only rendered when the upload has an 'id' column.
        has_chat_id = "id" in samples.columns

        sample_dialogs = []
        for _, row in samples.iterrows():
            dialog_tags = [
                html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
                html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
                html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
            ]
            if has_chat_id:
                dialog_tags.append(
                    html.Span(
                        f"Chat ID: {row['id']}", className="dialog-tag tag-chat-id"
                    )
                )
            sample_dialogs.append(
                html.Div(
                    [
                        html.Div(row["Summary"], className="dialog-summary"),
                        html.Div(dialog_tags, className="dialog-metadata"),
                    ],
                    className="dialog-item",
                )
            )
    else:
        sample_dialogs = [
            html.Div(
                "No sample dialogs available for this topic.",
                style={"color": "var(--muted-foreground)"},
            )
        ]

    return (
        title,
        metadata_items,
        metrics_boxes,
        tags_output,
        sample_dialogs,
        {"display": "none"},
    )
if __name__ == "__main__":
    # Newer Dash releases replace ``run_server`` with ``run``; older ones only
    # offer ``run_server``. Prefer ``run`` and fall back when it is missing.
    start_server = getattr(app, "run", None)
    if start_server is None:
        start_server = app.run_server
    start_server(debug=False)