# Source: Hugging Face Space "helvia/sessions-observatory" (status: Running)
# File size: 54,966 bytes, revision 1b75011

import base64
import io
import random

import dash
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Input, Output, State, callback, dcc, html

# Initialize the Dash app.
# suppress_callback_exceptions=True stops Dash from raising when a callback
# references a component id that is not present in the initial layout.
app = dash.Dash(__name__, suppress_callback_exceptions=True)
# Underlying Flask server exposed at module level (presumably the entry point
# used by the hosting WSGI server - TODO confirm against the deployment config).
server = app.server


# Define app layout.
# Top-level structure, in order: page header, file-upload area (with its status
# message), the main dashboard (hidden via "display: none" until process_upload
# succeeds), and a dcc.Store holding the processed topic statistics.
app.layout = html.Div(
    [
        # Header
        html.Div(
            [
                html.H1(
                    "Sessions Observatory by helvia.ai 🔭📊",
                    className="app-header",
                ),
                html.P(
                    "Upload a CSV/Excel file to visualize the chatbot's dialog topics.",
                    className="app-description",
                ),
            ],
            className="header-container",
        ),
        # File Upload Component ("upload-data" is the input of process_upload)
        html.Div(
            [
                dcc.Upload(
                    id="upload-data",
                    children=html.Div(
                        [
                            html.Div("Drag and Drop", className="upload-text"),
                            html.Div("or", className="upload-divider"),
                            html.Div(
                                html.Button("Select a File", className="upload-button")
                            ),
                        ],
                        className="upload-content",
                    ),
                    style={
                        "width": "100%",
                        "height": "120px",
                        "lineHeight": "60px",
                        "borderWidth": "1px",
                        "borderStyle": "dashed",
                        "borderRadius": "0.5rem",
                        "textAlign": "center",
                        "margin": "10px 0",
                        "backgroundColor": "hsl(210, 40%, 98%)",
                        "borderColor": "hsl(214.3, 31.8%, 91.4%)",
                        "cursor": "pointer",
                    },
                    # Only a single file can be uploaded at a time
                    multiple=False,
                ),
                # Status message with more padding and emphasis; its children
                # and style are both written by process_upload
                html.Div(
                    id="upload-status",
                    className="upload-status-message",
                    style={"display": "none"},  # Initially hidden
                ),
            ],
            className="upload-container",
        ),
        # Main Content Area (hidden until file is uploaded)
        html.Div(
            [
                # Dashboard layout with flexible grid
                html.Div(
                    [
                        # Left side: Bubble chart
                        html.Div(
                            [
                                # Header text is replaced by
                                # update_topic_distribution_header once data is stored
                                html.H3(
                                    id="topic-distribution-header",
                                    children="Sessions Observatory",
                                    className="section-header",
                                ),
                                # NOTE: an earlier version used a fixed "80vh" graph height.
                                dcc.Graph(
                                    id="bubble-chart",
                                    style={"height": "calc(100% - 154px)"},
                                ),  # NOTE: original author reports this calc() height does not take effect - TODO investigate
                                html.Div(
                                    [
                                        # Only keep Color by
                                        html.Div(
                                            [
                                                html.Div(
                                                    html.Label(
                                                        "Color by:",
                                                        className="control-label",
                                                    ),
                                                    className="control-label-container",
                                                ),
                                            ],
                                            className="control-labels-row",
                                        ),
                                        # Only keep Color by options.
                                        # The option values match the *_rate column
                                        # names produced by analyze_topics.
                                        html.Div(
                                            [
                                                html.Div(
                                                    dcc.RadioItems(
                                                        id="color-metric",
                                                        options=[
                                                            {
                                                                "label": "Sentiment",
                                                                "value": "negative_rate",
                                                            },
                                                            {
                                                                "label": "Resolution",
                                                                "value": "unresolved_rate",
                                                            },
                                                            {
                                                                "label": "Urgency",
                                                                "value": "urgent_rate",
                                                            },
                                                        ],
                                                        value="negative_rate",
                                                        inline=True,
                                                        className="radio-group",
                                                        inputClassName="radio-input",
                                                        labelClassName="radio-label",
                                                    ),
                                                    className="radio-container",
                                                ),
                                            ],
                                            className="control-options-row",
                                        ),
                                    ],
                                    className="chart-controls",
                                ),
                            ],
                            className="chart-container",
                        ),
                        # Right side: Interactive sidebar with topic details.
                        # The id'd containers below start empty; presumably they
                        # are filled by callbacks defined elsewhere in this file
                        # (not visible here - TODO confirm).
                        html.Div(
                            [
                                html.Div(
                                    [
                                        html.H3(
                                            "Topic Details", className="section-header"
                                        ),
                                        html.Div(
                                            id="topic-title", className="topic-title"
                                        ),
                                        html.Div(
                                            [
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Metadata",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="topic-metadata",
                                                            className="metadata-container",
                                                        ),
                                                    ],
                                                    className="metadata-section",
                                                ),
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Key Metrics",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="topic-metrics",
                                                            className="metrics-container",
                                                        ),
                                                    ],
                                                    className="metrics-section",
                                                ),
                                                # Added Tags section
                                                html.Div(
                                                    [
                                                        html.H4(
                                                            "Tags",
                                                            className="subsection-header",
                                                        ),
                                                        html.Div(
                                                            id="important-tags",
                                                            className="tags-container",
                                                        ),
                                                    ]
                                                ),
                                            ],
                                            className="details-section",
                                        ),
                                        html.Div(
                                            [
                                                html.H4(
                                                    "Sample Dialogs (Summary)",
                                                    className="subsection-header",
                                                ),
                                                html.Div(
                                                    id="sample-dialogs",
                                                    className="sample-dialogs-container",
                                                ),
                                            ],
                                            className="samples-section",
                                        ),
                                    ],
                                    className="topic-details-content",
                                ),
                                # Placeholder message; the .no-selection-container
                                # CSS positions it absolutely over the whole sidebar.
                                html.Div(
                                    id="no-topic-selected",
                                    children=[
                                        html.Div(
                                            [
                                                html.I(
                                                    className="fas fa-info-circle info-icon"
                                                ),
                                                html.H3("No topic selected"),
                                                html.P(
                                                    "Click or hover on a bubble to view topic details."
                                                ),
                                            ],
                                            className="no-selection-message",
                                        )
                                    ],
                                    className="no-selection-container",
                                ),
                            ],
                            className="sidebar-container",
                        ),
                    ],
                    className="dashboard-container",
                )
            ],
            id="main-content",
            style={"display": "none"},
        ),
        # Store the processed data (written by process_upload, read by
        # update_topic_distribution_header)
        dcc.Store(id="stored-data"),
    ],
    className="app-container",
)

# Define CSS for the app.
# index_string replaces Dash's default HTML page template. The {%metas%},
# {%favicon%}, {%css%}, {%app_entry%}, {%config%}, {%scripts%} and
# {%renderer%} placeholders are filled in by Dash when the page is served;
# everything inside <style> is the app's stylesheet (theme variables first,
# then one rule per className used in the layout).
app.index_string = """
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>Sessions Observatory by helvia.ai 🔭📊</title>
        {%favicon%}
        {%css%}
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
        <style>
            @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

            :root {
                --background: hsl(210, 20%, 95%);
                --foreground: hsl(222.2, 84%, 4.9%);
                --card: hsl(0, 0%, 100%);
                --card-foreground: hsl(222.2, 84%, 4.9%);
                --popover: hsl(0, 0%, 100%);
                --popover-foreground: hsl(222.2, 84%, 4.9%);
                --primary: hsl(222.2, 47.4%, 11.2%);
                --primary-foreground: hsl(210, 40%, 98%);
                --secondary: hsl(210, 40%, 96.1%);
                --secondary-foreground: hsl(222.2, 47.4%, 11.2%);
                --muted: hsl(210, 40%, 96.1%);
                --muted-foreground: hsl(215.4, 16.3%, 46.9%);
                --accent: hsl(210, 40%, 96.1%);
                --accent-foreground: hsl(222.2, 47.4%, 11.2%);
                --destructive: hsl(0, 84.2%, 60.2%);
                --destructive-foreground: hsl(210, 40%, 98%);
                --border: hsl(214.3, 31.8%, 91.4%);
                --input: hsl(214.3, 31.8%, 91.4%);
                --ring: hsl(222.2, 84%, 4.9%);
                --radius: 0.5rem;
            }

            * {
                margin: 0;
                padding: 0;
                box-sizing: border-box;
                font-family: 'Inter', sans-serif;
            }

            body {
                background-color: var(--background);
                color: var(--foreground);
                font-feature-settings: "rlig" 1, "calt" 1;
            }

            .app-container {
                max-width: 2500px;
                margin: 0 auto;
                padding: 1.5rem;
                background-color: var(--background);
                min-height: 100vh;
                display: flex;
                flex-direction: column;
            }

            .header-container {
                margin-bottom: 2rem;
                text-align: center;
            }

            .app-header {
                color: var(--foreground);
                margin-bottom: 0.75rem;
                font-weight: 600;
                font-size: 2rem;
                line-height: 1.2;
            }

            .app-description {
                color: var(--muted-foreground);
                font-size: 1rem;
                line-height: 1.5;
            }

            .upload-container {
                margin-bottom: 2rem;
                max-width: 800px;
                margin-left: auto;
                margin-right: auto;
            }

            .upload-content {
                display: flex;
                flex-direction: column;
                align-items: center;
                justify-content: center;
                height: 80%;
                padding: 1.5rem;
                position: relative;
            }

            .upload-text {
                font-size: 1rem;
                color: var(--primary);
                font-weight: 500;
            }

            .upload-divider {
                color: var(--muted-foreground);
                margin: 0.5rem 0;
                font-size: 0.875rem;
            }

            .upload-button {
                background-color: var(--primary);
                color: var(--primary-foreground);
                border: none;
                padding: 0.5rem 1rem;
                border-radius: var(--radius);
                font-size: 0.875rem;
                cursor: pointer;
                transition: opacity 0.2s;
                font-weight: 500;
                box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
                height: 2.5rem;
            }

            .upload-button:hover {
                opacity: 0.9;
            }

            /* Status message styling */
            .upload-status-message {
                margin-top: 1rem;
                padding: 0.75rem;
                font-weight: 500;
                text-align: center;
                border-radius: var(--radius);
                font-size: 0.875rem;
                transition: all 0.3s ease;
                background-color: var(--secondary);
                color: var(--secondary-foreground);
            }

            /* Chart controls styling */
            .chart-controls {
                margin-top: 1rem;
                display: flex;
                flex-direction: column;
                gap: 0.75rem;
                padding: 1rem;
                background-color: var(--card);
                border-radius: var(--radius);
                border: 1px solid var(--border);
                box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
            }

            .control-labels-row {
                display: flex;
                width: 100%;
            }

            .control-options-row {
                display: flex;
                width: 100%;
            }

            .control-label-container {
                padding: 0 0.5rem;
                text-align: left;
            }

            .control-label {
                font-weight: 500;
                color: var(--foreground);
                font-size: 0.875rem;
                line-height: 1.25rem;
            }

            .radio-container {
                padding: 0 0.5rem;
                width: 100%;
            }

            .radio-group {
                display: flex;
                gap: 1rem;
            }

            .radio-input {
                margin-right: 0.375rem;
                cursor: pointer;
                height: 1rem;
                width: 1rem;
                border-radius: 9999px;
                border: 1px solid var(--border);
                appearance: none;
                -webkit-appearance: none;
                background-color: var(--background);
                transition: border-color 0.2s;
            }

            .radio-input:checked {
                border-color: var(--primary);
                background-color: var(--primary);
                background-image: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
                background-size: 100% 100%;
                background-position: center;
                background-repeat: no-repeat;
            }

            .radio-label {
                font-weight: 400;
                color: var(--foreground);
                display: flex;
                align-items: center;
                cursor: pointer;
                font-size: 0.875rem;
                line-height: 1.25rem;
            }

            /* Dashboard container */
            .dashboard-container {
                display: flex;
                flex-wrap: wrap;
                gap: 1.5rem;
                flex: 1;
                height: 100%;
            }

            .chart-container {
                flex: 2.75;
                min-width: 400px;
                background: var(--card);
                border-radius: var(--radius);
                box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
                padding: 1rem;
                border: 0.75px solid var(--border);
                height: 100%;
            }

            .sidebar-container {
                flex: 1;
                min-width: 300px;
                background: var(--card);
                border-radius: var(--radius);
                box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
                padding: 1rem;
                position: relative;
                overflow-y: auto;
                border: 1px solid var(--border);
                height: 100%;
            }

            .section-header {
                margin-bottom: 1rem;
                color: var(--foreground);
                border-bottom: 1px solid var(--border);
                padding-bottom: 0.75rem;
                font-weight: 600;
                font-size: 1.25rem;
            }

            .subsection-header {
                margin: 1rem 0 0.75rem;
                color: var(--foreground);
                font-size: 1rem;
                font-weight: 600;
            }

            .topic-title {
                font-size: 1.25rem;
                font-weight: 600;
                color: var(--foreground);
                margin-bottom: 1rem;
                padding: 0.5rem 0.75rem;
                background-color: var(--secondary);
                border-radius: var(--radius);
            }

            .metadata-container {
                display: flex;
                flex-wrap: wrap;
                gap: 0.75rem;
                margin-bottom: 1rem;
            }

            .metadata-item {
                background-color: var(--secondary);
                padding: 0.5rem 0.75rem;
                border-radius: var(--radius);
                font-size: 0.875rem;
                display: flex;
                align-items: center;
                color: var(--secondary-foreground);
            }

            .metadata-icon {
                margin-right: 0.5rem;
                color: var(--primary);
            }

            .metrics-container {
                display: flex;
                justify-content: space-between;
                gap: 0.75rem;
                margin-bottom: 0.75rem;
            }

            .metric-box {
                background-color: var(--card);
                border-radius: var(--radius);
                padding: 0.75rem;
                text-align: center;
                flex: 1;
                border: 1px solid var(--border);
            }

            .metric-box.negative {
                border-left: 3px solid var(--destructive);
            }

            .metric-box.unresolved {
                border-left: 3px solid hsl(47.9, 95.8%, 53.1%);
            }

            .metric-box.urgent {
                border-left: 3px solid hsl(217.2, 91.2%, 59.8%);
            }

            .metric-value {
                font-size: 1.5rem;
                font-weight: 600;
                margin-bottom: 0.25rem;
                color: var(--foreground);
                line-height: 1;
            }

            .metric-label {
                font-size: 0.75rem;
                color: var(--muted-foreground);
            }

            .sample-dialogs-container {
                margin-top: 0.75rem;
            }

            .dialog-item {
                background-color: var(--secondary);
                border-radius: var(--radius);
                padding: 1rem;
                margin-bottom: 0.75rem;
                border-left: 3px solid var(--primary);
            }

            .dialog-summary {
                font-size: 0.875rem;
                line-height: 1.5;
                margin-bottom: 0.5rem;
                color: var(--foreground);
            }

            .dialog-metadata {
                display: flex;
                flex-wrap: wrap;
                gap: 0.5rem;
                margin-top: 0.5rem;
                font-size: 0.75rem;
            }

            .dialog-tag {
                padding: 0.25rem 0.5rem;
                border-radius: var(--radius);
                font-size: 0.7rem;
                font-weight: 500;
            }

            .tag-sentiment {
                background-color: var(--destructive);
                color: var(--destructive-foreground);
            }

            .tag-resolution {
                background-color: hsl(47.9, 95.8%, 53.1%);
                color: hsl(222.2, 84%, 4.9%);
            }

            .tag-urgency {
                background-color: hsl(217.2, 91.2%, 59.8%);
                color: hsl(210, 40%, 98%);
            }

            .tag-chat-id {
                background-color: hsl(215.4, 16.3%, 46.9%);
                color: hsl(210, 40%, 98%);
                font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
                font-weight: 500;
            }

            .no-selection-container {
                position: absolute;
                top: 0;
                left: 0;
                right: 0;
                bottom: 0;
                display: flex;
                align-items: center;
                justify-content: center;
                background-color: hsla(0, 0%, 100%, 0.95);
                z-index: 10;
                border-radius: var(--radius);
            }

            .no-selection-message {
                text-align: center;
                color: var(--muted-foreground);
                padding: 1.5rem;
            }

            .info-icon {
                font-size: 2rem;
                margin-bottom: 0.75rem;
                color: var(--muted);
            }

            /* Tags container */
            .tags-container {
                display: flex;
                flex-wrap: wrap;
                gap: 8px;
                margin-top: 5px;
                margin-bottom: 15px;
                padding: 6px;
                border-radius: 8px;
                background-color: #f8f9fa;
            }

            .topic-tag {
                padding: 6px 12px;
                border-radius: 15px;
                font-size: 0.8rem;
                display: inline-flex;
                align-items: center;
                box-shadow: 0 1px 3px rgba(0,0,0,0.12);
                transition: all 0.2s ease;
                font-weight: 500;
                margin-bottom: 5px;
                cursor: default;
                border: 1px solid rgba(0,0,0,0.08);
                background-color: #6c757d;  /* Consistent medium gray color */
                color: white;
            }

            .topic-tag:hover {
                transform: translateY(-1px);
                box-shadow: 0 3px 5px rgba(0,0,0,0.15);
                background-color: #5a6268;  /* Slightly darker on hover */
            }

            .topic-tag-icon {
                margin-right: 5px;
                font-size: 0.7rem;
                opacity: 0.8;
                color: rgba(255, 255, 255, 0.9);
            }

            .no-tags-message {
                color: var(--muted-foreground);
                font-style: italic;
                padding: 0.75rem;
                text-align: center;
                width: 100%;
            }

            /* Responsive adjustments */
            @media (max-width: 768px) {
                .dashboard-container {
                    flex-direction: column;
                }
                .chart-container, .sidebar-container {
                    width: 100%;
                }
                .app-header {
                    font-size: 1.5rem;
                }
            }
        </style>
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
"""


@callback(
    Output("topic-distribution-header", "children"),
    Input("stored-data", "data"),
)
def update_topic_distribution_header(data):
    """Return the chart header text, including the dialog total when data exists.

    Args:
        data: Contents of the "stored-data" store: the per-topic records
            written by ``process_upload``, or a falsy value when nothing has
            been uploaded yet.

    Returns:
        "Sessions Observatory" when ``data`` is falsy, otherwise the same
        title followed by the sum of the records' "count" values.
    """
    base_title = "Sessions Observatory"
    if data:
        # Each record is one topic; adding up "count" gives the dialog total.
        dialog_total = pd.DataFrame(data)["count"].sum()
        return f"{base_title} ({dialog_total} dialogs)"
    # Default when no data is available
    return base_title


def _upload_status_message(icon_class, text, color):
    """Build the upload status banner: an icon followed by ``text``.

    Args:
        icon_class: CSS class string for the leading ``<i>`` icon element.
        text: Message displayed next to the icon.
        color: CSS color applied to both the icon and the message.

    Returns:
        An ``html.Div`` containing the icon and the message.
    """
    return html.Div(
        [
            html.I(
                className=icon_class,
                style={"color": color, "marginRight": "8px"},
            ),
            text,
        ],
        style={"color": color},
    )


# Define callback to process uploaded file
@callback(
    [
        Output("stored-data", "data"),
        Output("upload-status", "children"),
        Output("upload-status", "style"),  # Add style output for visibility
        Output("main-content", "style"),
    ],
    [Input("upload-data", "contents")],
    [State("upload-data", "filename")],
)
def process_upload(contents, filename):
    """Parse an uploaded CSV/Excel file and publish its topic statistics.

    Args:
        contents: Value of the upload component's ``contents`` property: a
            string of the form ``"<prefix>,<base64 payload>"``, or ``None``
            when nothing has been uploaded.
        filename: Name of the uploaded file; its extension selects the parser
            (``.csv`` -> ``pd.read_csv``, ``.xls*`` -> ``pd.read_excel``).

    Returns:
        A 4-tuple matching the callback outputs:
        ``(stored records or None, status message, status style, main-content style)``.
        On any failure the records are ``None``, the main content stays
        hidden and the status message describes the problem.
    """
    error_color = "var(--destructive)"
    success_color = "hsl(142.1, 76.2%, 36.3%)"

    if contents is None:
        return None, "", {"display": "none"}, {"display": "none"}  # Keep hidden

    try:
        # Parse uploaded file: keep only the payload after the first comma.
        _, content_string = contents.split(",", 1)
        decoded = base64.b64decode(content_string)

        # Decide the parser from the real file extension, not from a substring
        # of the name (e.g. "csv_export.xlsx" must be read as Excel).
        _, dot, extension = (filename or "").lower().rpartition(".")
        if dot and extension == "csv":
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM so it
            # cannot end up inside the first column name.
            df = pd.read_csv(io.StringIO(decoded.decode("utf-8-sig")))
        elif dot and extension.startswith("xls"):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return (
                None,
                _upload_status_message(
                    "fas fa-exclamation-circle",
                    "Please upload a CSV or Excel file.",
                    error_color,
                ),
                {"display": "block"},  # Make visible after error
                {"display": "none"},
            )

        # Process the dataframe to get topic statistics
        topic_stats = analyze_topics(df)

        return (
            topic_stats.to_dict("records"),
            _upload_status_message(
                "fas fa-check-circle",
                f'Successfully uploaded "{filename}"',
                success_color,
            ),
            # TODO (original author): "maybe add the above line here too" -
            # intent unclear, left as is.
            {"display": "block"},
            {
                "display": "block",
                "height": "calc(100vh - 40px)",
            },  # Make visible after successful upload
        )

    except Exception as e:
        # Callback boundary: any parsing/analysis failure is reported to the
        # user in the status area instead of breaking the callback.
        return (
            None,
            _upload_status_message(
                "fas fa-exclamation-triangle",
                f"Error processing file: {e}",
                error_color,
            ),
            {"display": "block"},  # Make visible after error
            {"display": "none"},
        )


# Function to analyze the topics and create statistics
def analyze_topics(df):
    """Aggregate per-topic dialog statistics and attach chart positions.

    Args:
        df: DataFrame with one row per dialog. The code reads the columns
            ``deduplicated_topic_name``, ``id``, ``Sentiment``, ``Resolution``
            and ``Urgency``.

    Returns:
        DataFrame with one row per topic holding ``count``, the three
        ``*_count`` columns, the three ``*_rate`` percentages (rounded to one
        decimal) and the layout columns added by ``apply_binned_layout``.
    """

    def count_label(label):
        # Build an aggregator that counts the cells equal to ``label``.
        return lambda values: (values == label).sum()

    per_topic = df.groupby("deduplicated_topic_name").agg(
        count=("id", "count"),
        negative_count=("Sentiment", count_label("negative")),
        unresolved_count=("Resolution", count_label("unresolved")),
        urgent_count=("Urgency", count_label("urgent")),
    )
    topic_stats = per_topic.reset_index()

    # Express each flagged count as a percentage of the topic's dialogs
    for metric in ("negative", "unresolved", "urgent"):
        share = topic_stats[f"{metric}_count"] / topic_stats["count"] * 100
        topic_stats[f"{metric}_rate"] = share.round(1)

    # Assign bins and x/y coordinates for the bubble chart
    return apply_binned_layout(topic_stats)


# New binned layout function


def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
    """
    Apply a binned layout where bubbles are grouped into rows based on dialog count.
    Bubbles in each row will be centered horizontally.

    Rows are emitted in the order of ``bin_config`` (first entry at the top),
    and topics inside a row are ordered by descending ``count``. A bin holding
    more than ``max_items_per_row`` topics is split into evenly sized sub-rows
    named ``"<bin>_<k>"`` and described as ``"<description> (k/n)"``.

    Args:
        df: DataFrame containing the topic data; only the ``count`` column is
            read. The input is not modified.
        padding: Space kept free above the first and below the last row, in
            units of the 0-100 vertical scale.
        bin_config: List of ``(lower, upper, description)`` tuples with
            inclusive bounds; ``upper=None`` means no upper limit. The first
            matching entry wins. Rows matching no entry fall into the last bin.
            Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
        max_items_per_row: Maximum number of items to display in a single row

    Returns:
        A new DataFrame with the columns ``bin``, ``bin_description``, ``y``,
        ``x`` and ``size_rank`` set. An empty input yields an empty frame that
        still carries those columns.

    Raises:
        ValueError: If ``bin_config`` is an empty list.
    """
    # Work on a copy of the dataframe to avoid modifying the original
    df_sorted = df.copy()

    # Default bin configuration if none is provided
    # 8 rows x 6 bubbles is usually good
    if bin_config is None:
        bin_config = [
            (100, None, "100+ dialogs"),
            (50, 99, "50-99 dialogs"),
            (25, 49, "25-49 dialogs"),
            (9, 24, "9-24 dialogs"),
            (7, 8, "7-8 dialogs"),
            (5, 6, "5-6 dialogs"),
            (4, 4, "4 dialogs"),
            (0, 3, "0-3 dialogs"),
        ]
    if not bin_config:
        raise ValueError("bin_config must contain at least one bin")

    # Nothing to lay out: return the expected columns instead of dividing the
    # chart height by zero rows further down.
    if df_sorted.empty:
        return df_sorted.assign(
            bin=pd.Series(dtype="object"),
            bin_description=pd.Series(dtype="object"),
            y=pd.Series(dtype="float64"),
            x=pd.Series(dtype="float64"),
            size_rank=pd.Series(dtype="int64"),
        )

    # Generate bin names, descriptions and membership conditions dynamically
    counts = df_sorted["count"]
    bin_names = []
    bin_descriptions = {}
    conditions = []
    for number, (lower, upper, description) in enumerate(bin_config, start=1):
        bin_name = f"Bin {number}"
        bin_names.append(bin_name)
        bin_descriptions[bin_name] = description

        in_bin = counts >= lower
        if upper is not None:
            in_bin = in_bin & (counts <= upper)
        conditions.append(in_bin)

    # Unmatched rows go to the last configured bin so that they always have a
    # description, whatever the length of bin_config.
    df_sorted["bin"] = np.select(conditions, bin_names, default=bin_names[-1])
    df_sorted["bin_description"] = df_sorted["bin"].map(bin_descriptions)

    # Largest topics first; the stable sort keeps the input order for ties
    df_sorted = df_sorted.sort_values(by="count", ascending=False, kind="stable")

    # Build the rows bin by bin, in configuration order. Assembling them in
    # order (rather than sorting the bin-name strings afterwards) keeps
    # "Bin 1_10" after "Bin 1_9" and "Bin 10" after "Bin 9".
    rows = []
    for bin_name in bin_names:
        bin_group = df_sorted[df_sorted["bin"] == bin_name]
        bin_size = len(bin_group)
        if bin_size == 0:
            continue
        if bin_size <= max_items_per_row:
            rows.append(bin_group)
            continue

        # Split an over-full bin into the fewest sub-rows that fit, spreading
        # the remainder over the first sub-rows so their sizes differ by <= 1.
        num_sub_bins = (bin_size + max_items_per_row - 1) // max_items_per_row
        base_size, remainder = divmod(bin_size, num_sub_bins)
        original_description = bin_descriptions[bin_name]

        start_idx = 0
        for sub_index in range(num_sub_bins):
            end_idx = start_idx + base_size + (1 if sub_index < remainder else 0)
            sub_bin_rows = bin_group.iloc[start_idx:end_idx].copy()
            sub_bin_rows["bin"] = f"{bin_name}_{sub_index + 1}"
            sub_bin_rows["bin_description"] = (
                f"{original_description} ({sub_index + 1}/{num_sub_bins})"
            )
            rows.append(sub_bin_rows)
            start_idx = end_idx

    df_sorted = pd.concat(rows)

    # Calculate the vertical position of each row (centered in its band)
    bins_with_topics = list(df_sorted["bin"].unique())  # order of appearance
    available_height = 100 - (2 * padding)
    row_height = available_height / len(bins_with_topics)
    row_positions = {
        bin_name: padding + row * row_height + (row_height / 2)
        for row, bin_name in enumerate(bins_with_topics)
    }
    df_sorted["y"] = df_sorted["bin"].map(row_positions)

    # Center the bubbles of each row horizontally around the chart middle.
    # Rows with fewer than max_items_per_row bubbles use a wider gap (17.5
    # units); rows with max_items_per_row bubbles use 15 units.
    center_point = 50  # Middle of the chart (0-100 scale)
    row_sizes = df_sorted["bin"].map(df_sorted["bin"].value_counts()).to_numpy()
    slot = df_sorted.groupby("bin", sort=False).cumcount().to_numpy()
    spacing = np.where(row_sizes < max_items_per_row, 17.5, 15.0)
    df_sorted["x"] = center_point - (row_sizes - 1) * spacing / 2 + slot * spacing

    # Rank of each topic in the final top-to-bottom, left-to-right order
    df_sorted["size_rank"] = range(1, len(df_sorted) + 1)

    return df_sorted


# Position helper for the main chart
def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` laid out with the default binned layout.

    The main chart always uses ``apply_binned_layout`` with its default
    arguments; no metric is taken into account here.
    """
    positioned = apply_binned_layout(df)
    return positioned


# Callback to update the bubble chart
@callback(
    Output("bubble-chart", "figure"),
    [
        Input("stored-data", "data"),
        Input("color-metric", "value"),
    ],
)
def update_bubble_chart(data, color_metric):
    """Build the topic bubble chart from the stored topic statistics.

    Args:
        data: Contents of the ``stored-data`` component; passed to
            ``pd.DataFrame``. A falsy value yields an empty figure.
        color_metric: Value of the ``color-metric`` component.
            ``"negative_rate"`` and ``"unresolved_rate"`` select those columns;
            any other value colors the bubbles by ``urgent_rate``.

    Returns:
        A plotly figure with one bubble per topic (area scaled by the log of
        the dialog count), a text annotation per topic, and one dotted
        separator line plus label per bin row.
    """
    import logging  # function-scope import: the file's import block is not edited here

    if not data:
        return go.Figure()

    df = pd.DataFrame(data)

    # Update positions using binned layout
    df = update_bubble_positions(df)

    # Always use count for sizing
    raw_sizes = df["count"]
    size_title = "Dialog Count"

    # Apply log scaling to the size values for better visualization.
    # To make the smallest bubble bigger, increase min_size.
    # To make the biggest bubble smaller, reduce size_span.
    min_size = 1
    size_span = 50
    if raw_sizes.max() > raw_sizes.min():
        log_sizes = np.log1p(raw_sizes)
        size_values = (
            min_size
            + (log_sizes - log_sizes.min())
            / (log_sizes.max() - log_sizes.min())
            * size_span
        )
    else:
        # If all values are the same, use a default size. Kept as a Series on
        # df's index so both branches produce the same kind of object.
        size_values = pd.Series(12.5, index=df.index)

    # Diagnostic dump of the first two bins, emitted only at DEBUG level
    logger = logging.getLogger(__name__)
    if logger.isEnabledFor(logging.DEBUG):
        for bin_name in sorted(df["bin"].unique())[:2]:
            in_bin = (df["bin"] == bin_name).to_numpy()
            logger.debug("Bin '%s' bubble sizes:", bin_name)
            for topic, raw, shown in zip(
                df["deduplicated_topic_name"][in_bin],
                raw_sizes[in_bin],
                size_values[in_bin],
            ):
                logger.debug(
                    "  Topic: %s, Raw size: %s, Displayed size: %s",
                    topic,
                    raw,
                    shown,
                )

    # Determine color based on selected metric (same scale for every metric)
    color_scale = "Teal"
    if color_metric == "negative_rate":
        color_values = df["negative_rate"]
        color_title = "Negativity (%)"
    elif color_metric == "unresolved_rate":
        color_values = df["unresolved_rate"]
        color_title = "Unresolved (%)"
    else:
        color_values = df["urgent_rate"]
        color_title = "Urgency (%)"

    # Create enhanced hover text that includes bin information
    hover_text = [
        f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
        for topic, raw, color, bin_desc in zip(
            df["deduplicated_topic_name"],
            raw_sizes,
            color_values,
            df["bin_description"],
        )
    ]

    # Create bubble chart
    fig = px.scatter(
        df,
        x="x",
        y="y",
        size=size_values,
        color=color_values,
        hover_name="deduplicated_topic_name",
        hover_data={
            "x": False,
            "y": False,
            "bin_description": True,
        },
        size_max=42.5,  # Maximum size of the bubbles, change this to adjust the size
        color_continuous_scale=color_scale,
        custom_data=[
            "deduplicated_topic_name",
            "count",
            "negative_rate",
            "unresolved_rate",
            "urgent_rate",
            "bin_description",
        ],
    )

    # Markers only: topic names are drawn as annotations below
    fig.update_traces(
        mode="markers",
        marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
        hovertemplate="%{hovertext}<extra></extra>",
        hovertext=hover_text,
    )

    # Create annotations for the bubbles. The columns and size_values are in
    # the same row order, so they are paired positionally.
    annotations = []
    for topic, x_pos, y_pos, size in zip(
        df["deduplicated_topic_name"], df["x"], df["y"], size_values
    ):
        # Wrap text every 4 words
        words = topic.split()
        wrapped_text = "<br>".join(
            " ".join(words[start : start + 4]) for start in range(0, len(words), 4)
        )

        # Vertical offset that grows with the bubble size so the text hangs
        # below the bubble; adjust the divisor to change the spacing.
        marker_size = size / 20

        annotations.append(
            dict(
                x=x_pos,
                y=y_pos
                + 0.125  # Adding this so in a row with maximum bubbles, the left one does not overlap with the bin label
                + marker_size,
                text=wrapped_text,
                showarrow=False,
                textangle=0,
                font=dict(
                    size=10,
                    color="var(--foreground)",
                    family="Arial, sans-serif",
                    weight="bold",
                ),
                xanchor="center",
                yanchor="top",  # Anchor to top of text box so it hangs below the bubble
                bgcolor="rgba(255,255,255,0.7)",  # Semi-transparent background for readability
                bordercolor="rgba(0,0,0,0.1)",  # Subtle border color
                borderwidth=1,
                borderpad=1,
                # TODO: Radius for rounded corners
            )
        )

    # Add one separator line and one label per bin row
    unique_bins = sorted(df["bin"].unique())
    bin_y_positions = [
        df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins
    ]

    # Dynamically extract bin descriptions
    bin_descriptions = df.set_index("bin")["bin_description"].to_dict()

    for bin_name, bin_y in zip(unique_bins, bin_y_positions):
        # Add horizontal line (once per bin)
        fig.add_shape(
            type="line",
            x0=0,
            y0=bin_y,
            x1=100,
            y1=bin_y,
            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
            layer="below",
        )

        # Add bin label annotation
        annotations.append(
            dict(
                x=0,  # Position the label on the left side
                y=bin_y,
                xref="x",
                yref="y",
                text=bin_descriptions[bin_name],
                showarrow=False,
                font=dict(size=8.25, color="var(--muted-foreground)"),
                align="left",
                xanchor="left",
                yanchor="middle",
                bgcolor="rgba(255,255,255,0.7)",
                borderpad=1,
            )
        )

    fig.update_layout(
        title=None,
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
            autorange="reversed",  # Keep largest at top
        ),
        hovermode="closest",
        margin=dict(l=0, r=0, t=10, b=10),
        coloraxis_colorbar=dict(
            title=color_title,
            title_font=dict(size=9),
            tickfont=dict(size=8),
            thickness=10,
            len=0.6,
            yanchor="middle",
            y=0.5,
            xpad=0,
        ),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
        annotations=annotations,  # Topic names and bin labels
    )

    return fig


# Callback to fill the topic details panel for the hovered/clicked bubble
@callback(
    [
        Output("topic-title", "children"),
        Output("topic-metadata", "children"),
        Output("topic-metrics", "children"),
        Output("important-tags", "children"),
        Output("sample-dialogs", "children"),
        Output("no-topic-selected", "style"),
    ],
    [Input("bubble-chart", "hoverData"), Input("bubble-chart", "clickData")],
    [State("stored-data", "data"), State("upload-data", "contents")],
)
def update_topic_details(hover_data, click_data, stored_data, file_contents):
    """Render the details panel for the topic under the cursor.

    Args:
        hover_data: ``hoverData`` of the bubble chart.
        click_data: ``clickData`` of the bubble chart; used only when
            ``hover_data`` is falsy.
        stored_data: Per-topic statistics from the ``stored-data`` component.
        file_contents: ``contents`` of the upload component
            (``"<content type>,<base64 payload>"``). It is decoded and parsed
            again on every call to obtain the individual dialogs.

    Returns:
        Tuple of (title, metadata items, metric boxes, tags block, sample
        dialogs, style of the "no topic selected" placeholder). When any input
        is missing, or the topic is not present in ``stored_data``, the panel
        is emptied and the placeholder is shown.
    """
    from collections import Counter  # function-scope import: the file's import block is not edited here

    empty_panel = ("", [], [], "", [], {"display": "flex"})

    # Determine which data to use (hover wins; click is the fallback)
    hover_info = hover_data or click_data

    if not hover_info or not stored_data or not file_contents:
        return empty_panel

    # Extract topic name from the hover data
    topic_name = hover_info["points"][0]["customdata"][0]

    # Get stored data for this topic
    df_stored = pd.DataFrame(stored_data)
    topic_rows = df_stored[df_stored["deduplicated_topic_name"] == topic_name]
    if topic_rows.empty:
        # The event refers to a topic that is not in the stored statistics
        return empty_panel
    topic_data = topic_rows.iloc[0]

    # Get original data to sample conversations; split only on the first
    # comma, which separates the content-type header from the payload.
    content_type, content_string = file_contents.split(",", 1)
    decoded = base64.b64decode(content_string)

    if (
        content_type
        == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    ):
        df_full = pd.read_excel(io.BytesIO(decoded))
    else:  # Assume CSV
        df_full = pd.read_csv(io.StringIO(decoded.decode("utf-8")))

    # Filter to this topic
    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

    # Create the title
    title = html.Div([html.Span(topic_name)])

    # Create metadata items
    metadata_items = [
        html.Div(
            [
                html.I(className="fas fa-comments metadata-icon"),
                html.Span(f"{int(topic_data['count'])} dialogs"),
            ],
            className="metadata-item",
        ),
    ]

    # Create metrics boxes
    metrics_boxes = [
        html.Div(
            [
                html.Div(f"{topic_data['negative_rate']}%", className="metric-value"),
                html.Div("Negative Sentiment", className="metric-label"),
            ],
            className="metric-box negative",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['unresolved_rate']}%", className="metric-value"),
                html.Div("Unresolved", className="metric-label"),
            ],
            className="metric-box unresolved",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['urgent_rate']}%", className="metric-value"),
                html.Div("Urgent", className="metric-label"),
            ],
            className="metric-box urgent",
        ),
    ]

    # Count the comma-separated consolidated_tags across the topic's dialogs.
    # Cells are converted with str() so non-string values do not break parsing.
    tag_counts = Counter()
    tag_column = topic_conversations.get("consolidated_tags")
    if tag_column is not None:
        for tags_str in tag_column.dropna():
            tag_counts.update(
                tag.strip() for tag in str(tags_str).split(",") if tag.strip()
            )

    # Sort by frequency (most common first) and then alphabetically for ties,
    # keeping only the top K tags
    TOP_K = 15
    sorted_tags = sorted(tag_counts.items(), key=lambda item: (-item[1], item[0]))
    sorted_tags = sorted_tags[:TOP_K]

    if sorted_tags:
        # One styled chip per tag, with its occurrence count
        tags_output = html.Div(
            [
                html.Div(
                    [
                        html.I(className="fas fa-tag topic-tag-icon"),
                        html.Span(f"{tag} ({count})"),
                    ],
                    className="topic-tag",
                )
                for tag, count in sorted_tags
            ],
            className="tags-container",
        )
    else:
        tags_output = html.Div(
            [
                html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
                "No tags found for this topic",
            ],
            className="no-tags-message",
        )

    # Sample up to 5 random dialogs
    sample_size = min(5, len(topic_conversations))
    if sample_size > 0:
        sample_indices = random.sample(range(len(topic_conversations)), sample_size)
        samples = topic_conversations.iloc[sample_indices]

        sample_dialogs = []
        for _, row in samples.iterrows():
            # Sentiment / resolution / urgency chips
            tags = [
                html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
                html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
                html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
            ]

            # Add Chat ID tag if 'id' column exists
            if "id" in row:
                tags.append(
                    html.Span(
                        f"Chat ID: {row['id']}", className="dialog-tag tag-chat-id"
                    )
                )

            sample_dialogs.append(
                html.Div(
                    [
                        html.Div(row["Summary"], className="dialog-summary"),
                        html.Div(
                            tags,
                            className="dialog-metadata",
                        ),
                    ],
                    className="dialog-item",
                )
            )
    else:
        sample_dialogs = [
            html.Div(
                "No sample dialogs available for this topic.",
                style={"color": "var(--muted-foreground)"},
            )
        ]

    return (
        title,
        metadata_items,
        metrics_boxes,
        tags_output,
        sample_dialogs,
        {"display": "none"},
    )


if __name__ == "__main__":
    # Prefer Dash.run when the installed Dash provides it; fall back to the
    # legacy run_server entry point on older releases.
    run_app = getattr(app, "run", None) or app.run_server
    run_app(debug=False)