import pandas as pd
from datetime import datetime, timedelta
from scripts.utils import DATA_DIR


# Basic Week over Week Retention
def calculate_wow_retention_by_type(
    df: pd.DataFrame, market_creator: str
) -> pd.DataFrame:
    filtered_df = df.loc[df["market_creator"] == market_creator]
    # Get unique traders per week and type
    weekly_traders = (
        filtered_df.groupby(["month_year_week", "trader_type"])["trader_address"]
        .nunique()
        .reset_index()
    )
    weekly_traders = weekly_traders.sort_values(["trader_type", "month_year_week"])

    # Calculate retention
    retention = []
    # Iterate through each trader type
    for trader_type in weekly_traders["trader_type"].unique():
        type_data = weekly_traders[weekly_traders["trader_type"] == trader_type]

        # Calculate retention for each week within this trader type
        for i in range(1, len(type_data)):
            current_week = type_data.iloc[i]["month_year_week"]
            previous_week = type_data.iloc[i - 1]["month_year_week"]

            # Get traders in both weeks for this type
            current_traders = set(
                filtered_df[
                    (filtered_df["month_year_week"] == current_week)
                    & (filtered_df["trader_type"] == trader_type)
                ]["trader_address"]
            )

            previous_traders = set(
                filtered_df[
                    (filtered_df["month_year_week"] == previous_week)
                    & (filtered_df["trader_type"] == trader_type)
                ]["trader_address"]
            )

            retained = len(current_traders.intersection(previous_traders))
            retention_rate = (
                (retained / len(previous_traders)) * 100
                if len(previous_traders) > 0
                else 0
            )

            retention.append(
                {
                    "trader_type": trader_type,
                    "week": current_week,
                    "retained_traders": retained,
                    "previous_traders": len(previous_traders),
                    "retention_rate": round(retention_rate, 2),
                }
            )

    return pd.DataFrame(retention)


# Cohort Retention
def calculate_cohort_retention(
    df: pd.DataFrame, market_creator: str, trader_type: str, max_weeks=12
) -> pd.DataFrame:
    df_filtered = df.loc[
        (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
    ]
    # Get first week for each trader
    first_trades = (
        df_filtered.groupby("trader_address")
        .agg({"creation_timestamp": "min", "month_year_week": "first"})
        .reset_index()
    )
    first_trades.columns = ["trader_address", "first_trade", "cohort_week"]

    # Get ordered list of unique weeks - converting to datetime for proper sorting
    all_weeks = df_filtered["month_year_week"].unique()
    weeks_datetime = pd.to_datetime(all_weeks)
    sorted_weeks_idx = weeks_datetime.argsort()
    all_weeks = all_weeks[sorted_weeks_idx]

    # Create mapping from week string to numeric index
    week_to_number = {week: idx for idx, week in enumerate(all_weeks)}

    # Merge back to get all activities
    cohort_data = pd.merge(
        df_filtered,
        first_trades[["trader_address", "cohort_week"]],
        on="trader_address",
    )

    # Calculate week number since first activity
    cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number)
    cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
    cohort_data["week_number"] = (
        cohort_data["activity_number"] - cohort_data["cohort_number"]
    )

    # Calculate retention by cohort
    cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
    retention_matrix = cohort_data.groupby(["cohort_week", "week_number"])[
        "trader_address"
    ].nunique()
    retention_matrix = retention_matrix.unstack(fill_value=0)

    # Convert to percentages
    retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100

    # Sort index (cohort_week) chronologically
    retention_matrix.index = pd.to_datetime(retention_matrix.index)
    retention_matrix = retention_matrix.sort_index()

    # Limit to max_weeks if specified
    if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
        retention_matrix = retention_matrix.iloc[:, :max_weeks]

    return retention_matrix.round(2)


def merge_retention_dataset(
    traders_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:

    traders_df["trader_type"] = traders_df["staking"].apply(
        lambda x: "non_Olas" if x == "non_Olas" else "Olas"
    )
    unknown_df["trader_type"] = "unclassified"
    all_traders = pd.concat([traders_df, unknown_df], ignore_index=True)

    all_traders["creation_timestamp"] = pd.to_datetime(
        all_traders["creation_timestamp"]
    )
    all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
    all_traders["month_year_week"] = (
        all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
    )
    return all_traders


def prepare_retention_dataset(
    retention_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:

    retention_df["trader_type"] = retention_df["staking"].apply(
        lambda x: "non_Olas" if x == "non_Olas" else "Olas"
    )
    retention_df.rename(columns={"request_time": "creation_timestamp"}, inplace=True)
    retention_df = retention_df[
        ["trader_type", "market_creator", "trader_address", "creation_timestamp"]
    ]
    unknown_df["trader_type"] = "unclassified"
    unknown_df = unknown_df[
        ["trader_type", "market_creator", "trader_address", "creation_timestamp"]
    ]
    all_traders = pd.concat([retention_df, unknown_df], ignore_index=True)

    all_traders["creation_timestamp"] = pd.to_datetime(
        all_traders["creation_timestamp"]
    )
    all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
    all_traders["month_year_week"] = (
        all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
    )
    return all_traders


if __name__ == "__main__":
    # read all datasets
    traders_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    unknown_df = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
    all_traders = prepare_retention_dataset(traders_df, unknown_df)
    # Usage example:
    wow_retention = calculate_wow_retention_by_type(all_traders)
    cohort_retention = calculate_cohort_retention(all_traders)