File size: 6,620 Bytes
f26bf5c 355fb10 f26bf5c dff5e35 8834fdb f26bf5c dff5e35 8834fdb f26bf5c 8834fdb f26bf5c 8834fdb dff5e35 8834fdb dff5e35 8834fdb f26bf5c dff5e35 63c3662 dff5e35 63c3662 f26bf5c dff5e35 f26bf5c 63c3662 f26bf5c 63c3662 f26bf5c 8834fdb f26bf5c dff5e35 355fb10 8834fdb f26bf5c 8834fdb dff5e35 8834fdb 355fb10 f26bf5c 8834fdb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import pandas as pd
from datetime import datetime, timedelta
from scripts.utils import DATA_DIR
# Basic Week over Week Retention
def calculate_wow_retention_by_type(
    df: pd.DataFrame, market_creator: str
) -> pd.DataFrame:
    """Compute week-over-week trader retention for each trader type.

    Only rows of the given market creator are considered. For every trader
    type, its active weeks are put in chronological order and each week is
    compared with the previous week in which that type had any activity
    (weeks without activity are skipped, they do not count as 0% retention).

    Args:
        df: Activity rows with at least the columns ``market_creator``,
            ``trader_type``, ``trader_address`` and ``month_year_week``.
        market_creator: Value of ``market_creator`` to keep.

    Returns:
        One row per trader type and week (each type's first week excluded)
        with the columns ``trader_type``, ``week``, ``retained_traders``,
        ``previous_traders`` and ``retention_rate`` (percentage rounded to
        two decimals). The frame is empty, but keeps these columns, when no
        pair of consecutive weeks exists.
    """
    columns = [
        "trader_type",
        "week",
        "retained_traders",
        "previous_traders",
        "retention_rate",
    ]
    filtered_df = df.loc[df["market_creator"] == market_creator]

    # Collect the set of addresses once per (trader type, week) instead of
    # re-filtering the whole frame for every comparison.
    traders_by_type = {}
    grouped = filtered_df.groupby(["trader_type", "month_year_week"])[
        "trader_address"
    ]
    for (trader_type, week), addresses in grouped:
        traders_by_type.setdefault(trader_type, {})[week] = set(addresses)

    # Week labels are strings such as "Jan-07-2024"; sorting them as text is
    # alphabetical by month name, so parse them to get a chronological order.
    all_weeks = list(
        dict.fromkeys(
            week for week_sets in traders_by_type.values() for week in week_sets
        )
    )
    try:
        week_order = dict(zip(all_weeks, pd.to_datetime(all_weeks)))
    except (ValueError, TypeError):
        # Labels that are not dates keep their natural ordering.
        week_order = {week: week for week in all_weeks}

    retention = []
    for trader_type, week_sets in traders_by_type.items():
        ordered_weeks = sorted(week_sets, key=week_order.__getitem__)
        for previous_week, current_week in zip(ordered_weeks, ordered_weeks[1:]):
            current_traders = week_sets[current_week]
            previous_traders = week_sets[previous_week]
            retained = len(current_traders & previous_traders)
            retention_rate = (
                (retained / len(previous_traders)) * 100 if previous_traders else 0
            )
            retention.append(
                {
                    "trader_type": trader_type,
                    "week": current_week,
                    "retained_traders": retained,
                    "previous_traders": len(previous_traders),
                    "retention_rate": round(retention_rate, 2),
                }
            )
    return pd.DataFrame(retention, columns=columns)
# Cohort Retention
def calculate_cohort_retention(
    df: pd.DataFrame, market_creator: str, trader_type: str, max_weeks=12
) -> pd.DataFrame:
    """Build a cohort retention matrix for one market creator and trader type.

    Traders are grouped into cohorts by the earliest week in which they were
    active. Each cell holds the percentage of the cohort that was active a
    given number of weeks after the cohort week. Week offsets count positions
    in the chronologically ordered list of weeks present in the filtered
    data, so calendar weeks without any activity are not counted.

    Args:
        df: Activity rows with at least the columns ``market_creator``,
            ``trader_type``, ``trader_address`` and ``month_year_week``
            (week labels must be parseable by ``pd.to_datetime``).
        market_creator: Value of ``market_creator`` to keep.
        trader_type: Value of ``trader_type`` to keep.
        max_weeks: Maximum number of week-offset columns to return; ``None``
            keeps all of them.

    Returns:
        A frame indexed by cohort week (as datetimes, ascending) whose
        columns are week offsets starting at 0 and whose values are
        percentages rounded to two decimals. An empty frame is returned when
        no rows match the filters.
    """
    df_filtered = df.loc[
        (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
    ]
    # Rows without an address cannot be assigned to a cohort.
    cohort_data = df_filtered.loc[
        df_filtered["trader_address"].notna(), ["trader_address", "month_year_week"]
    ].copy()
    if cohort_data.empty:
        return pd.DataFrame()

    # Get ordered list of unique weeks - converting to datetime for proper sorting
    all_weeks = df_filtered["month_year_week"].unique()
    all_weeks = all_weeks[pd.to_datetime(all_weeks).argsort()]
    # Mappings between week label and its chronological position
    week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
    number_to_week = dict(enumerate(all_weeks))

    # A trader's cohort is the earliest week they were active in; taking the
    # minimum position makes this independent of the row order of ``df``.
    cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
    cohort_data["cohort_number"] = cohort_data.groupby("trader_address")[
        "activity_number"
    ].transform("min")
    cohort_data["cohort_week"] = cohort_data["cohort_number"].map(number_to_week)
    cohort_data["week_number"] = (
        cohort_data["activity_number"] - cohort_data["cohort_number"]
    )

    # Calculate retention by cohort
    cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
    retention_matrix = (
        cohort_data.groupby(["cohort_week", "week_number"])["trader_address"]
        .nunique()
        .unstack(fill_value=0)
    )
    # Convert to percentages of the cohort size
    retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100
    # Sort index (cohort_week) chronologically
    retention_matrix.index = pd.to_datetime(retention_matrix.index)
    retention_matrix = retention_matrix.sort_index()
    # Limit to max_weeks if specified
    if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
        retention_matrix = retention_matrix.iloc[:, :max_weeks]
    return retention_matrix.round(2)
def merge_retention_dataset(
    traders_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:
    """Combine classified and unknown traders into one retention dataset.

    Rows of ``traders_df`` are labelled ``"non_Olas"`` when their ``staking``
    value is ``"non_Olas"`` and ``"Olas"`` otherwise; every row of
    ``unknown_df`` is labelled ``"unclassified"``. Neither input frame is
    modified.

    Args:
        traders_df: Trader rows with at least the columns ``staking`` and
            ``creation_timestamp``.
        unknown_df: Unknown-trader rows with at least ``creation_timestamp``.

    Returns:
        All rows of both frames, sorted by ``creation_timestamp`` (converted
        to datetime), with the added columns ``trader_type`` and
        ``month_year_week`` (the weekly period of the timestamp formatted as
        ``%b-%d-%Y``).
    """
    # ``assign`` returns new frames, so the callers' data stays untouched.
    classified = traders_df.assign(
        trader_type=traders_df["staking"].apply(
            lambda x: "non_Olas" if x == "non_Olas" else "Olas"
        )
    )
    unclassified = unknown_df.assign(trader_type="unclassified")

    all_traders = pd.concat([classified, unclassified], ignore_index=True)
    all_traders["creation_timestamp"] = pd.to_datetime(
        all_traders["creation_timestamp"]
    )
    all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
    all_traders["month_year_week"] = (
        all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
    )
    return all_traders
def prepare_retention_dataset(
    retention_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:
    """Build the trimmed dataset used by the retention calculations.

    Rows of ``retention_df`` are labelled ``"non_Olas"`` when their
    ``staking`` value is ``"non_Olas"`` and ``"Olas"`` otherwise, and their
    ``request_time`` column is exposed as ``creation_timestamp``. Every row of
    ``unknown_df`` is labelled ``"unclassified"``. Neither input frame is
    modified.

    Args:
        retention_df: Trader rows with at least the columns ``staking``,
            ``market_creator``, ``trader_address`` and ``request_time``.
        unknown_df: Unknown-trader rows with at least ``market_creator``,
            ``trader_address`` and ``creation_timestamp``.

    Returns:
        A frame with the columns ``trader_type``, ``market_creator``,
        ``trader_address``, ``creation_timestamp`` (as datetime) and
        ``month_year_week`` (the weekly period of the timestamp formatted as
        ``%b-%d-%Y``), sorted by ``creation_timestamp``.
    """
    columns = ["trader_type", "market_creator", "trader_address", "creation_timestamp"]

    # ``assign`` and ``rename`` return new frames, so the callers' data
    # (column names included) stays untouched.
    classified = retention_df.assign(
        trader_type=retention_df["staking"].apply(
            lambda x: "non_Olas" if x == "non_Olas" else "Olas"
        )
    ).rename(columns={"request_time": "creation_timestamp"})[columns]
    unclassified = unknown_df.assign(trader_type="unclassified")[columns]

    all_traders = pd.concat([classified, unclassified], ignore_index=True)
    all_traders["creation_timestamp"] = pd.to_datetime(
        all_traders["creation_timestamp"]
    )
    all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
    all_traders["month_year_week"] = (
        all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
    )
    return all_traders
if __name__ == "__main__":
    # read all datasets
    traders_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    unknown_df = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")
    all_traders = prepare_retention_dataset(traders_df, unknown_df)
    # Usage example: both calculations need to know which market creator
    # (and, for cohorts, which trader type) to analyse, so run them for every
    # combination that actually occurs in the data.
    creator_type_pairs = list(
        all_traders[["market_creator", "trader_type"]]
        .dropna()
        .drop_duplicates()
        .itertuples(index=False, name=None)
    )
    # week-over-week retention, keyed by market creator
    wow_retention = {
        creator: calculate_wow_retention_by_type(all_traders, creator)
        for creator in dict.fromkeys(creator for creator, _ in creator_type_pairs)
    }
    # cohort retention, keyed by (market creator, trader type)
    cohort_retention = {
        (creator, trader_type): calculate_cohort_retention(
            all_traders, creator, trader_type
        )
        for creator, trader_type in creator_type_pairs
    }
|