diff --git "a/notebooks/retention_metrics.ipynb" "b/notebooks/retention_metrics.ipynb" --- "a/notebooks/retention_metrics.ipynb" +++ "b/notebooks/retention_metrics.ipynb" @@ -12,6 +12,13 @@ "import gc" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -21,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -30,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -41,7 +48,7 @@ " dtype='object')" ] }, - "execution_count": 12, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -52,7 +59,58 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retention_df.staking.value_counts" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "retention_df[\"trader_type\"] = retention_df[\"staking\"].apply(\n", + " lambda x: \"non_Olas\" if x == \"non_Olas\" else \"Olas\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "olas_data = retention_df.loc[retention_df[\"trader_type\"]==\"Olas\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -82,338 +140,261 @@ " request_date\n", " staking\n", " month_year_week\n", + " trader_type\n", " \n", " \n", " \n", " \n", - " 0\n", - " 0x721de88cee9be146c8f0c7ef1a4188bee36494d6\n", - " 2024-10-25 00:00:20+00:00\n", + " 2\n", + " 0x8fb970f4aff9b61e6b3bc5a8117b437b89c88711\n", + " 2024-11-13 00:04:25+00:00\n", " quickstart\n", - " 2024-10-25\n", + " 2024-11-13\n", " non_staking\n", - " 
Oct-25-2024\n", + " Nov-13-2024\n", + " Olas\n", " \n", " \n", - " 1\n", - " 0x8a1d5f22b5a3bea34697b85e7b4ad894bf9ee36a\n", - " 2024-10-25 00:00:25+00:00\n", - " quickstart\n", - " 2024-10-25\n", - " non_staking\n", - " Oct-25-2024\n", + " 3\n", + " 0x1fe2b09de07475b1027b0c73a5bf52693b31a52e\n", + " 2024-11-13 00:05:10+00:00\n", + " pearl\n", + " 2024-11-13\n", + " pearl\n", + " Nov-13-2024\n", + " Olas\n", " \n", " \n", - " 2\n", - " 0xf839eaf4b42eadd917b46d7b6da0dd0e1fd6f684\n", - " 2024-10-25 00:00:55+00:00\n", - " quickstart\n", - " 2024-10-25\n", - " non_staking\n", - " Oct-25-2024\n", + " 6\n", + " 0x1fe2b09de07475b1027b0c73a5bf52693b31a52e\n", + " 2024-11-13 00:08:05+00:00\n", + " pearl\n", + " 2024-11-13\n", + " pearl\n", + " Nov-13-2024\n", + " Olas\n", " \n", " \n", - " 3\n", - " 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6\n", - " 2024-10-25 00:00:55+00:00\n", - " quickstart\n", - " 2024-10-25\n", - " non_staking\n", - " Oct-25-2024\n", + " 10\n", + " 0xd71b78ce490776a8f0cad6876ea79bc190f7bcce\n", + " 2024-11-13 00:19:45+00:00\n", + " pearl\n", + " 2024-11-13\n", + " pearl\n", + " Nov-13-2024\n", + " Olas\n", " \n", " \n", - " 4\n", - " 0xc20678890f94d0162593c46fe5da67d9a4b7a6fb\n", - " 2024-10-25 00:01:05+00:00\n", + " 11\n", + " 0x6f40dbf1f102d47248802a423c0cd117ac4a3781\n", + " 2024-11-13 00:27:30+00:00\n", " quickstart\n", - " 2024-10-25\n", + " 2024-11-13\n", " non_staking\n", - " Oct-25-2024\n", + " Nov-13-2024\n", + " Olas\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trader_address request_time \\\n", - "0 0x721de88cee9be146c8f0c7ef1a4188bee36494d6 2024-10-25 00:00:20+00:00 \n", - "1 0x8a1d5f22b5a3bea34697b85e7b4ad894bf9ee36a 2024-10-25 00:00:25+00:00 \n", - "2 0xf839eaf4b42eadd917b46d7b6da0dd0e1fd6f684 2024-10-25 00:00:55+00:00 \n", - "3 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6 2024-10-25 00:00:55+00:00 \n", - "4 0xc20678890f94d0162593c46fe5da67d9a4b7a6fb 2024-10-25 00:01:05+00:00 \n", + " trader_address request_time \\\n", + "2 
0x8fb970f4aff9b61e6b3bc5a8117b437b89c88711 2024-11-13 00:04:25+00:00 \n", + "3 0x1fe2b09de07475b1027b0c73a5bf52693b31a52e 2024-11-13 00:05:10+00:00 \n", + "6 0x1fe2b09de07475b1027b0c73a5bf52693b31a52e 2024-11-13 00:08:05+00:00 \n", + "10 0xd71b78ce490776a8f0cad6876ea79bc190f7bcce 2024-11-13 00:19:45+00:00 \n", + "11 0x6f40dbf1f102d47248802a423c0cd117ac4a3781 2024-11-13 00:27:30+00:00 \n", "\n", - " market_creator request_date staking month_year_week \n", - "0 quickstart 2024-10-25 non_staking Oct-25-2024 \n", - "1 quickstart 2024-10-25 non_staking Oct-25-2024 \n", - "2 quickstart 2024-10-25 non_staking Oct-25-2024 \n", - "3 quickstart 2024-10-25 non_staking Oct-25-2024 \n", - "4 quickstart 2024-10-25 non_staking Oct-25-2024 " + " market_creator request_date staking month_year_week trader_type \n", + "2 quickstart 2024-11-13 non_staking Nov-13-2024 Olas \n", + "3 pearl 2024-11-13 pearl Nov-13-2024 Olas \n", + "6 pearl 2024-11-13 pearl Nov-13-2024 Olas \n", + "10 pearl 2024-11-13 pearl Nov-13-2024 Olas \n", + "11 quickstart 2024-11-13 non_staking Nov-13-2024 Olas " ] }, - "execution_count": 13, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "retention_df.head()" + "olas_data.head()" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "staking\n", - "non_Olas 764956\n", - "non_staking 275246\n", - "pearl 56487\n", - "quickstart 48511\n", - "Name: count, dtype: int64" + "" ] }, - "execution_count": 14, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "retention_df.staking.value_counts()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Join the two datasets" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# read trades dataset\n", - "traders_df = pd.read_parquet(\"../data/all_trades_profitability.parquet\")\n", - "unknown_df = pd.read_parquet(\"../data/unknown_traders.parquet\")\n" + "sns.histplot(olas_data, y=\"request_date\", hue=\"market_creator\")" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "staking\n", - "non_Olas 56266\n", - "non_staking 20954\n", - "pearl 6084\n", - "quickstart 3975\n", - "Name: count, dtype: int64" + "Index(['trader_address', 'request_time', 'market_creator', 'request_date',\n", + " 'staking', 'month_year_week'],\n", + " dtype='object')" ] }, - "execution_count": 25, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "traders_df.staking.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "traders_df[\"trader_type\"] = traders_df[\"staking\"].apply(\n", - " lambda x: \"non_Olas\" if x == \"non_Olas\" else \"Olas\"\n", - ")" + "retention_df.columns" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "trader_type\n", - "non_Olas 56266\n", - "Olas 31013\n", - "Name: count, dtype: int64" + "" ] }, - "execution_count": 30, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "traders_df.trader_type.value_counts()" + "import seaborn as sns\n", + "\n", + "sns.histplot(retention_df, y=\"request_date\")" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "staking\n", - "non_Olas 1654\n", - "Name: count, dtype: int64" + "Timestamp('2024-10-25 00:00:20+0000', tz='UTC')" ] }, - "execution_count": 26, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "unknown_df.staking.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "unknown_df[\"trader_type\"] = \"unclassified\"" + "min(retention_df.request_time)" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "all_traders = pd.concat([traders_df, unknown_df], ignore_index=True)" + "from datetime import datetime\n", + "cutoff_date1 = \"2024-11-13\"\n", + "timestamp1 = pd.Timestamp(\n", + "datetime.strptime(cutoff_date1, \"%Y-%m-%d\")\n", + ").tz_localize(\"UTC\")" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "trader_type\n", - "non_Olas 56266\n", - "Olas 31013\n", - "unclassified 1654\n", - "Name: count, dtype: int64" + "Timestamp('2024-11-13 00:00:15+0000', tz='UTC')" ] }, - "execution_count": 31, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "all_traders.trader_type.value_counts()" + "retention_df = retention_df.loc[retention_df[\"request_time\"]>=timestamp1]\n", + "min(retention_df.request_time)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 34, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_51242/2488528526.py:5: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n", - " all_traders[\"creation_timestamp\"].dt.to_period(\"W\").dt.strftime(\"%b-%d-%Y\")\n" - ] + "data": { + "text/plain": [ + "Index(['trader_address', 'request_time', 'market_creator', 'request_date',\n", + " 'staking', 'month_year_week'],\n", + " dtype='object')" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# First, create week numbers from timestamps\n", - "all_traders[\"creation_timestamp\"] = pd.to_datetime(all_traders[\"creation_timestamp\"])\n", - "all_traders = all_traders.sort_values(by=\"creation_timestamp\", ascending=True)\n", - "all_traders[\"month_year_week\"] = (\n", - "all_traders[\"creation_timestamp\"].dt.to_period(\"W\").dt.strftime(\"%b-%d-%Y\")\n", - ")" + "retention_df.columns" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 33, "metadata": {}, + "outputs": [], "source": [ - "# WoW Retention" + "retention_df.to_parquet(\"../data/retention_activity.parquet\", index=False)" ] }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "def calculate_wow_retention_by_type(df):\n", - " # Get unique traders per week and type\n", - " weekly_traders = df.groupby(['month_year_week', 'trader_type'])['trader_address'].nunique().reset_index()\n", - " weekly_traders = weekly_traders.sort_values(['trader_type', 'month_year_week'])\n", - " \n", - " # Calculate retention\n", - " retention = []\n", - " # Iterate through each trader type\n", - " for trader_type in weekly_traders['trader_type'].unique():\n", - " type_data = weekly_traders[weekly_traders['trader_type'] == trader_type]\n", - " \n", - " # Calculate retention for each week within this trader type\n", - " for i in range(1, len(type_data)):\n", - " current_week = 
type_data.iloc[i]['month_year_week']\n", - " previous_week = type_data.iloc[i-1]['month_year_week']\n", - " \n", - " # Get traders in both weeks for this type\n", - " current_traders = set(df[\n", - " (df['month_year_week'] == current_week) & \n", - " (df['trader_type'] == trader_type)\n", - " ]['trader_address'])\n", - " \n", - " previous_traders = set(df[\n", - " (df['month_year_week'] == previous_week) & \n", - " (df['trader_type'] == trader_type)\n", - " ]['trader_address'])\n", - " \n", - " retained = len(current_traders.intersection(previous_traders))\n", - " retention_rate = (retained / len(previous_traders)) * 100 if len(previous_traders) > 0 else 0\n", - " \n", - " retention.append({\n", - " 'trader_type': trader_type,\n", - " 'week': current_week,\n", - " 'retained_traders': retained,\n", - " 'previous_traders': len(previous_traders),\n", - " 'retention_rate': round(retention_rate, 2)\n", - " })\n", - " \n", - " return pd.DataFrame(retention)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "wow_retention = calculate_wow_retention_by_type(all_traders)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -437,212 +418,573 @@ " \n", " \n", " \n", - " trader_type\n", - " week\n", - " retained_traders\n", - " previous_traders\n", - " retention_rate\n", + " trader_address\n", + " request_time\n", + " market_creator\n", + " request_date\n", + " staking\n", + " month_year_week\n", " \n", " \n", " \n", " \n", " 0\n", - " Olas\n", - " Dec-08-2024\n", - " 93\n", - " 98\n", - " 94.90\n", + " 0x721de88cee9be146c8f0c7ef1a4188bee36494d6\n", + " 2024-10-25 00:00:20+00:00\n", + " quickstart\n", + " 2024-10-25\n", + " non_staking\n", + " Oct-25-2024\n", " \n", " \n", " 1\n", - " Olas\n", - " Dec-15-2024\n", - " 187\n", - " 207\n", - " 90.34\n", + " 0x8a1d5f22b5a3bea34697b85e7b4ad894bf9ee36a\n", + " 2024-10-25 00:00:25+00:00\n", + " 
quickstart\n", + " 2024-10-25\n", + " non_staking\n", + " Oct-25-2024\n", " \n", " \n", " 2\n", - " Olas\n", - " Dec-22-2024\n", - " 186\n", - " 213\n", - " 87.32\n", + " 0xf839eaf4b42eadd917b46d7b6da0dd0e1fd6f684\n", + " 2024-10-25 00:00:55+00:00\n", + " quickstart\n", + " 2024-10-25\n", + " non_staking\n", + " Oct-25-2024\n", " \n", " \n", " 3\n", - " Olas\n", - " Dec-29-2024\n", - " 143\n", - " 203\n", - " 70.44\n", + " 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6\n", + " 2024-10-25 00:00:55+00:00\n", + " quickstart\n", + " 2024-10-25\n", + " non_staking\n", + " Oct-25-2024\n", " \n", " \n", " 4\n", - " Olas\n", - " Jan-05-2025\n", - " 117\n", - " 148\n", - " 79.05\n", + " 0xc20678890f94d0162593c46fe5da67d9a4b7a6fb\n", + " 2024-10-25 00:01:05+00:00\n", + " quickstart\n", + " 2024-10-25\n", + " non_staking\n", + " Oct-25-2024\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trader_type week retained_traders previous_traders retention_rate\n", - "0 Olas Dec-08-2024 93 98 94.90\n", - "1 Olas Dec-15-2024 187 207 90.34\n", - "2 Olas Dec-22-2024 186 213 87.32\n", - "3 Olas Dec-29-2024 143 203 70.44\n", - "4 Olas Jan-05-2025 117 148 79.05" + " trader_address request_time \\\n", + "0 0x721de88cee9be146c8f0c7ef1a4188bee36494d6 2024-10-25 00:00:20+00:00 \n", + "1 0x8a1d5f22b5a3bea34697b85e7b4ad894bf9ee36a 2024-10-25 00:00:25+00:00 \n", + "2 0xf839eaf4b42eadd917b46d7b6da0dd0e1fd6f684 2024-10-25 00:00:55+00:00 \n", + "3 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6 2024-10-25 00:00:55+00:00 \n", + "4 0xc20678890f94d0162593c46fe5da67d9a4b7a6fb 2024-10-25 00:01:05+00:00 \n", + "\n", + " market_creator request_date staking month_year_week \n", + "0 quickstart 2024-10-25 non_staking Oct-25-2024 \n", + "1 quickstart 2024-10-25 non_staking Oct-25-2024 \n", + "2 quickstart 2024-10-25 non_staking Oct-25-2024 \n", + "3 quickstart 2024-10-25 non_staking Oct-25-2024 \n", + "4 quickstart 2024-10-25 non_staking Oct-25-2024 " ] }, - "execution_count": 35, + "execution_count": 13, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ - "wow_retention.head()" + "retention_df.head()" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trader_typeweekretained_tradersprevious_tradersretention_rate
9non_Olas2024-12-08154154100.00
10non_Olas2024-12-1530132492.90
11non_Olas2024-12-2231032196.57
12non_Olas2024-12-2931234191.50
13non_Olas2025-01-0530432693.25
14non_Olas2025-01-1224633373.87
15non_Olas2024-11-105125120.32
16non_Olas2024-11-179010090.00
17non_Olas2024-11-2415118183.43
\n", - "
" - ], "text/plain": [ - " trader_type week retained_traders previous_traders retention_rate\n", - "9 non_Olas 2024-12-08 154 154 100.00\n", - "10 non_Olas 2024-12-15 301 324 92.90\n", - "11 non_Olas 2024-12-22 310 321 96.57\n", - "12 non_Olas 2024-12-29 312 341 91.50\n", - "13 non_Olas 2025-01-05 304 326 93.25\n", - "14 non_Olas 2025-01-12 246 333 73.87\n", - "15 non_Olas 2024-11-10 51 251 20.32\n", - "16 non_Olas 2024-11-17 90 100 90.00\n", - "17 non_Olas 2024-11-24 151 181 83.43" + "staking\n", + "non_Olas 764956\n", + "non_staking 275246\n", + "pearl 56487\n", + "quickstart 48511\n", + "Name: count, dtype: int64" ] }, - "execution_count": 39, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "non_olas = wow_retention.loc[wow_retention[\"trader_type\"]==\"non_Olas\"]\n", - "non_olas" + "retention_df.staking.value_counts()" ] }, { - "cell_type": "code", - "execution_count": 47, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Join the two datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "# read trades dataset\n", + "traders_df = pd.read_parquet(\"../data/all_trades_profitability.parquet\")\n", + "unknown_df = pd.read_parquet(\"../data/unknown_traders.parquet\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "staking\n", + "non_Olas 56266\n", + "non_staking 20954\n", + "pearl 6084\n", + "quickstart 3975\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traders_df.staking.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "traders_df[\"trader_type\"] = traders_df[\"staking\"].apply(\n", + " lambda x: \"non_Olas\" if x == \"non_Olas\" else \"Olas\"\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trader_type\n", + "non_Olas 56266\n", + "Olas 31013\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traders_df.trader_type.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "staking\n", + "non_Olas 1654\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unknown_df.staking.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "unknown_df[\"trader_type\"] = \"unclassified\"" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "all_traders = pd.concat([traders_df, unknown_df], ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trader_type\n", + "non_Olas 56266\n", + "Olas 31013\n", + "unclassified 1654\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_traders.trader_type.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_51242/2488528526.py:5: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n", + " all_traders[\"creation_timestamp\"].dt.to_period(\"W\").dt.strftime(\"%b-%d-%Y\")\n" + ] + } + ], + "source": [ + "# First, create week numbers from timestamps\n", + "all_traders[\"creation_timestamp\"] = 
def _week_sort_key(week):
    """Return a chronological sort key for a ``month_year_week`` label."""
    try:
        # Labels in this notebook are produced with strftime("%b-%d-%Y").
        return pd.to_datetime(week, format="%b-%d-%Y")
    except (ValueError, TypeError):
        # Any other label format: fall back to pandas' generic parser.
        return pd.to_datetime(week)


def calculate_wow_retention_by_type(df):
    """Compute week-over-week trader retention for every trader type.

    For each ``trader_type`` the weeks present in ``df`` are ordered
    chronologically and each week is compared with the week before it in that
    ordering: a trader is "retained" when their address appears in both.

    Args:
        df: DataFrame with at least the columns ``month_year_week``,
            ``trader_type`` and ``trader_address``.

    Returns:
        DataFrame with one row per (trader type, week) pair, except the first
        week of each type, and the columns ``trader_type``, ``week``,
        ``retained_traders``, ``previous_traders`` and ``retention_rate``
        (percentage rounded to 2 decimals).
    """
    output_columns = [
        "trader_type",
        "week",
        "retained_traders",
        "previous_traders",
        "retention_rate",
    ]

    # Collect the set of addresses per (type, week) in a single pass instead
    # of re-filtering the full frame twice for every week.
    traders_by_type = {}
    for (trader_type, week), addresses in df.groupby(
        ["trader_type", "month_year_week"]
    )["trader_address"]:
        traders_by_type.setdefault(trader_type, {})[week] = set(addresses)

    retention = []
    for trader_type, traders_by_week in traders_by_type.items():
        # Sort on the parsed date: sorting the label strings would be
        # alphabetical ("Dec" < "Jan" < "Nov") and pair unrelated weeks.
        ordered_weeks = sorted(traders_by_week, key=_week_sort_key)

        for previous_week, current_week in zip(ordered_weeks, ordered_weeks[1:]):
            current_traders = traders_by_week[current_week]
            previous_traders = traders_by_week[previous_week]

            retained = len(current_traders & previous_traders)
            retention_rate = (
                (retained / len(previous_traders)) * 100
                if len(previous_traders) > 0
                else 0
            )

            retention.append(
                {
                    "trader_type": trader_type,
                    "week": current_week,
                    "retained_traders": retained,
                    "previous_traders": len(previous_traders),
                    "retention_rate": round(retention_rate, 2),
                }
            )

    # Passing the column list keeps the schema stable when there are no rows.
    return pd.DataFrame(retention, columns=output_columns)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trader_typeweekretained_tradersprevious_tradersretention_rate
0OlasDec-08-2024939894.90
1OlasDec-15-202418720790.34
2OlasDec-22-202418621387.32
3OlasDec-29-202414320370.44
4OlasJan-05-202511714879.05
\n", + "
" + ], + "text/plain": [ + " trader_type week retained_traders previous_traders retention_rate\n", + "0 Olas Dec-08-2024 93 98 94.90\n", + "1 Olas Dec-15-2024 187 207 90.34\n", + "2 Olas Dec-22-2024 186 213 87.32\n", + "3 Olas Dec-29-2024 143 203 70.44\n", + "4 Olas Jan-05-2025 117 148 79.05" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wow_retention.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trader_typeweekretained_tradersprevious_tradersretention_rate
9non_Olas2024-12-08154154100.00
10non_Olas2024-12-1530132492.90
11non_Olas2024-12-2231032196.57
12non_Olas2024-12-2931234191.50
13non_Olas2025-01-0530432693.25
14non_Olas2025-01-1224633373.87
15non_Olas2024-11-105125120.32
16non_Olas2024-11-179010090.00
17non_Olas2024-11-2415118183.43
\n", + "
def _parse_week_labels(labels):
    """Parse ``month_year_week`` labels into datetimes."""
    try:
        # Labels in this notebook are produced with strftime("%b-%d-%Y").
        return pd.to_datetime(labels, format="%b-%d-%Y")
    except (ValueError, TypeError):
        # Any other label format: fall back to pandas' generic parser.
        return pd.to_datetime(labels)


def calculate_cohort_retention(
    df: pd.DataFrame, market_creator: str, trader_type: str, verbose: bool = False
) -> pd.DataFrame:
    """Build a cohort retention matrix for one market creator and trader type.

    Traders are grouped into cohorts by the week of their earliest activity.
    Each cell gives the percentage of a cohort's traders that were active
    ``week_number`` weeks after the cohort week.

    Args:
        df: Activity rows with the columns ``market_creator``, ``trader_type``,
            ``trader_address``, ``creation_timestamp`` and ``month_year_week``.
        market_creator: Value of ``market_creator`` to keep.
        trader_type: Value of ``trader_type`` to keep.
        verbose: When True, print the intermediate week list, merged data and
            cohort sizes for inspection.

    Returns:
        DataFrame indexed by cohort week (datetime, ascending) with one column
        per week offset and percentages rounded to 2 decimals. An empty
        DataFrame is returned when no row matches the filter.
    """
    df_filtered = df.loc[
        (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
    ]
    if df_filtered.empty:
        return pd.DataFrame()

    # Cohort week = week label of each trader's earliest row. Sorting first
    # makes "first" independent of the row order of the incoming frame.
    first_trades = (
        df_filtered.sort_values("creation_timestamp", kind="stable")
        .groupby("trader_address")["month_year_week"]
        .first()
        .rename("cohort_week")
        .reset_index()
    )

    # Order the distinct week labels chronologically (not alphabetically).
    all_weeks = df_filtered["month_year_week"].unique()
    all_weeks = all_weeks[_parse_week_labels(all_weeks).argsort()]
    if verbose:
        print(f"all_weeks={all_weeks}")

    # Offsets are positions in the list of weeks that have activity in the
    # filtered data, so a calendar week with no activity at all is skipped.
    week_to_number = {week: idx for idx, week in enumerate(all_weeks)}

    # Attach each trader's cohort week to every one of their activity rows.
    cohort_data = pd.merge(df_filtered, first_trades, on="trader_address")
    if verbose:
        print(cohort_data.tail())
        print(cohort_data.cohort_week.value_counts())

    cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number)
    cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
    cohort_data["week_number"] = (
        cohort_data["activity_number"] - cohort_data["cohort_number"]
    )

    # Unique traders per cohort, and per (cohort, week offset).
    cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
    if verbose:
        print("COHORT SIZES")
        print(cohort_sizes)
    retention_matrix = (
        cohort_data.groupby(["cohort_week", "week_number"])["trader_address"]
        .nunique()
        .unstack(fill_value=0)
    )

    # Convert counts to percentages of the cohort size.
    retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100

    # Sort cohorts chronologically.
    retention_matrix.index = _parse_week_labels(retention_matrix.index)
    retention_matrix = retention_matrix.sort_index()

    return retention_matrix.round(2)


def prepare_retention_dataset(
    retention_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:
    """Combine classified and unclassified trader activity into one frame.

    Neither input frame is modified.

    Args:
        retention_df: Activity rows with the columns ``staking``,
            ``market_creator``, ``trader_address`` and ``request_time``
            (``creation_timestamp`` is used as-is if the column was already
            renamed).
        unknown_df: Rows with the columns ``market_creator``,
            ``trader_address`` and ``creation_timestamp``; all of them are
            tagged with trader type ``"unclassified"``.

    Returns:
        DataFrame sorted by ``creation_timestamp`` with the columns
        ``trader_type``, ``market_creator``, ``trader_address``,
        ``creation_timestamp`` and ``month_year_week``.
    """
    keep = ["trader_type", "market_creator", "trader_address", "creation_timestamp"]

    # Anything that is not explicitly "non_Olas" counts as an Olas trader.
    # Build new frames instead of adding/renaming columns on the caller's data.
    classified = retention_df.rename(
        columns={"request_time": "creation_timestamp"}
    ).assign(
        trader_type=retention_df["staking"]
        .eq("non_Olas")
        .map({True: "non_Olas", False: "Olas"})
    )[keep]
    unclassified = unknown_df.assign(trader_type="unclassified")[keep]

    all_traders = pd.concat([classified, unclassified], ignore_index=True)
    all_traders["creation_timestamp"] = pd.to_datetime(
        all_traders["creation_timestamp"]
    )
    all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)

    # to_period() discards the timezone anyway (and warns about it); dropping
    # it explicitly yields the same weekly labels without the UserWarning.
    timestamps = all_traders["creation_timestamp"]
    if timestamps.dt.tz is not None:
        timestamps = timestamps.dt.tz_localize(None)
    all_traders["month_year_week"] = timestamps.dt.to_period("W").dt.strftime(
        "%b-%d-%Y"
    )
    return all_traders
'Jan-19-2025']\n", + " trader_type market_creator trader_address \\\n", + "50119 Olas pearl 0xcaa568047b0ae065b5cf5d29b875e31afe0ab70e \n", + "50120 Olas pearl 0xcaa568047b0ae065b5cf5d29b875e31afe0ab70e \n", + "50121 Olas pearl 0x5fc7213135962250147030c5dd30b84a80f2ad1e \n", + "50122 Olas pearl 0xe715cc8f264ab48f75bb1b5c11d7dbaf949d73c5 \n", + "50123 Olas pearl 0xe715cc8f264ab48f75bb1b5c11d7dbaf949d73c5 \n", + "\n", + " creation_timestamp month_year_week cohort_week \n", + "50119 2025-01-14 12:34:00+00:00 Jan-19-2025 Dec-08-2024 \n", + "50120 2025-01-14 12:36:35+00:00 Jan-19-2025 Dec-08-2024 \n", + "50121 2025-01-14 13:18:10+00:00 Jan-19-2025 Nov-17-2024 \n", + "50122 2025-01-14 13:21:10+00:00 Jan-19-2025 Nov-17-2024 \n", + "50123 2025-01-14 13:22:55+00:00 Jan-19-2025 Nov-17-2024 \n", + "cohort_week\n", + "Nov-17-2024 46407\n", + "Nov-24-2024 1802\n", + "Dec-01-2024 848\n", + "Dec-08-2024 578\n", + "Dec-15-2024 429\n", + "Jan-19-2025 46\n", + "Dec-22-2024 7\n", + "Dec-29-2024 7\n", + "Name: count, dtype: int64\n", + "COHORT SIZES\n", + "cohort_week\n", + "Dec-01-2024 7\n", + "Dec-08-2024 6\n", + "Dec-15-2024 10\n", + "Dec-22-2024 2\n", + "Dec-29-2024 1\n", + "Jan-19-2025 1\n", + "Nov-17-2024 143\n", + "Nov-24-2024 6\n", + "Name: trader_address, dtype: int64\n" + ] + } + ], + "source": [ + "olas_cohort_pearl = calculate_cohort_retention(df=all_traders, market_creator=\"pearl\", trader_type=\"Olas\")" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
week_number0123456789
cohort_week
2024-12-08100.083.3366.6766.6733.3350.050.00.00.00.0
2024-12-15100.040.0030.0030.0020.000.00.00.00.00.0
2024-12-22100.00.000.000.000.000.00.00.00.00.0
2024-12-29100.00.000.000.000.000.00.00.00.00.0
2025-01-19100.00.000.000.000.000.00.00.00.00.0
\n", + "
" + ], + "text/plain": [ + "week_number 0 1 2 3 4 5 6 7 8 9\n", + "cohort_week \n", + "2024-12-08 100.0 83.33 66.67 66.67 33.33 50.0 50.0 0.0 0.0 0.0\n", + "2024-12-15 100.0 40.00 30.00 30.00 20.00 0.0 0.0 0.0 0.0 0.0\n", + "2024-12-22 100.0 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.0\n", + "2024-12-29 100.0 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.0\n", + "2025-01-19 100.0 0.00 0.00 0.00 0.00 0.0 0.0 0.0 0.0 0.0" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "olas_cohort_pearl.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64', name='week_number')" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "olas_cohort_pearl.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "olas_data.rename(columns={\"request_time\": \"creation_timestamp\"}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_51242/1537996894.py:3: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n", + " olas_data[\"creation_timestamp\"].dt.to_period(\"W\").dt.strftime(\"%b-%d-%Y\")\n" + ] + } + ], + "source": [ + "olas_data = olas_data.sort_values(by=\"creation_timestamp\", ascending=True)\n", + "olas_data[\"month_year_week\"] = (\n", + " olas_data[\"creation_timestamp\"].dt.to_period(\"W\").dt.strftime(\"%b-%d-%Y\")\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "first_trades = 
(\n", + " olas_data.groupby(\"trader_address\")\n", + " .agg({\"creation_timestamp\": \"min\", \"month_year_week\": \"first\"})\n", + " .reset_index()\n", + " )\n", + "first_trades.columns = [\"trader_address\", \"first_trade\", \"cohort_week\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trader_addressfirst_tradecohort_week
00x006f70b4e3c3a3648f31ec16b2e7106fc58166f22024-11-20 07:26:35+00:00Nov-24-2024
10x00897abcbbefe4f558956b7a9d1b7819677e4d902024-11-13 10:21:25+00:00Nov-17-2024
20x01274796ce41aa8e8312e05a427ffb4b0d2148f62024-11-13 10:13:05+00:00Nov-17-2024
30x01c72d0743a22b70d73c76c5e16ba7524e20e0c02024-11-13 19:27:15+00:00Nov-17-2024
40x0244169d0fe1014b9e71f71070099d9c2364af282024-11-16 06:06:20+00:00Nov-17-2024
............
2680xfaa64c148c32af3552413438ec78599bffbd077a2024-11-13 03:23:00+00:00Nov-17-2024
2690xfab01f48385de3ffea00f9bf9f27c888e4802d172024-11-13 12:22:50+00:00Nov-17-2024
2700xfe16926cefc4db4a7496bfc3e961445228fbbf392024-12-15 12:47:10+00:00Dec-15-2024
2710xfe2c8e93ce30d1c961831d8e4d120a307aedbe722024-11-13 11:44:40+00:00Nov-17-2024
2720xfe94203ab2c1c22fe8585cbebf865f7b69eb70272024-11-13 00:46:35+00:00Nov-17-2024
\n", + "

273 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " trader_address first_trade \\\n", + "0 0x006f70b4e3c3a3648f31ec16b2e7106fc58166f2 2024-11-20 07:26:35+00:00 \n", + "1 0x00897abcbbefe4f558956b7a9d1b7819677e4d90 2024-11-13 10:21:25+00:00 \n", + "2 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6 2024-11-13 10:13:05+00:00 \n", + "3 0x01c72d0743a22b70d73c76c5e16ba7524e20e0c0 2024-11-13 19:27:15+00:00 \n", + "4 0x0244169d0fe1014b9e71f71070099d9c2364af28 2024-11-16 06:06:20+00:00 \n", + ".. ... ... \n", + "268 0xfaa64c148c32af3552413438ec78599bffbd077a 2024-11-13 03:23:00+00:00 \n", + "269 0xfab01f48385de3ffea00f9bf9f27c888e4802d17 2024-11-13 12:22:50+00:00 \n", + "270 0xfe16926cefc4db4a7496bfc3e961445228fbbf39 2024-12-15 12:47:10+00:00 \n", + "271 0xfe2c8e93ce30d1c961831d8e4d120a307aedbe72 2024-11-13 11:44:40+00:00 \n", + "272 0xfe94203ab2c1c22fe8585cbebf865f7b69eb7027 2024-11-13 00:46:35+00:00 \n", + "\n", + " cohort_week \n", + "0 Nov-24-2024 \n", + "1 Nov-17-2024 \n", + "2 Nov-17-2024 \n", + "3 Nov-17-2024 \n", + "4 Nov-17-2024 \n", + ".. ... 
\n", + "268 Nov-17-2024 \n", + "269 Nov-17-2024 \n", + "270 Dec-15-2024 \n", + "271 Nov-17-2024 \n", + "272 Nov-17-2024 \n", + "\n", + "[273 rows x 3 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Create and show the plot\n", - "fig = plot_wow_retention_by_type(wow_retention)\n", - "fig.show()" + "first_trades" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 85, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Nov-17-2024', 'Nov-24-2024', 'Dec-01-2024', 'Dec-08-2024',\n", + " 'Dec-15-2024', 'Dec-22-2024', 'Dec-29-2024', 'Jan-05-2025',\n", + " 'Jan-12-2025', 'Jan-19-2025'], dtype=object)" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Cohort retention" + "all_weeks = olas_data[\"month_year_week\"].unique()\n", + "all_weeks" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ - "def calculate_cohort_retention(df, max_weeks=12):\n", - " # Get first week for each trader\n", - " first_trades = (\n", - " df.groupby(\"trader_address\")\n", - " .agg({\"creation_timestamp\": \"min\", \"month_year_week\": \"first\"})\n", - " .reset_index()\n", - " )\n", - " first_trades.columns = [\"trader_address\", \"first_trade\", \"cohort_week\"]\n", - "\n", - " # Get ordered list of unique weeks - converting to datetime for proper sorting\n", - " all_weeks = df[\"month_year_week\"].unique()\n", - " weeks_datetime = pd.to_datetime(all_weeks)\n", - " sorted_weeks_idx = weeks_datetime.argsort()\n", - " all_weeks = all_weeks[sorted_weeks_idx]\n", - "\n", - " # Create mapping from week string to numeric index\n", - " week_to_number = {week: idx for idx, week in enumerate(all_weeks)}\n", + "all_weeks = olas_data[\"month_year_week\"].unique()\n", + "weeks_datetime = pd.to_datetime(all_weeks)\n", + "sorted_weeks_idx = 
weeks_datetime.argsort()\n", + "all_weeks = all_weeks[sorted_weeks_idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Nov-17-2024', 'Nov-24-2024', 'Dec-01-2024', 'Dec-08-2024',\n", + " 'Dec-15-2024', 'Dec-22-2024', 'Dec-29-2024', 'Jan-05-2025',\n", + " 'Jan-12-2025', 'Jan-19-2025'], dtype=object)" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_weeks" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "# Create mapping from week string to numeric index\n", + "week_to_number = {week: idx for idx, week in enumerate(all_weeks)}\n", "\n", - " # Merge back to get all activities\n", - " cohort_data = pd.merge(\n", - " df, first_trades[[\"trader_address\", \"cohort_week\"]], on=\"trader_address\"\n", - " )\n", + "# Merge back to get all activities\n", + "cohort_data = pd.merge(\n", + " olas_data,\n", + " first_trades[[\"trader_address\", \"cohort_week\"]],\n", + " on=\"trader_address\",\n", + ")\n", "\n", - " # Calculate week number since first activity\n", - " cohort_data[\"cohort_number\"] = cohort_data[\"cohort_week\"].map(week_to_number)\n", - " cohort_data[\"activity_number\"] = cohort_data[\"month_year_week\"].map(week_to_number)\n", - " cohort_data[\"week_number\"] = (\n", - " cohort_data[\"activity_number\"] - cohort_data[\"cohort_number\"]\n", - " )\n", + "# Calculate week number since first activity\n", + "cohort_data[\"cohort_number\"] = cohort_data[\"cohort_week\"].map(week_to_number)\n", + "cohort_data[\"activity_number\"] = cohort_data[\"month_year_week\"].map(week_to_number)\n", + "cohort_data[\"week_number\"] = (\n", + " cohort_data[\"activity_number\"] - cohort_data[\"cohort_number\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", 
+ "text": [ + "cohort_week\n", + "Dec-01-2024 10\n", + "Dec-08-2024 12\n", + "Dec-15-2024 10\n", + "Dec-22-2024 2\n", + "Dec-29-2024 1\n", + "Jan-05-2025 1\n", + "Jan-19-2025 1\n", + "Nov-17-2024 220\n", + "Nov-24-2024 16\n", + "Name: trader_address, dtype: int64\n" + ] + } + ], + "source": [ + "cohort_sizes = cohort_data.groupby(\"cohort_week\")[\"trader_address\"].nunique()\n", "\n", - " # Calculate retention by cohort\n", - " cohort_sizes = cohort_data.groupby(\"cohort_week\")[\"trader_address\"].nunique()\n", + "print(cohort_sizes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ " retention_matrix = cohort_data.groupby([\"cohort_week\", \"week_number\"])[\n", " \"trader_address\"\n", " ].nunique()\n", - " retention_matrix = retention_matrix.unstack(fill_value=0)\n", - "\n", - " # Convert to percentages\n", - " retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100\n", - "\n", - " # Sort index (cohort_week) chronologically\n", - " retention_matrix.index = pd.to_datetime(retention_matrix.index)\n", - " retention_matrix = retention_matrix.sort_index()\n", - "\n", - " # Limit to max_weeks if specified\n", - " if max_weeks is not None and max_weeks < retention_matrix.shape[1]:\n", - " retention_matrix = retention_matrix.iloc[:, :max_weeks]\n", - "\n", - " return retention_matrix.round(2)" + " retention_matrix = retention_matrix.unstack(fill_value=0)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 54,