cyberosa committed on
Commit
63c3662
·
1 Parent(s): 6d1850e

adding cohort retention graphs and restoring unknown traders file

Browse files
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
  import duckdb
4
  import logging
5
 
@@ -212,7 +213,7 @@ with demo:
212
  )
213
 
214
  with gr.Row():
215
- gr.Markdown("# Weekly metrics of 🌊 Olas traders")
216
  with gr.Row():
217
  trader_o_details_selector = gr.Dropdown(
218
  label="Select a weekly trader metric",
@@ -431,21 +432,79 @@ with demo:
431
  wow_retention=wow_retention
432
  )
433
 
434
- # with gr.Row():
435
- # gr.Markdown("# Cohort retention in pearl traders")
436
- # with gr.Row():
437
- # cohort_retention = calculate_cohort_retention(df=retention_df)
438
- # cohort_retention_plot = plot_cohort_retention_heatmap(
439
- # retention_matrix=cohort_retention
440
- # )
441
- # with gr.Row():
442
- # gr.Markdown("# Cohort retention in qs traders")
443
- # with gr.Row():
444
- # cohort_retention = calculate_cohort_retention(df=retention_df)
445
- # cohort_retention_plot = plot_cohort_retention_heatmap(
446
- # retention_matrix=cohort_retention
447
- # )
448
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
  with gr.TabItem("⚙️ Active traders"):
450
  with gr.Row():
451
  gr.Markdown("# Active traders for all markets by trader categories")
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import seaborn as sns
4
  import duckdb
5
  import logging
6
 
 
213
  )
214
 
215
  with gr.Row():
216
+ gr.Markdown("# Weekly metrics of 🌊 Olas traders")
217
  with gr.Row():
218
  trader_o_details_selector = gr.Dropdown(
219
  label="Select a weekly trader metric",
 
432
  wow_retention=wow_retention
433
  )
434
 
435
+ with gr.Row():
436
+ gr.Markdown("# Cohort retention in pearl traders")
437
+ with gr.Row():
438
+ with gr.Column(scale=1):
439
+ gr.Markdown("## Cohort retention of 🌊 Olas traders")
440
+ cohort_retention_olas_pearl = calculate_cohort_retention(
441
+ df=retention_df, market_creator="pearl", trader_type="Olas"
442
+ )
443
+ cohort_retention_plot1 = plot_cohort_retention_heatmap(
444
+ retention_matrix=cohort_retention_olas_pearl, cmap="Purples"
445
+ )
446
+ with gr.Column(scale=1):
447
+ gr.Markdown("## Cohort retention of Non-Olas traders")
448
+ # non_Olas
449
+ cohort_retention_non_olas_pearl = calculate_cohort_retention(
450
+ df=retention_df, market_creator="pearl", trader_type="non_Olas"
451
+ )
452
+ cohort_retention_plot2 = plot_cohort_retention_heatmap(
453
+ retention_matrix=cohort_retention_non_olas_pearl,
454
+ cmap=sns.color_palette("light:goldenrod", as_cmap=True),
455
+ )
456
+ with gr.Row():
457
+ with gr.Column(scale=1):
458
+ gr.Markdown("## Cohort retention of unclassified traders")
459
+ cohort_retention_unclassified_pearl = calculate_cohort_retention(
460
+ df=retention_df,
461
+ market_creator="pearl",
462
+ trader_type="unclassified",
463
+ )
464
+ cohort_retention_plot3 = plot_cohort_retention_heatmap(
465
+ retention_matrix=cohort_retention_unclassified_pearl,
466
+ cmap="Greens",
467
+ )
468
+ with gr.Column(scale=1):
469
+ print("Adding explanatory text")
470
+ with gr.Row():
471
+ gr.Markdown("# Cohort retention in quickstart traders")
472
+ with gr.Row():
473
+ with gr.Column(scale=1):
474
+ gr.Markdown("## Cohort retention of 🌊 Olas traders")
475
+ cohort_retention_olas_qs = calculate_cohort_retention(
476
+ df=retention_df, market_creator="quickstart", trader_type="Olas"
477
+ )
478
+ cohort_retention_plot4 = plot_cohort_retention_heatmap(
479
+ retention_matrix=cohort_retention_olas_qs,
480
+ cmap="Purples",
481
+ )
482
+ with gr.Column(scale=1):
483
+ gr.Markdown("## Cohort retention of Non-Olas traders")
484
+ # non_Olas
485
+ cohort_retention_non_olas_qs = calculate_cohort_retention(
486
+ df=retention_df,
487
+ market_creator="quickstart",
488
+ trader_type="non_Olas",
489
+ )
490
+ cohort_retention_plot5 = plot_cohort_retention_heatmap(
491
+ retention_matrix=cohort_retention_non_olas_qs,
492
+ cmap=sns.color_palette("light:goldenrod", as_cmap=True),
493
+ )
494
+ with gr.Row():
495
+ with gr.Column(scale=1):
496
+ gr.Markdown("## Cohort retention of unclassified traders")
497
+ cohort_retention_unclassified_qs = calculate_cohort_retention(
498
+ df=retention_df,
499
+ market_creator="quickstart",
500
+ trader_type="unclassified",
501
+ )
502
+ cohort_retention_plot6 = plot_cohort_retention_heatmap(
503
+ retention_matrix=cohort_retention_unclassified_qs,
504
+ cmap="Greens",
505
+ )
506
+ with gr.Column(scale=1):
507
+ print("Adding explanatory text")
508
  with gr.TabItem("⚙️ Active traders"):
509
  with gr.Row():
510
  gr.Markdown("# Active traders for all markets by trader categories")
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de9aa0ab5acaf1623b604d88a97fa4d2369ba490e872fbeb95f8308a870e6149
3
- size 210024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:facb6d44b0ca6896cd98108283bc9527aee15ca3ca99df7a7c364ad2fb53b172
3
+ size 320009
notebooks/closed_markets.ipynb CHANGED
@@ -36,7 +36,7 @@
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 9,
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
@@ -48,7 +48,7 @@
48
  },
49
  {
50
  "cell_type": "code",
51
- "execution_count": 6,
52
  "metadata": {},
53
  "outputs": [
54
  {
@@ -56,38 +56,38 @@
56
  "output_type": "stream",
57
  "text": [
58
  "<class 'pandas.core.frame.DataFrame'>\n",
59
- "RangeIndex: 118880 entries, 0 to 118879\n",
60
  "Data columns (total 26 columns):\n",
61
  " # Column Non-Null Count Dtype \n",
62
  "--- ------ -------------- ----- \n",
63
- " 0 collateralAmount 118880 non-null object \n",
64
- " 1 collateralAmountUSD 118880 non-null object \n",
65
- " 2 collateralToken 118880 non-null object \n",
66
- " 3 creationTimestamp 118880 non-null datetime64[ns, UTC]\n",
67
- " 4 trader_address 118880 non-null object \n",
68
- " 5 feeAmount 118880 non-null object \n",
69
- " 6 id 118880 non-null object \n",
70
- " 7 oldOutcomeTokenMarginalPrice 118880 non-null object \n",
71
- " 8 outcomeIndex 118880 non-null object \n",
72
- " 9 outcomeTokenMarginalPrice 118880 non-null object \n",
73
- " 10 outcomeTokensTraded 118880 non-null object \n",
74
- " 11 title 118880 non-null object \n",
75
- " 12 transactionHash 118880 non-null object \n",
76
- " 13 type 118880 non-null object \n",
77
- " 14 market_creator 118880 non-null object \n",
78
- " 15 fpmm.answerFinalizedTimestamp 78679 non-null object \n",
79
- " 16 fpmm.arbitrationOccurred 118880 non-null bool \n",
80
- " 17 fpmm.currentAnswer 78679 non-null object \n",
81
- " 18 fpmm.id 118880 non-null object \n",
82
- " 19 fpmm.isPendingArbitration 118880 non-null bool \n",
83
- " 20 fpmm.openingTimestamp 118880 non-null object \n",
84
- " 21 fpmm.outcomes 118880 non-null object \n",
85
- " 22 fpmm.title 118880 non-null object \n",
86
- " 23 fpmm.condition.id 118880 non-null object \n",
87
- " 24 creation_timestamp 118880 non-null datetime64[ns, UTC]\n",
88
- " 25 creation_date 118880 non-null object \n",
89
  "dtypes: bool(2), datetime64[ns, UTC](2), object(22)\n",
90
- "memory usage: 22.0+ MB\n"
91
  ]
92
  }
93
  ],
@@ -97,7 +97,7 @@
97
  },
98
  {
99
  "cell_type": "code",
100
- "execution_count": 3,
101
  "metadata": {},
102
  "outputs": [],
103
  "source": [
@@ -109,7 +109,7 @@
109
  },
110
  {
111
  "cell_type": "code",
112
- "execution_count": 10,
113
  "metadata": {},
114
  "outputs": [],
115
  "source": [
@@ -127,16 +127,16 @@
127
  },
128
  {
129
  "cell_type": "code",
130
- "execution_count": 11,
131
  "metadata": {},
132
  "outputs": [
133
  {
134
  "data": {
135
  "text/plain": [
136
- "Timestamp('2025-01-13 00:00:00')"
137
  ]
138
  },
139
- "execution_count": 11,
140
  "metadata": {},
141
  "output_type": "execute_result"
142
  }
 
36
  },
37
  {
38
  "cell_type": "code",
39
+ "execution_count": 3,
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
 
48
  },
49
  {
50
  "cell_type": "code",
51
+ "execution_count": 4,
52
  "metadata": {},
53
  "outputs": [
54
  {
 
56
  "output_type": "stream",
57
  "text": [
58
  "<class 'pandas.core.frame.DataFrame'>\n",
59
+ "RangeIndex: 117525 entries, 0 to 117524\n",
60
  "Data columns (total 26 columns):\n",
61
  " # Column Non-Null Count Dtype \n",
62
  "--- ------ -------------- ----- \n",
63
+ " 0 collateralAmount 117525 non-null object \n",
64
+ " 1 collateralAmountUSD 117525 non-null object \n",
65
+ " 2 collateralToken 117525 non-null object \n",
66
+ " 3 creationTimestamp 117525 non-null datetime64[ns, UTC]\n",
67
+ " 4 trader_address 117525 non-null object \n",
68
+ " 5 feeAmount 117525 non-null object \n",
69
+ " 6 id 117525 non-null object \n",
70
+ " 7 oldOutcomeTokenMarginalPrice 117525 non-null object \n",
71
+ " 8 outcomeIndex 117525 non-null object \n",
72
+ " 9 outcomeTokenMarginalPrice 117525 non-null object \n",
73
+ " 10 outcomeTokensTraded 117525 non-null object \n",
74
+ " 11 title 117525 non-null object \n",
75
+ " 12 transactionHash 117525 non-null object \n",
76
+ " 13 type 117525 non-null object \n",
77
+ " 14 market_creator 117525 non-null object \n",
78
+ " 15 fpmm.answerFinalizedTimestamp 77324 non-null object \n",
79
+ " 16 fpmm.arbitrationOccurred 117525 non-null bool \n",
80
+ " 17 fpmm.currentAnswer 77324 non-null object \n",
81
+ " 18 fpmm.id 117525 non-null object \n",
82
+ " 19 fpmm.isPendingArbitration 117525 non-null bool \n",
83
+ " 20 fpmm.openingTimestamp 117525 non-null object \n",
84
+ " 21 fpmm.outcomes 117525 non-null object \n",
85
+ " 22 fpmm.title 117525 non-null object \n",
86
+ " 23 fpmm.condition.id 117525 non-null object \n",
87
+ " 24 creation_timestamp 117525 non-null datetime64[ns, UTC]\n",
88
+ " 25 creation_date 117525 non-null object \n",
89
  "dtypes: bool(2), datetime64[ns, UTC](2), object(22)\n",
90
+ "memory usage: 21.7+ MB\n"
91
  ]
92
  }
93
  ],
 
97
  },
98
  {
99
  "cell_type": "code",
100
+ "execution_count": 5,
101
  "metadata": {},
102
  "outputs": [],
103
  "source": [
 
109
  },
110
  {
111
  "cell_type": "code",
112
+ "execution_count": 6,
113
  "metadata": {},
114
  "outputs": [],
115
  "source": [
 
127
  },
128
  {
129
  "cell_type": "code",
130
+ "execution_count": 7,
131
  "metadata": {},
132
  "outputs": [
133
  {
134
  "data": {
135
  "text/plain": [
136
+ "Timestamp('2024-12-28 00:00:00')"
137
  ]
138
  },
139
+ "execution_count": 7,
140
  "metadata": {},
141
  "output_type": "execute_result"
142
  }
notebooks/retention_metrics.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/unknown_traders.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/retention_metrics.py CHANGED
@@ -64,9 +64,11 @@ def calculate_wow_retention_by_type(
64
 
65
  # Cohort Retention
66
  def calculate_cohort_retention(
67
- df: pd.DataFrame, trader_type: str, max_weeks=12
68
  ) -> pd.DataFrame:
69
- df_filtered = df.loc[df["trader_type"] == trader_type]
 
 
70
  # Get first week for each trader
71
  first_trades = (
72
  df_filtered.groupby("trader_address")
@@ -76,7 +78,7 @@ def calculate_cohort_retention(
76
  first_trades.columns = ["trader_address", "first_trade", "cohort_week"]
77
 
78
  # Get ordered list of unique weeks - converting to datetime for proper sorting
79
- all_weeks = df["month_year_week"].unique()
80
  weeks_datetime = pd.to_datetime(all_weeks)
81
  sorted_weeks_idx = weeks_datetime.argsort()
82
  all_weeks = all_weeks[sorted_weeks_idx]
@@ -86,7 +88,9 @@ def calculate_cohort_retention(
86
 
87
  # Merge back to get all activities
88
  cohort_data = pd.merge(
89
- df, first_trades[["trader_address", "cohort_week"]], on="trader_address"
 
 
90
  )
91
 
92
  # Calculate week number since first activity
 
64
 
65
  # Cohort Retention
66
  def calculate_cohort_retention(
67
+ df: pd.DataFrame, market_creator: str, trader_type: str, max_weeks=12
68
  ) -> pd.DataFrame:
69
+ df_filtered = df.loc[
70
+ (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
71
+ ]
72
  # Get first week for each trader
73
  first_trades = (
74
  df_filtered.groupby("trader_address")
 
78
  first_trades.columns = ["trader_address", "first_trade", "cohort_week"]
79
 
80
  # Get ordered list of unique weeks - converting to datetime for proper sorting
81
+ all_weeks = df_filtered["month_year_week"].unique()
82
  weeks_datetime = pd.to_datetime(all_weeks)
83
  sorted_weeks_idx = weeks_datetime.argsort()
84
  all_weeks = all_weeks[sorted_weeks_idx]
 
88
 
89
  # Merge back to get all activities
90
  cohort_data = pd.merge(
91
+ df_filtered,
92
+ first_trades[["trader_address", "cohort_week"]],
93
+ on="trader_address",
94
  )
95
 
96
  # Calculate week number since first activity
tabs/retention_plots.py CHANGED
@@ -22,6 +22,7 @@ def plot_wow_retention_by_type(wow_retention):
22
  "retention_rate": "Retention Rate (%)",
23
  "trader_type": "Trader Type",
24
  },
 
25
  )
26
 
27
  fig.update_layout(
@@ -53,13 +54,13 @@ def plot_wow_retention_by_type(wow_retention):
53
  )
54
 
55
 
56
- def plot_cohort_retention_heatmap(retention_matrix: pd.DataFrame):
57
 
58
  # Create a copy of the matrix to avoid modifying the original
59
  retention_matrix = retention_matrix.copy()
60
 
61
  # Convert index to datetime and format to date string
62
- retention_matrix.index = pd.to_datetime(retention_matrix.index).strftime("%Y-%m-%d")
63
 
64
  # Create figure and axes with specified size
65
  plt.figure(figsize=(12, 8))
@@ -72,7 +73,7 @@ def plot_cohort_retention_heatmap(retention_matrix: pd.DataFrame):
72
  data=retention_matrix,
73
  annot=True, # Show numbers in cells
74
  fmt=".1f", # Format numbers to 1 decimal place
75
- cmap="YlOrRd", # Yellow to Orange to Red color scheme
76
  vmin=0,
77
  vmax=100,
78
  center=50,
 
22
  "retention_rate": "Retention Rate (%)",
23
  "trader_type": "Trader Type",
24
  },
25
+ color_discrete_sequence=["purple", "goldenrod", "green"],
26
  )
27
 
28
  fig.update_layout(
 
54
  )
55
 
56
 
57
+ def plot_cohort_retention_heatmap(retention_matrix: pd.DataFrame, cmap: str):
58
 
59
  # Create a copy of the matrix to avoid modifying the original
60
  retention_matrix = retention_matrix.copy()
61
 
62
  # Convert index to datetime and format to date string
63
+ retention_matrix.index = pd.to_datetime(retention_matrix.index).strftime("%a-%b %d")
64
 
65
  # Create figure and axes with specified size
66
  plt.figure(figsize=(12, 8))
 
73
  data=retention_matrix,
74
  annot=True, # Show numbers in cells
75
  fmt=".1f", # Format numbers to 1 decimal place
76
+ cmap=cmap, # Color scheme supplied by the caller (colormap name or colormap object)
77
  vmin=0,
78
  vmax=100,
79
  center=50,