cyberosa committed on
Commit
330cbe3
·
1 Parent(s): db08a72

Added explanation text for divergence

Browse files
app.py CHANGED
@@ -22,8 +22,6 @@ from tabs.trader_plots import (
22
 
23
  from tabs.market_plots import (
24
  plot_kl_div_per_market,
25
- plot_kl_div_with_off_by,
26
- plot_kl_div_per_market2,
27
  )
28
 
29
 
@@ -203,7 +201,10 @@ with demo:
203
  with gr.Row():
204
  trade_details_text = get_metrics_text()
205
  with gr.Row():
206
- kl_div_plot = plot_kl_div_per_market2(closed_markets=closed_markets)
 
 
 
207
 
208
  with gr.TabItem("🎖️Weekly winning trades % per trader"):
209
  with gr.Row():
@@ -211,6 +212,7 @@ with demo:
211
  with gr.Row():
212
  metrics_text = get_metrics_text()
213
  with gr.Row():
 
214
  winning_metric = plot_winning_metric_per_trader(weekly_winning_metrics)
215
 
216
  demo.queue(default_concurrency_limit=40).launch()
 
22
 
23
  from tabs.market_plots import (
24
  plot_kl_div_per_market,
 
 
25
  )
26
 
27
 
 
201
  with gr.Row():
202
  trade_details_text = get_metrics_text()
203
  with gr.Row():
204
+ with gr.Column(scale=3):
205
+ kl_div_plot = plot_kl_div_per_market(closed_markets=closed_markets)
206
+ with gr.Column(scale=1):
207
+ interpretation = get_interpretation_text()
208
 
209
  with gr.TabItem("🎖️Weekly winning trades % per trader"):
210
  with gr.Row():
 
212
  with gr.Row():
213
  metrics_text = get_metrics_text()
214
  with gr.Row():
215
+
216
  winning_metric = plot_winning_metric_per_trader(weekly_winning_metrics)
217
 
218
  demo.queue(default_concurrency_limit=40).launch()
notebooks/closed_markets.ipynb CHANGED
@@ -1899,20 +1899,20 @@
1899
  },
1900
  {
1901
  "cell_type": "code",
1902
- "execution_count": 48,
1903
  "metadata": {},
1904
  "outputs": [
1905
  {
1906
  "name": "stdout",
1907
  "output_type": "stream",
1908
  "text": [
1909
- "4.605170185988092\n"
1910
  ]
1911
  }
1912
  ],
1913
  "source": [
1914
  "P = np.array([1.0, 0.0])\n",
1915
- "Q = np.array([0.01, 0.99])\n",
1916
  "print(kl_divergence(P,Q))"
1917
  ]
1918
  },
 
1899
  },
1900
  {
1901
  "cell_type": "code",
1902
+ "execution_count": 16,
1903
  "metadata": {},
1904
  "outputs": [
1905
  {
1906
  "name": "stdout",
1907
  "output_type": "stream",
1908
  "text": [
1909
+ "0.5108256237659907\n"
1910
  ]
1911
  }
1912
  ],
1913
  "source": [
1914
  "P = np.array([1.0, 0.0])\n",
1915
+ "Q = np.array([0.60, 0.05])\n",
1916
  "print(kl_divergence(P,Q))"
1917
  ]
1918
  },
notebooks/divergence.ipynb CHANGED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/Users/cyberosa/.pyenv/versions/hf_dashboards/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import pandas as pd\n",
19
+ "import gradio as gr\n",
20
+ "import plotly.express as px\n",
21
+ "import plotly.graph_objects as go\n",
22
+ "from plotly.subplots import make_subplots\n",
23
+ "import matplotlib.pyplot as plt\n",
24
+ "import seaborn as sns"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "execution_count": 2,
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
33
+ "div_data = pd.read_parquet(\"../data/closed_markets_div.parquet\")"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 3,
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "data": {
43
+ "text/html": [
44
+ "<div>\n",
45
+ "<style scoped>\n",
46
+ " .dataframe tbody tr th:only-of-type {\n",
47
+ " vertical-align: middle;\n",
48
+ " }\n",
49
+ "\n",
50
+ " .dataframe tbody tr th {\n",
51
+ " vertical-align: top;\n",
52
+ " }\n",
53
+ "\n",
54
+ " .dataframe thead th {\n",
55
+ " text-align: right;\n",
56
+ " }\n",
57
+ "</style>\n",
58
+ "<table border=\"1\" class=\"dataframe\">\n",
59
+ " <thead>\n",
60
+ " <tr style=\"text-align: right;\">\n",
61
+ " <th></th>\n",
62
+ " <th>currentAnswer</th>\n",
63
+ " <th>id</th>\n",
64
+ " <th>openingTimestamp</th>\n",
65
+ " <th>market_creator</th>\n",
66
+ " <th>opening_datetime</th>\n",
67
+ " <th>first_outcome_prob</th>\n",
68
+ " <th>second_outcome_prob</th>\n",
69
+ " <th>kl_divergence</th>\n",
70
+ " <th>off_by_perc</th>\n",
71
+ " </tr>\n",
72
+ " </thead>\n",
73
+ " <tbody>\n",
74
+ " <tr>\n",
75
+ " <th>315</th>\n",
76
+ " <td>no</td>\n",
77
+ " <td>0x29462bf8c8f24772cd6da03878a4aee5c5813474</td>\n",
78
+ " <td>1724976000</td>\n",
79
+ " <td>pearl</td>\n",
80
+ " <td>2024-08-30 02:00:00</td>\n",
81
+ " <td>0.9416</td>\n",
82
+ " <td>0.0584</td>\n",
83
+ " <td>2.840439</td>\n",
84
+ " <td>94.16</td>\n",
85
+ " </tr>\n",
86
+ " <tr>\n",
87
+ " <th>323</th>\n",
88
+ " <td>yes</td>\n",
89
+ " <td>0x0ad9d4edb0a401ec9a5b4f2ccf7942d28c29d4e3</td>\n",
90
+ " <td>1724976000</td>\n",
91
+ " <td>quickstart</td>\n",
92
+ " <td>2024-08-30 02:00:00</td>\n",
93
+ " <td>0.0499</td>\n",
94
+ " <td>0.9501</td>\n",
95
+ " <td>2.997734</td>\n",
96
+ " <td>95.01</td>\n",
97
+ " </tr>\n",
98
+ " </tbody>\n",
99
+ "</table>\n",
100
+ "</div>"
101
+ ],
102
+ "text/plain": [
103
+ " currentAnswer id \\\n",
104
+ "315 no 0x29462bf8c8f24772cd6da03878a4aee5c5813474 \n",
105
+ "323 yes 0x0ad9d4edb0a401ec9a5b4f2ccf7942d28c29d4e3 \n",
106
+ "\n",
107
+ " openingTimestamp market_creator opening_datetime first_outcome_prob \\\n",
108
+ "315 1724976000 pearl 2024-08-30 02:00:00 0.9416 \n",
109
+ "323 1724976000 quickstart 2024-08-30 02:00:00 0.0499 \n",
110
+ "\n",
111
+ " second_outcome_prob kl_divergence off_by_perc \n",
112
+ "315 0.0584 2.840439 94.16 \n",
113
+ "323 0.9501 2.997734 95.01 "
114
+ ]
115
+ },
116
+ "execution_count": 3,
117
+ "metadata": {},
118
+ "output_type": "execute_result"
119
+ }
120
+ ],
121
+ "source": [
122
+ "div_data.loc[div_data[\"off_by_perc\"]>=90]"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 5,
128
+ "metadata": {},
129
+ "outputs": [
130
+ {
131
+ "data": {
132
+ "text/html": [
133
+ "<div>\n",
134
+ "<style scoped>\n",
135
+ " .dataframe tbody tr th:only-of-type {\n",
136
+ " vertical-align: middle;\n",
137
+ " }\n",
138
+ "\n",
139
+ " .dataframe tbody tr th {\n",
140
+ " vertical-align: top;\n",
141
+ " }\n",
142
+ "\n",
143
+ " .dataframe thead th {\n",
144
+ " text-align: right;\n",
145
+ " }\n",
146
+ "</style>\n",
147
+ "<table border=\"1\" class=\"dataframe\">\n",
148
+ " <thead>\n",
149
+ " <tr style=\"text-align: right;\">\n",
150
+ " <th></th>\n",
151
+ " <th>currentAnswer</th>\n",
152
+ " <th>id</th>\n",
153
+ " <th>openingTimestamp</th>\n",
154
+ " <th>market_creator</th>\n",
155
+ " <th>opening_datetime</th>\n",
156
+ " <th>first_outcome_prob</th>\n",
157
+ " <th>second_outcome_prob</th>\n",
158
+ " <th>kl_divergence</th>\n",
159
+ " <th>off_by_perc</th>\n",
160
+ " </tr>\n",
161
+ " </thead>\n",
162
+ " <tbody>\n",
163
+ " <tr>\n",
164
+ " <th>52</th>\n",
165
+ " <td>no</td>\n",
166
+ " <td>0x927beda324bfd4514a7b64ab5594451fdaf4796e</td>\n",
167
+ " <td>1722816000</td>\n",
168
+ " <td>quickstart</td>\n",
169
+ " <td>2024-08-05 02:00:00</td>\n",
170
+ " <td>0.8792</td>\n",
171
+ " <td>0.1208</td>\n",
172
+ " <td>2.113619</td>\n",
173
+ " <td>87.92</td>\n",
174
+ " </tr>\n",
175
+ " <tr>\n",
176
+ " <th>293</th>\n",
177
+ " <td>yes</td>\n",
178
+ " <td>0x90bb15982f2b5a5f044ad8ff49fe20daddfb8ca7</td>\n",
179
+ " <td>1724803200</td>\n",
180
+ " <td>quickstart</td>\n",
181
+ " <td>2024-08-28 02:00:00</td>\n",
182
+ " <td>0.1166</td>\n",
183
+ " <td>0.8834</td>\n",
184
+ " <td>2.149006</td>\n",
185
+ " <td>88.34</td>\n",
186
+ " </tr>\n",
187
+ " <tr>\n",
188
+ " <th>315</th>\n",
189
+ " <td>no</td>\n",
190
+ " <td>0x29462bf8c8f24772cd6da03878a4aee5c5813474</td>\n",
191
+ " <td>1724976000</td>\n",
192
+ " <td>pearl</td>\n",
193
+ " <td>2024-08-30 02:00:00</td>\n",
194
+ " <td>0.9416</td>\n",
195
+ " <td>0.0584</td>\n",
196
+ " <td>2.840439</td>\n",
197
+ " <td>94.16</td>\n",
198
+ " </tr>\n",
199
+ " <tr>\n",
200
+ " <th>323</th>\n",
201
+ " <td>yes</td>\n",
202
+ " <td>0x0ad9d4edb0a401ec9a5b4f2ccf7942d28c29d4e3</td>\n",
203
+ " <td>1724976000</td>\n",
204
+ " <td>quickstart</td>\n",
205
+ " <td>2024-08-30 02:00:00</td>\n",
206
+ " <td>0.0499</td>\n",
207
+ " <td>0.9501</td>\n",
208
+ " <td>2.997734</td>\n",
209
+ " <td>95.01</td>\n",
210
+ " </tr>\n",
211
+ " </tbody>\n",
212
+ "</table>\n",
213
+ "</div>"
214
+ ],
215
+ "text/plain": [
216
+ " currentAnswer id \\\n",
217
+ "52 no 0x927beda324bfd4514a7b64ab5594451fdaf4796e \n",
218
+ "293 yes 0x90bb15982f2b5a5f044ad8ff49fe20daddfb8ca7 \n",
219
+ "315 no 0x29462bf8c8f24772cd6da03878a4aee5c5813474 \n",
220
+ "323 yes 0x0ad9d4edb0a401ec9a5b4f2ccf7942d28c29d4e3 \n",
221
+ "\n",
222
+ " openingTimestamp market_creator opening_datetime first_outcome_prob \\\n",
223
+ "52 1722816000 quickstart 2024-08-05 02:00:00 0.8792 \n",
224
+ "293 1724803200 quickstart 2024-08-28 02:00:00 0.1166 \n",
225
+ "315 1724976000 pearl 2024-08-30 02:00:00 0.9416 \n",
226
+ "323 1724976000 quickstart 2024-08-30 02:00:00 0.0499 \n",
227
+ "\n",
228
+ " second_outcome_prob kl_divergence off_by_perc \n",
229
+ "52 0.1208 2.113619 87.92 \n",
230
+ "293 0.8834 2.149006 88.34 \n",
231
+ "315 0.0584 2.840439 94.16 \n",
232
+ "323 0.9501 2.997734 95.01 "
233
+ ]
234
+ },
235
+ "execution_count": 5,
236
+ "metadata": {},
237
+ "output_type": "execute_result"
238
+ }
239
+ ],
240
+ "source": [
241
+ "div_data.loc[div_data[\"kl_divergence\"]>=2.0]"
242
+ ]
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": null,
247
+ "metadata": {},
248
+ "outputs": [],
249
+ "source": []
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": null,
254
+ "metadata": {},
255
+ "outputs": [],
256
+ "source": [
257
+ "all_markets = closed_markets.copy(deep=True)\n",
258
+ " all_markets[\"market_creator\"] = \"all\"\n",
259
+ "\n",
260
+ " # merging both dataframes\n",
261
+ " final_markets = pd.concat([div_data, all_markets], ignore_index=True)\n",
262
+ " final_markets = final_markets.sort_values(by=\"opening_datetime\", ascending=True)\n",
263
+ "\n",
264
+ " # Create the main figure and axis\n",
265
+ " fig, ax1 = plt.subplots(figsize=(10, 6))\n",
266
+ "\n",
267
+ " # Create the boxplot using seaborn\n",
268
+ " sns.boxplot(\n",
269
+ " data=closed_markets,\n",
270
+ " x=\"month_year_week\",\n",
271
+ " y=\"kl_divergence\",\n",
272
+ " ax=ax1,\n",
273
+ " hue=\"market_creator\",\n",
274
+ " order=[\"pearl\", \"quickstart\", \"all\"],\n",
275
+ " )\n",
276
+ "\n",
277
+ " # Set labels and title for the main axis\n",
278
+ " ax1.set_xlabel(\"Week\")\n",
279
+ " ax1.set_ylabel(\"KL Divergence\")\n",
280
+ " ax1.set_title(\"KL Divergence Boxplot with Off-by Percentage\")\n",
281
+ "\n",
282
+ " # Create a secondary y-axis\n",
283
+ " ax2 = ax1.twinx()\n",
284
+ "\n",
285
+ " # Plot the off_by_perc values on the secondary y-axis\n",
286
+ " for i, week in enumerate(closed_markets[\"month_year_week\"].unique()):\n",
287
+ " off_by_perc = closed_markets[closed_markets[\"month_year_week\"] == week][\n",
288
+ " \"off_by_perc\"\n",
289
+ " ]\n",
290
+ " ax2.scatter([i] * len(off_by_perc), off_by_perc, color=\"red\", alpha=0.01)\n",
291
+ "\n",
292
+ " # Set label for the secondary y-axis\n",
293
+ " ax2.set_ylabel(\"Off-by Percentage\")\n",
294
+ "\n",
295
+ " # Adjust the layout and display the plot\n",
296
+ " plt.tight_layout()"
297
+ ]
298
+ }
299
+ ],
300
+ "metadata": {
301
+ "kernelspec": {
302
+ "display_name": "hf_dashboards",
303
+ "language": "python",
304
+ "name": "python3"
305
+ },
306
+ "language_info": {
307
+ "codemirror_mode": {
308
+ "name": "ipython",
309
+ "version": 3
310
+ },
311
+ "file_extension": ".py",
312
+ "mimetype": "text/x-python",
313
+ "name": "python",
314
+ "nbconvert_exporter": "python",
315
+ "pygments_lexer": "ipython3",
316
+ "version": "3.12.2"
317
+ }
318
+ },
319
+ "nbformat": 4,
320
+ "nbformat_minor": 2
321
+ }
tabs/market_plots.py CHANGED
@@ -30,6 +30,8 @@ def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
30
  xaxis_title="Markets closing Week",
31
  yaxis_title="Kullback–Leibler divergence",
32
  legend=dict(yanchor="top", y=0.5),
 
 
33
  )
34
 
35
  fig.update_xaxes(tickformat="%b %d\n%Y")
@@ -83,77 +85,3 @@ def plot_kl_div_with_off_by(closed_markets: pd.DataFrame) -> gr.Plot:
83
  return gr.Plot(
84
  value=fig,
85
  )
86
-
87
-
88
- def plot_kl_div_per_market2(closed_markets: pd.DataFrame) -> gr.Plot:
89
-
90
- # adding the total
91
- all_markets = closed_markets.copy(deep=True)
92
- all_markets["market_creator"] = "all"
93
-
94
- # merging both dataframes
95
- final_markets = pd.concat([closed_markets, all_markets], ignore_index=True)
96
- final_markets = final_markets.sort_values(by="opening_datetime", ascending=True)
97
-
98
- # Create the figure with secondary y-axis
99
- fig = make_subplots(specs=[[{"secondary_y": True}]])
100
-
101
- # Add box plots for KL divergence
102
- for creator in ["pearl", "quickstart", "all"]:
103
- fig.add_trace(
104
- go.Box(
105
- x=final_markets[final_markets["market_creator"] == creator][
106
- "month_year_week"
107
- ],
108
- y=final_markets[final_markets["market_creator"] == creator][
109
- "kl_divergence"
110
- ],
111
- name=creator,
112
- boxmean=True,
113
- marker_color={
114
- "pearl": "purple",
115
- "quickstart": "goldenrod",
116
- "all": "darkgreen",
117
- }[creator],
118
- ),
119
- secondary_y=False,
120
- )
121
- fig.add_trace(
122
- go.Scatter(
123
- x=final_markets[final_markets["market_creator"] == creator][
124
- "month_year_week"
125
- ],
126
- y=final_markets[final_markets["market_creator"] == creator][
127
- "off_by_perc"
128
- ],
129
- opacity=0.01,
130
- showlegend=False,
131
- hoverinfo="skip",
132
- ),
133
- secondary_y=True,
134
- )
135
- # Update layout
136
- fig.update_layout(
137
- xaxis_title="Markets closing Week",
138
- # legend=dict(yanchor="top", y=0.5, xanchor="left", x=-0.5),
139
- legend=dict(yanchor="top", y=0.5),
140
- boxmode="group",
141
- width=1000, # Adjusted for better fit on laptop screens
142
- height=600, # Adjusted for better fit on laptop screens
143
- )
144
- # Update y-axes
145
- fig.update_yaxes(
146
- title_text="Kullback–Leibler divergence", secondary_y=False, side="left"
147
- )
148
- fig.update_yaxes(
149
- title_text="Off by percentage between probabilities",
150
- secondary_y=True,
151
- side="right",
152
- )
153
-
154
- # Update x-axis
155
- fig.update_xaxes(tickformat="%b %d\n%Y")
156
-
157
- return gr.Plot(
158
- value=fig,
159
- )
 
30
  xaxis_title="Markets closing Week",
31
  yaxis_title="Kullback–Leibler divergence",
32
  legend=dict(yanchor="top", y=0.5),
33
+ width=1000, # Adjusted for better fit on laptop screens
34
+ height=600, # Adjusted for better fit on laptop screens
35
  )
36
 
37
  fig.update_xaxes(tickformat="%b %d\n%Y")
 
85
  return gr.Plot(
86
  value=fig,
87
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tabs/trader_plots.py CHANGED
@@ -26,28 +26,19 @@ def get_metrics_text() -> gr.Markdown:
26
 
27
  def get_interpretation_text() -> gr.Markdown:
28
  interpretation_text = """
29
- ## Interpreting KL-divergence values
30
- ### Moderate divergence:
31
- * Y 0.9163
32
- * Wrong market prediction closing at [0.4, 0.6] or [0.6, 0.4]
33
- * Y 1.2040
34
- * Wrong market prediction closing at [0.3, 0.7] or [0.7, 0.3]
35
- * Y 1.6093
36
- * Wrong market prediction closing at [0.2, 0.8] or [0.8, 0.2]
37
- * Y 2.3026
38
- * Wrong market prediction closing at [0.1, 0.9] or [0.9, 0.1]
39
-
40
- ### High divergence:
41
- * Y 4.6052
42
- * Very wrong prediction closing at [0.01, 0.99] or [0.99, 0.01]
43
- * Y ≈ 6.9078
44
- * Extremely wrong prediction closing at [0.001, 0.999] or [0.999, 0.001]
45
-
46
- ### Edge cases:
47
- * Y ≈ 13.8155
48
- * Near-certain wrong prediction closing at [1e-6, 0.999999] or [0.999999, 1e-6]
49
- * Y = 20 (capped)
50
- * Represents cases where the market prediction was essentially opposite to the outcome.
51
  """
52
  return gr.Markdown(interpretation_text)
53
 
 
26
 
27
  def get_interpretation_text() -> gr.Markdown:
28
  interpretation_text = """
29
+ ## Meaning of KL-divergence values
30
+ * Y = 0.05129
31
+ * Market accuracy off by 5%
32
+ * Y = 0.1053
33
+ * Market accuracy off by 10%
34
+ * Y = 0.2876
35
+ * Market accuracy off by 25%
36
+ * Y = 0.5108
37
+ * Market accuracy off by 40%
38
+ * Y = 1.2040
39
+ * Market accuracy off by 70%
40
+ * Y = 2.3026
41
+ * Market accuracy off by 90%
 
 
 
 
 
 
 
 
 
42
  """
43
  return gr.Markdown(interpretation_text)
44