patrickramos committed on
Commit
eaf2663
·
1 Parent(s): c0335c8

Add date filtering for pitcher dashboard

Browse files
daily_weekly_leaderboard.py CHANGED
@@ -10,7 +10,7 @@ import datetime
10
  df = (
11
  df
12
  # .join(game_df, on='game_pk')
13
- .with_columns(pl.col('game_date').str.to_datetime())
14
  .rename({
15
  'name': 'Name',
16
  'release_speed': 'Velocity',
 
10
  df = (
11
  df
12
  # .join(game_df, on='game_pk')
13
+ # .with_columns(pl.col('game_date').str.to_datetime())
14
  .rename({
15
  'name': 'Name',
16
  'release_speed': 'Velocity',
data.py CHANGED
@@ -8,6 +8,7 @@ from tqdm.auto import tqdm
8
  import os
9
  import re
10
 
 
11
  from translate import (
12
  translate_pa_outcome, translate_pitch_outcome,
13
  jp_pitch_to_en_pitch, jp_pitch_to_pitch_code,
@@ -31,7 +32,7 @@ def identify_bb_type(hit_type):
31
 
32
 
33
  DATA_DIR = 'data'
34
- SEASONS = sorted([folder for folder in os.listdir(DATA_DIR) if not folder.startswith('.')])
35
 
36
  game_df, pa_df, pitch_df, player_df, df = [], [], [], [], []
37
 
@@ -117,10 +118,10 @@ for season in SEASONS:
117
  pl.col('pitch_name').alias('jp_pitch_name')
118
  )
119
  .with_columns(
120
- pl.col('jp_pitch_name').map_elements(lambda pitch_name: jp_pitch_to_en_pitch[pitch_name], return_dtype=str).alias('pitch_name'),
121
- # pl.col('jp_pitch_name').replace_strict(jp_pitch_to_en_pitch).alias('pitch_name'),
122
- pl.col('jp_pitch_name').map_elements(lambda pitch_name: jp_pitch_to_pitch_code[pitch_name], return_dtype=str).alias('pitch_type'),
123
- # pl.col('jp_pitch_name').map_elements(jp_pitch_to_pitch_code).alias('pitch_type'),
124
  pl.col('description').str.split(' ').list.first().map_elements(translate_pitch_outcome, return_dtype=str),
125
  pl.when(
126
  pl.col('release_speed') != '-'
@@ -167,7 +168,8 @@ for season in SEASONS:
167
  pl.col('description').is_in(['SS', 'K']).alias('whiff'),
168
  ~pl.col('description').is_in(['B', 'BB', 'LS', 'inv_K', 'bunt_K', 'HBP', 'SH', 'SH E', 'SH FC', 'obstruction', 'illegal_pitch', 'defensive_interference']).alias('swing'),
169
  pl.col('description').is_in(['SS', 'K', 'LS', 'inv_K']).alias('csw'),
170
- ~pl.col('description').is_in(['obstruction', 'illegal_pitch', 'defensive_interference']).alias('normal_pitch') # guess
 
171
  )
172
  ).sort(['game_pk', 'pa_pk', 'pitch_id'])
173
 
 
8
  import os
9
  import re
10
 
11
+ from seasons import SEASONS
12
  from translate import (
13
  translate_pa_outcome, translate_pitch_outcome,
14
  jp_pitch_to_en_pitch, jp_pitch_to_pitch_code,
 
32
 
33
 
34
  DATA_DIR = 'data'
35
+ SEASONS = [str(season) for season in SEASONS]
36
 
37
  game_df, pa_df, pitch_df, player_df, df = [], [], [], [], []
38
 
 
118
  pl.col('pitch_name').alias('jp_pitch_name')
119
  )
120
  .with_columns(
121
+ # pl.col('jp_pitch_name').map_elements(lambda pitch_name: jp_pitch_to_en_pitch[pitch_name], return_dtype=str).alias('pitch_name'),
122
+ pl.col('jp_pitch_name').replace_strict(jp_pitch_to_en_pitch).alias('pitch_name'),
123
+ # pl.col('jp_pitch_name').map_elements(lambda pitch_name: jp_pitch_to_pitch_code[pitch_name], return_dtype=str).alias('pitch_type'),
124
+ pl.col('jp_pitch_name').replace_strict(jp_pitch_to_pitch_code).alias('pitch_type'),
125
  pl.col('description').str.split(' ').list.first().map_elements(translate_pitch_outcome, return_dtype=str),
126
  pl.when(
127
  pl.col('release_speed') != '-'
 
168
  pl.col('description').is_in(['SS', 'K']).alias('whiff'),
169
  ~pl.col('description').is_in(['B', 'BB', 'LS', 'inv_K', 'bunt_K', 'HBP', 'SH', 'SH E', 'SH FC', 'obstruction', 'illegal_pitch', 'defensive_interference']).alias('swing'),
170
  pl.col('description').is_in(['SS', 'K', 'LS', 'inv_K']).alias('csw'),
171
+ ~pl.col('description').is_in(['obstruction', 'illegal_pitch', 'defensive_interference']).alias('normal_pitch'), # guess
172
+ pl.col('game_date').str.to_datetime()
173
  )
174
  ).sort(['game_pk', 'pa_pk', 'pitch_id'])
175
 
gradio_function.py CHANGED
@@ -26,20 +26,20 @@ INSUFFICIENT_PITCHES_MSG_MULTI_LINE = 'No visualization:<br>Not enough pitches t
26
 
27
  # GRADIO FUNCTIONS
28
 
29
- def clone_if_dataframe(item):
30
- if isinstance(item, pl.DataFrame):
31
- # print(type(item))
32
- return item.clone()
33
- else:
34
- return item
35
-
36
- def clone_df(fn):
37
- def _fn(*args, **kwargs):
38
- args = [clone_if_dataframe(arg) for arg in args]
39
- kwargs = {k: clone_if_dataframe(arg) for k, arg in kwargs.items()}
40
- return fn(*args, **kwargs)
41
- return _fn
42
-
43
  def copy_dataframe(df, num_copy_to):
44
  return [df.clone() for _ in range(num_copy_to)]
45
 
@@ -79,7 +79,7 @@ colorscale = [
79
  ]
80
 
81
 
82
- @clone_df
83
  def plot_loc(df, handedness, league_df=None, min_pitches=3, max_pitches=5000):
84
 
85
  loc = df.select(['plate_x', 'plate_z'])
@@ -166,7 +166,7 @@ def plot_loc(df, handedness, league_df=None, min_pitches=3, max_pitches=5000):
166
 
167
 
168
  # velo distribution
169
- @clone_df
170
  def plot_velo(df=None, player=None, velos=None, pitch_type=None, pitch_name=None, min_pitches=2):
171
  assert not ((velos is None and player is None) or (velos is not None and player is not None)), 'exactly one of `player` or `velos` must be specified'
172
 
@@ -212,7 +212,7 @@ def plot_velo(df=None, player=None, velos=None, pitch_type=None, pitch_name=None
212
  )
213
  return fig
214
 
215
- @clone_df
216
  def plot_velo_summary(df, league_df, player):
217
 
218
  min_pitches = 2
@@ -227,7 +227,9 @@ def plot_velo_summary(df, league_df, player):
227
 
228
  fig = go.Figure()
229
 
230
- velo_center = (player_df['release_speed'].min() + player_df['release_speed'].max()) / 2
 
 
231
  # for i, (pitch_name, count) in enumerate(pitch_counts.items()):
232
  for i, (pitch_name, count) in enumerate(pitch_counts.iter_rows()):
233
  # velos = player_df.loc[pitch_name, 'release_speed']
@@ -299,7 +301,7 @@ def plot_velo_summary(df, league_df, player):
299
  # ))
300
 
301
  # fig.update_xaxes(title='Velocity', range=[player_df['release_speed'].dropna().min() - 2, player_df['release_speed'].dropna().max() + 2])
302
- fig.update_xaxes(title='Velocity', range=[player_df['release_speed'].min() - 2, player_df['release_speed'].max() + 2])
303
  # fig.update_yaxes(range=[0, len(pitch_counts)+1-0.25], visible=False)
304
  fig.update_yaxes(range=[0, len(pitch_counts)-0.25], visible=False)
305
  fig.update_layout(
@@ -313,7 +315,8 @@ def plot_velo_summary(df, league_df, player):
313
  return fig
314
 
315
 
316
- def update_dfs(player, handedness, df):
 
317
  if handedness == 'Both':
318
  handedness_filter = pl.col('stand').is_in(['R', 'L'])
319
  # _pitch_stats = pitch_stats
@@ -327,9 +330,10 @@ def update_dfs(player, handedness, df):
327
  # _pitch_stats = lhb_pitch_stats
328
  # _league_pitch_stats = lhb_league_pitch_stats
329
  player_filter = pl.col('name') == player
330
- final_filter = player_filter & handedness_filter
 
331
  _df = df.filter(final_filter)
332
- _league_df = df.filter(handedness_filter)
333
 
334
  return (
335
  _df,
@@ -347,13 +351,13 @@ def create_set_download_file_fn(filepath):
347
  def preview_df(df):
348
  return df.head()
349
 
350
- @clone_df
351
  def plot_usage(df, player):
352
  fig = px.pie(df.select('pitch_name'), names='pitch_name')
353
  fig.update_traces(texttemplate='%{percent:.1%}', hovertemplate=f'<b>{player}</b><br>' + 'threw a <b>%{label}</b><br><b>%{percent:.1%}</b> of the time (<b>%{value}</b> pitches)')
354
  return fig
355
 
356
- @clone_df
357
  def plot_pitch_cards(df, league_df, pitch_stats, handedness):
358
  pitch_counts = df['pitch_name'].value_counts().sort('count', descending=True)
359
 
@@ -400,7 +404,7 @@ def plot_pitch_cards(df, league_df, pitch_stats, handedness):
400
 
401
  return pitch_rows + pitch_groups + pitch_names + pitch_infos + pitch_velos + pitch_locs
402
 
403
- @clone_df
404
  def update_velo_stats(pitch_stats, league_pitch_stats):
405
  return (
406
  pitch_stats
 
26
 
27
  # GRADIO FUNCTIONS
28
 
29
+ # def clone_if_dataframe(item):
30
+ # if isinstance(item, pl.DataFrame):
31
+ # # print(type(item))
32
+ # return item.clone()
33
+ # else:
34
+ # return item
35
+ #
36
+ # def clone_df(fn):
37
+ # def _fn(*args, **kwargs):
38
+ # args = [clone_if_dataframe(arg) for arg in args]
39
+ # kwargs = {k: clone_if_dataframe(arg) for k, arg in kwargs.items()}
40
+ # return fn(*args, **kwargs)
41
+ # return _fn
42
+ #
43
  def copy_dataframe(df, num_copy_to):
44
  return [df.clone() for _ in range(num_copy_to)]
45
 
 
79
  ]
80
 
81
 
82
+ # @clone_df
83
  def plot_loc(df, handedness, league_df=None, min_pitches=3, max_pitches=5000):
84
 
85
  loc = df.select(['plate_x', 'plate_z'])
 
166
 
167
 
168
  # velo distribution
169
+ # @clone_df
170
  def plot_velo(df=None, player=None, velos=None, pitch_type=None, pitch_name=None, min_pitches=2):
171
  assert not ((velos is None and player is None) or (velos is not None and player is not None)), 'exactly one of `player` or `velos` must be specified'
172
 
 
212
  )
213
  return fig
214
 
215
+ # @clone_df
216
  def plot_velo_summary(df, league_df, player):
217
 
218
  min_pitches = 2
 
227
 
228
  fig = go.Figure()
229
 
230
+ min_velo = player_df['release_speed'].min() if len(player_df) else 130
231
+ max_velo = player_df['release_speed'].max() if len(player_df) else 160
232
+ velo_center = (min_velo + max_velo) / 2
233
  # for i, (pitch_name, count) in enumerate(pitch_counts.items()):
234
  for i, (pitch_name, count) in enumerate(pitch_counts.iter_rows()):
235
  # velos = player_df.loc[pitch_name, 'release_speed']
 
301
  # ))
302
 
303
  # fig.update_xaxes(title='Velocity', range=[player_df['release_speed'].dropna().min() - 2, player_df['release_speed'].dropna().max() + 2])
304
+ fig.update_xaxes(title='Velocity', range=[min_velo - 2, max_velo + 2])
305
  # fig.update_yaxes(range=[0, len(pitch_counts)+1-0.25], visible=False)
306
  fig.update_yaxes(range=[0, len(pitch_counts)-0.25], visible=False)
307
  fig.update_layout(
 
315
  return fig
316
 
317
 
318
+ def update_dfs(player, handedness, start_date, end_date, df):
319
+ date_filter = (pl.col('game_date') >= start_date) & (pl.col('game_date') <= end_date)
320
  if handedness == 'Both':
321
  handedness_filter = pl.col('stand').is_in(['R', 'L'])
322
  # _pitch_stats = pitch_stats
 
330
  # _pitch_stats = lhb_pitch_stats
331
  # _league_pitch_stats = lhb_league_pitch_stats
332
  player_filter = pl.col('name') == player
333
+ non_player_filter = handedness_filter & date_filter
334
+ final_filter = player_filter & non_player_filter
335
  _df = df.filter(final_filter)
336
+ _league_df = df.filter(non_player_filter)
337
 
338
  return (
339
  _df,
 
351
  def preview_df(df):
352
  return df.head()
353
 
354
+ # @clone_df
355
  def plot_usage(df, player):
356
  fig = px.pie(df.select('pitch_name'), names='pitch_name')
357
  fig.update_traces(texttemplate='%{percent:.1%}', hovertemplate=f'<b>{player}</b><br>' + 'threw a <b>%{label}</b><br><b>%{percent:.1%}</b> of the time (<b>%{value}</b> pitches)')
358
  return fig
359
 
360
+ # @clone_df
361
  def plot_pitch_cards(df, league_df, pitch_stats, handedness):
362
  pitch_counts = df['pitch_name'].value_counts().sort('count', descending=True)
363
 
 
404
 
405
  return pitch_rows + pitch_groups + pitch_names + pitch_infos + pitch_velos + pitch_locs
406
 
407
+ # @clone_df
408
  def update_velo_stats(pitch_stats, league_pitch_stats):
409
  return (
410
  pitch_stats
pitch_leaderboard.py CHANGED
@@ -2,11 +2,12 @@ import gradio as gr
2
  import polars as pl
3
 
4
 
5
- from data import df, game_df, compute_pitch_stats, SEASONS
 
6
  from gradio_function import *
7
  from css import css
8
 
9
- SEASONS = [int(season) for season in SEASONS]
10
 
11
  def filter_pitch_leaderboard(season, min_pitches):
12
  return (
@@ -21,7 +22,7 @@ def create_pitch_leaderboard():
21
  css=css
22
  ) as demo:
23
  init_min_pitches = 100
24
- init_season = max(SEASONS)
25
  init_pitch_stats = filter_pitch_leaderboard(init_season, init_min_pitches)
26
  init_pitch_stats.write_csv('pitch_leaderboard.csv')
27
  pitch_leaderboard_df = gr.State(init_pitch_stats)
 
2
  import polars as pl
3
 
4
 
5
+ from data import df, game_df, compute_pitch_stats
6
+ from seasons import SEASONS, LATEST_SEASON
7
  from gradio_function import *
8
  from css import css
9
 
10
+ # SEASONS = [int(season) for season in SEASONS]
11
 
12
  def filter_pitch_leaderboard(season, min_pitches):
13
  return (
 
22
  css=css
23
  ) as demo:
24
  init_min_pitches = 100
25
+ init_season = LATEST_SEASON
26
  init_pitch_stats = filter_pitch_leaderboard(init_season, init_min_pitches)
27
  init_pitch_stats.write_csv('pitch_leaderboard.csv')
28
  pitch_leaderboard_df = gr.State(init_pitch_stats)
pitcher_dashboard.py CHANGED
@@ -1,13 +1,16 @@
1
 
2
  import gradio as gr
3
  # import pandas as pd
 
4
  import polars as pl
5
 
6
  from math import ceil
 
7
  import os
8
 
9
  from data import df, pitch_stats, league_pitch_stats, player_df
10
  from gradio_function import *
 
11
  from translate import jp_pitch_to_en_pitch, max_pitch_types
12
  from css import css
13
 
@@ -30,7 +33,10 @@ def create_pitcher_dashboard():
30
 
31
  with gr.Row():
32
  player = gr.Dropdown(value=None, choices=sorted(player_df.filter(pl.col('name').is_not_null())['name'].to_list()), label='Player')
 
 
33
  handedness = gr.Radio(value='Both', choices=['Both', 'Left', 'Right'], type='value', interactive=False, label='Batter Handedness')
 
34
 
35
  # preview = gr.DataFrame()
36
  download_file = gr.DownloadButton(label='Download player data')
@@ -91,12 +97,21 @@ def create_pitcher_dashboard():
91
  fn_configs[k]['df'] = gr.State(df)
92
  fn_configs[k]['inputs'] = [fn_configs[k]['df']] + fn_configs[k]['inputs']
93
 
 
 
 
 
 
 
94
  (
95
  player
96
- .input(update_dfs, inputs=[player, handedness, source_df], outputs=[app_df, app_league_df, app_pitch_stats, app_league_pitch_stats])
97
  .then(lambda : gr.update(value='Both', interactive=True), outputs=handedness)
 
98
  )
99
- handedness.input(update_dfs, inputs=[player, handedness, source_df], outputs=[app_df, app_league_df, app_pitch_stats, app_league_pitch_stats])
 
 
100
 
101
  # app_df.change(preview_df, inputs=app_df, outputs=preview)
102
  # app_df.change(set_download_file, inputs=app_df, outputs=download_file)
 
1
 
2
  import gradio as gr
3
  # import pandas as pd
4
+ from gradio_calendar import Calendar
5
  import polars as pl
6
 
7
  from math import ceil
8
+ import datetime
9
  import os
10
 
11
  from data import df, pitch_stats, league_pitch_stats, player_df
12
  from gradio_function import *
13
+ from seasons import LATEST_SEASON
14
  from translate import jp_pitch_to_en_pitch, max_pitch_types
15
  from css import css
16
 
 
33
 
34
  with gr.Row():
35
  player = gr.Dropdown(value=None, choices=sorted(player_df.filter(pl.col('name').is_not_null())['name'].to_list()), label='Player')
36
+ start_date = Calendar(value=f'{LATEST_SEASON}-03-01', type='datetime', label='Start Date')
37
+ end_date = Calendar(value=f'{LATEST_SEASON}-11-30', type='datetime', label='End Date')
38
  handedness = gr.Radio(value='Both', choices=['Both', 'Left', 'Right'], type='value', interactive=False, label='Batter Handedness')
39
+ gr.Markdown('Note: We do not have spring training data, or 2024 postseason data')
40
 
41
  # preview = gr.DataFrame()
42
  download_file = gr.DownloadButton(label='Download player data')
 
97
  fn_configs[k]['df'] = gr.State(df)
98
  fn_configs[k]['inputs'] = [fn_configs[k]['df']] + fn_configs[k]['inputs']
99
 
100
+ update_dfs_kwargs = dict(
101
+ fn=update_dfs,
102
+ inputs=[player, handedness, start_date, end_date, source_df],
103
+ outputs=[app_df, app_league_df, app_pitch_stats, app_league_pitch_stats]
104
+ )
105
+ non_player_search_inputs = [handedness, start_date, end_date]
106
  (
107
  player
108
+ .input(**update_dfs_kwargs)
109
  .then(lambda : gr.update(value='Both', interactive=True), outputs=handedness)
110
+ # .then(lambda: [gr.update(interactive=True) for _ in range(len(non_player_search_inputs))], outputs=non_player_search_inputs) # breaks Calendar for some reason
111
  )
112
+ for component in non_player_search_inputs:
113
+ component.input(**update_dfs_kwargs)
114
+ # start_date.input(**update_dfs_kwargs)
115
 
116
  # app_df.change(preview_df, inputs=app_df, outputs=preview)
117
  # app_df.change(set_download_file, inputs=app_df, outputs=download_file)
seasons.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import os
2
+
3
+ DATA_DIR = 'data'
4
+ SEASONS = sorted([int(folder) for folder in os.listdir(DATA_DIR) if not folder.startswith('.')])
5
+ LATEST_SEASON = max(SEASONS)