library (tidyverse)
library (tidymodels)
library (zoo)
library (hoopR)
library (gt)
set.seed (1234 )
kenpom <- read_csv ("http://mattwaite.github.io/sportsdatafiles/ratings.csv" )
namekey <- read_csv ("http://mattwaite.github.io/sportsdatafiles/nametable.csv" )
simplestats <- read_csv ("http://mattwaite.github.io/sportsdatafiles/simplestats.csv" )
teamgames <- load_mbb_team_box (seasons = 2015 : 2022 ) %>%
filter (game_date < as.Date ("2022-03-17" )) %>%
separate (field_goals_made_field_goals_attempted, into = c ("field_goals_made" ,"field_goals_attempted" )) %>%
separate (three_point_field_goals_made_three_point_field_goals_attempted, into = c ("three_point_field_goals_made" ,"three_point_field_goals_attempted" )) %>%
separate (free_throws_made_free_throws_attempted, into = c ("free_throws_made" ,"free_throws_attempted" )) %>%
mutate_at (12 : 34 , as.numeric) %>%
mutate (team_id = as.numeric (team_id))
teamgames <- teamgames %>% left_join (namekey) %>% left_join (kenpom, by= c ("team" = "team" , "season" = "year" )) %>% left_join (simplestats, by= c ("School" = "School" , "season" = "Season" ))
teamstats <- teamgames %>%
group_by (team_short_display_name, season) %>%
arrange (game_date) %>%
mutate (
team_score = ((field_goals_made- three_point_field_goals_made) * 2 ) + (three_point_field_goals_made* 3 ) + free_throws_made,
possessions = field_goals_attempted - offensive_rebounds + turnovers + (.475 * free_throws_attempted),
team_offensive_efficiency = (team_score/ possessions)* 100 ,
true_shooting_percentage = (team_score / (2 * (field_goals_attempted + (.44 * free_throws_attempted)))) * 100 ,
turnover_pct = turnovers/ (field_goals_attempted + 0.44 * free_throws_attempted + turnovers),
free_throw_factor = free_throws_made/ field_goals_attempted,
team_rolling_true_shooting_percentage = rollmean (lag (true_shooting_percentage, n= 1 ), k= 10 , align= "right" , fill= NA ),
team_rolling_turnover_percentage = rollmean (lag (turnover_pct, n= 1 ), k= 10 , align= "right" , fill= NA ),
team_rolling_free_throw_factor = rollmean (lag (free_throw_factor, n= 1 ), k= 10 , align= "right" , fill= NA ),
team_cumulative_mean_true_shooting = lag (cummean (true_shooting_percentage), n= 1 , default= 0 ),
team_cumulative_mean_turnover_percentage = lag (cummean (turnover_pct), n= 1 , default= 0 ),
team_cumulative_mean_free_throw_factor = lag (cummean (free_throw_factor), n= 1 , default= 0 ),
team_cumulative_o_eff = lag (cummean (team_offensive_efficiency), n= 1 , default= 0 ),
team_rolling_o_eff = rollmean (lag (team_offensive_efficiency, n= 1 ), k= 10 , align= "right" , fill= NA )
) %>% ungroup () %>%
rename (
team_sos = OverallSOS,
team_srs = OverallSRS,
team_luck = luck
)
teamstats <- teamstats %>%
select (game_id, team_id, team_offensive_efficiency) %>%
mutate (team_id = as.numeric (team_id)) %>%
rename (opponent_id = team_id, opponent_offensive_efficiency= team_offensive_efficiency) %>%
left_join (teamstats) %>%
group_by (team_short_display_name, season) %>%
arrange (game_date) %>%
mutate (
team_cumulative_d_eff = lag (cummean (opponent_offensive_efficiency), n= 1 , default= 0 ),
team_rolling_d_eff = rollmean (lag (opponent_offensive_efficiency, n= 1 ), k= 10 , align= "right" , fill= NA )
) %>% ungroup ()
opponent <- teamstats %>% select (game_id, team_id, offensive_rebounds, defensive_rebounds) %>% rename (opponent_id= team_id, opponent_offensive_rebounds = offensive_rebounds, opponent_defensive_rebounds= defensive_rebounds) %>% mutate (opponent_id = as.numeric (opponent_id))
newteamstats <- teamstats %>%
inner_join (opponent) %>%
mutate (
orb = offensive_rebounds / (offensive_rebounds + opponent_defensive_rebounds),
drb = defensive_rebounds / (opponent_offensive_rebounds + defensive_rebounds),
team_rolling_orb = rollmean (lag (orb, n= 1 ), k= 10 , align= "right" , fill= NA ),
team_rolling_drb = rollmean (lag (drb, n= 1 ), k= 10 , align= "right" , fill= NA ),
team_cumulative_mean_orb = lag (cummean (orb), n= 1 , default= 0 ),
team_cumulative_mean_drb = lag (cummean (drb), n= 1 , default= 0 ),
team_efficiency_margin = team_cumulative_o_eff - team_cumulative_d_eff,
team_recent_efficiency_margin = team_rolling_o_eff - team_rolling_d_eff,
team_recency = team_recent_efficiency_margin - team_efficiency_margin
)
team_side <- newteamstats %>%
select (game_id, team_id, team_short_display_name, opponent_id, game_date, season, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_sos, team_srs, team_luck, team_recency) %>% na.omit ()
opponent_side <- newteamstats %>%
select (game_id, team_id, team_short_display_name, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_sos, team_srs, team_luck, team_recency) %>% na.omit () %>%
rename (
opponent_id = team_id,
opponent_short_display_name = team_short_display_name,
opponent_score = team_score,
opponent_rolling_true_shooting_percentage = team_rolling_true_shooting_percentage,
opponent_rolling_free_throw_factor = team_rolling_free_throw_factor,
opponent_rolling_turnover_percentage = team_rolling_turnover_percentage,
opponent_rolling_orb = team_rolling_orb,
opponent_rolling_drb = team_rolling_drb,
opponent_cumulative_mean_true_shooting = team_cumulative_mean_true_shooting,
opponent_cumulative_mean_turnover_percentage = team_cumulative_mean_turnover_percentage,
opponent_cumulative_mean_free_throw_factor = team_cumulative_mean_free_throw_factor,
opponent_cumulative_mean_orb = team_cumulative_mean_orb,
opponent_cumulative_mean_drb = team_cumulative_mean_drb,
opponent_cumulative_o_eff = team_cumulative_o_eff,
opponent_cumulative_d_eff = team_cumulative_d_eff,
opponent_efficiency_margin = team_efficiency_margin,
opponent_srs = team_srs,
opponent_sos = team_sos,
opponent_luck = team_luck,
opponent_recency = team_recency
) %>%
mutate (
opponent_id = as.numeric (opponent_id)
)
games <- team_side %>% inner_join (opponent_side) %>% mutate (
TeamResult = as.factor (case_when (
team_score > opponent_score ~ "W" ,
opponent_score > team_score ~ "L"
))) %>% na.omit ()
games$ TeamResult <- relevel (games$ TeamResult, ref= "W" )
cumulativesimplemodelgames <- games %>% select (game_id, game_date, team_short_display_name, opponent_short_display_name, season, opponent_efficiency_margin, team_efficiency_margin, team_sos, team_srs, opponent_sos, opponent_srs, opponent_luck, team_luck, opponent_recency, team_recency, TeamResult) %>% na.omit ()
cumulative_split <- initial_split (cumulativesimplemodelgames, prop = .8 )
cumulative_train <- training (cumulative_split)
cumulative_test <- testing (cumulative_split)
cumulative_recipe <-
recipe (TeamResult ~ ., data = cumulative_train) %>%
update_role (game_id, game_date, team_short_display_name, opponent_short_display_name, season, new_role = "ID" ) %>%
step_normalize (all_predictors ())
log_mod <-
logistic_reg () %>%
set_engine ("glm" ) %>%
set_mode ("classification" )
log_workflow <-
workflow () %>%
add_model (log_mod) %>%
add_recipe (cumulative_recipe)
log_fit <-
log_workflow %>%
fit (data = cumulative_train)
teamstats <- teamgames %>%
group_by (team_short_display_name, season) %>%
arrange (game_date) %>%
mutate (
team_score = ((field_goals_made- three_point_field_goals_made) * 2 ) + (three_point_field_goals_made* 3 ) + free_throws_made,
possessions = field_goals_attempted - offensive_rebounds + turnovers + (.475 * free_throws_attempted),
team_offensive_efficiency = (team_score/ possessions)* 100 ,
true_shooting_percentage = (team_score / (2 * (field_goals_attempted + (.44 * free_throws_attempted)))) * 100 ,
turnover_pct = turnovers/ (field_goals_attempted + 0.44 * free_throws_attempted + turnovers),
free_throw_factor = free_throws_made/ field_goals_attempted,
team_rolling_true_shooting_percentage = rollmean (true_shooting_percentage, k= 5 , align= "right" , fill= NA ),
team_rolling_turnover_percentage = rollmean (turnover_pct, k= 5 , align= "right" , fill= NA ),
team_rolling_free_throw_factor = rollmean (free_throw_factor, k= 4 , align= "right" , fill= NA ),
team_cumulative_mean_true_shooting = cummean (true_shooting_percentage),
team_cumulative_mean_turnover_percentage = cummean (turnover_pct),
team_cumulative_mean_free_throw_factor = cummean (free_throw_factor),
team_cumulative_o_eff = cummean (team_offensive_efficiency),
team_rolling_o_eff = rollmean (team_offensive_efficiency, k= 10 , align= "right" , fill= NA )
) %>% ungroup ()
teamstats <- teamstats %>%
select (game_id, team_id, team_offensive_efficiency) %>%
mutate (team_id = as.numeric (team_id)) %>%
rename (opponent_id = team_id, opponent_offensive_efficiency= team_offensive_efficiency) %>%
left_join (teamstats) %>%
group_by (team_short_display_name, season) %>%
arrange (game_date) %>%
mutate (
team_cumulative_d_eff = cummean (opponent_offensive_efficiency),
team_rolling_d_eff = rollmean (opponent_offensive_efficiency, k= 10 , align= "right" , fill= NA )
) %>% ungroup () %>%
rename (
team_sos = OverallSOS,
team_srs = OverallSRS,
team_luck = luck
)
opponent <- teamstats %>% select (game_id, team_id, offensive_rebounds, defensive_rebounds) %>% rename (opponent_id= team_id, opponent_offensive_rebounds = offensive_rebounds, opponent_defensive_rebounds= defensive_rebounds) %>% mutate (opponent_id = as.numeric (opponent_id))
newteamstats <- teamstats %>%
inner_join (opponent) %>%
mutate (
orb = offensive_rebounds / (offensive_rebounds + opponent_defensive_rebounds),
drb = defensive_rebounds / (opponent_offensive_rebounds + defensive_rebounds),
team_rolling_orb = rollmean (orb, k= 5 , align= "right" , fill= NA ),
team_rolling_drb = rollmean (drb, k= 5 , align= "right" , fill= NA ),
team_cumulative_mean_orb = cummean (orb),
team_cumulative_mean_drb = cummean (drb),
team_efficiency_margin = team_cumulative_o_eff - team_cumulative_d_eff,
team_recent_efficiency_margin = team_rolling_o_eff - team_rolling_d_eff,
team_recency = team_recent_efficiency_margin - team_efficiency_margin
)
team_side <- newteamstats %>%
select (game_id, team_id, team_short_display_name, opponent_id, game_date, season, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_rolling_o_eff, team_rolling_d_eff, team_sos, team_srs, team_luck, team_recency) %>% na.omit ()
opponent_side <- newteamstats %>%
select (game_id, team_id, team_short_display_name, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_rolling_o_eff, team_rolling_d_eff, team_sos, team_srs, team_luck, team_recency) %>% na.omit () %>%
rename (
opponent_id = team_id,
opponent_short_display_name = team_short_display_name,
opponent_score = team_score,
opponent_rolling_true_shooting_percentage = team_rolling_true_shooting_percentage,
opponent_rolling_free_throw_factor = team_rolling_free_throw_factor,
opponent_rolling_turnover_percentage = team_rolling_turnover_percentage,
opponent_rolling_orb = team_rolling_orb,
opponent_rolling_drb = team_rolling_drb,
opponent_cumulative_mean_true_shooting = team_cumulative_mean_true_shooting,
opponent_cumulative_mean_turnover_percentage = team_cumulative_mean_turnover_percentage,
opponent_cumulative_mean_free_throw_factor = team_cumulative_mean_free_throw_factor,
opponent_cumulative_mean_orb = team_cumulative_mean_orb,
opponent_cumulative_mean_drb = team_cumulative_mean_drb,
opponent_cumulative_o_eff = team_cumulative_o_eff,
opponent_cumulative_d_eff = team_cumulative_d_eff,
opponent_efficiency_margin = team_efficiency_margin,
opponent_rolling_o_eff = team_rolling_o_eff,
opponent_rolling_d_eff = team_rolling_d_eff,
opponent_srs = team_srs,
opponent_sos = team_sos,
opponent_luck = team_luck,
opponent_recency = team_recency
) %>%
mutate (
opponent_id = as.numeric (opponent_id)
)
games <- team_side %>% inner_join (opponent_side) %>% mutate (
TeamResult = as.factor (case_when (
team_score > opponent_score ~ "W" ,
opponent_score > team_score ~ "L"
))) %>% na.omit ()
games$ TeamResult <- relevel (games$ TeamResult, ref= "W" )
cumulativesimplemodelgames <- games %>% select (game_id, game_date, team_short_display_name, opponent_short_display_name, season, opponent_efficiency_margin, team_efficiency_margin, team_sos, team_srs, opponent_sos, opponent_srs, team_luck, opponent_luck, team_recency, opponent_recency, TeamResult)
westround1games <- tibble (
team_short_display_name= "Gonzaga" ,
opponent_short_display_name= "Georgia State"
) %>% add_row (
team_short_display_name= "Boise State" ,
opponent_short_display_name= "Memphis"
) %>% add_row (
team_short_display_name= "UConn" ,
opponent_short_display_name= "New Mexico St"
) %>% add_row (
team_short_display_name= "Arkansas" ,
opponent_short_display_name= "Vermont"
) %>% add_row (
team_short_display_name= "Alabama" ,
opponent_short_display_name= "Notre Dame"
) %>% add_row (
team_short_display_name= "Texas Tech" ,
opponent_short_display_name= "Montana State"
) %>% add_row (
team_short_display_name= "Michigan State" ,
opponent_short_display_name= "Davidson"
) %>% add_row (
team_short_display_name= "Duke" ,
opponent_short_display_name= "CSU Fullerton"
)
westround1games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (westround1games)
westround1games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (westround1games)
westround1log <- log_fit %>% predict (new_data = westround1games) %>%
bind_cols (westround1games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
westround1log <- log_fit %>% predict (new_data = westround1log, type= "prob" ) %>%
bind_cols (westround1log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
westround2games <- tibble (
team_short_display_name= "Gonzaga" ,
opponent_short_display_name= "Memphis"
) %>% add_row (
team_short_display_name= "UConn" ,
opponent_short_display_name= "Arkansas"
) %>% add_row (
team_short_display_name= "Alabama" ,
opponent_short_display_name= "Texas Tech"
) %>% add_row (
team_short_display_name= "Michigan State" ,
opponent_short_display_name= "Duke"
)
westround2games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (westround2games)
westround2games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (westround2games)
westround2log <- log_fit %>% predict (new_data = westround2games) %>%
bind_cols (westround2games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
westround2log <- log_fit %>% predict (new_data = westround2log, type= "prob" ) %>%
bind_cols (westround2log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
westround3games <- tibble (
team_short_display_name= "Gonzaga" ,
opponent_short_display_name= "Arkansas"
) %>% add_row (
team_short_display_name= "Alabama" ,
opponent_short_display_name= "Duke"
)
westround3games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (westround3games)
westround3games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (westround3games)
westround3log <- log_fit %>% predict (new_data = westround3games) %>%
bind_cols (westround3games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
westround3log <- log_fit %>% predict (new_data = westround3log, type= "prob" ) %>%
bind_cols (westround3log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
westround4games <- tibble (
team_short_display_name= "Gonzaga" ,
opponent_short_display_name= "Alabama"
)
westround4games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (westround4games)
westround4games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (westround4games)
westround4log <- log_fit %>% predict (new_data = westround4games) %>%
bind_cols (westround4games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
westround4log <- log_fit %>% predict (new_data = westround4log, type= "prob" ) %>%
bind_cols (westround4log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
eastround1games <- tibble (
team_short_display_name= "Baylor" ,
opponent_short_display_name= "Norfolk State"
) %>% add_row (
team_short_display_name= "North Carolina" ,
opponent_short_display_name= "Marquette"
) %>% add_row (
team_short_display_name= "Saint Mary's" ,
opponent_short_display_name= "Indiana"
) %>% add_row (
team_short_display_name= "UCLA" ,
opponent_short_display_name= "Akron"
) %>% add_row (
team_short_display_name= "Texas" ,
opponent_short_display_name= "Virginia Tech"
) %>% add_row (
team_short_display_name= "Purdue" ,
opponent_short_display_name= "Yale"
) %>% add_row (
team_short_display_name= "Murray State" ,
opponent_short_display_name= "San Francisco"
) %>% add_row (
team_short_display_name= "Kentucky" ,
opponent_short_display_name= "Saint Peter's"
)
eastround1games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (eastround1games)
eastround1games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (eastround1games)
eastround1log <- log_fit %>% predict (new_data = eastround1games) %>%
bind_cols (eastround1games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
eastround1log <- log_fit %>% predict (new_data = eastround1log, type= "prob" ) %>%
bind_cols (eastround1log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
eastround2games <- tibble (
team_short_display_name= "Baylor" ,
opponent_short_display_name= "North Carolina"
) %>% add_row (
team_short_display_name= "Indiana" ,
opponent_short_display_name= "UCLA"
) %>% add_row (
team_short_display_name= "Texas" ,
opponent_short_display_name= "Purdue"
) %>% add_row (
team_short_display_name= "San Francisco" ,
opponent_short_display_name= "Kentucky"
)
eastround2games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (eastround2games)
eastround2games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (eastround2games)
eastround2log <- log_fit %>% predict (new_data = eastround2games) %>%
bind_cols (eastround2games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
eastround2log <- log_fit %>% predict (new_data = eastround2log, type= "prob" ) %>%
bind_cols (eastround2log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
eastround3games <- tibble (
team_short_display_name= "Baylor" ,
opponent_short_display_name= "UCLA"
) %>% add_row (
team_short_display_name= "Purdue" ,
opponent_short_display_name= "Kentucky"
)
eastround3games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (eastround3games)
eastround3games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (eastround3games)
eastround3log <- log_fit %>% predict (new_data = eastround3games) %>%
bind_cols (eastround3games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
eastround3log <- log_fit %>% predict (new_data = eastround3log, type= "prob" ) %>%
bind_cols (eastround3log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
eastround4games <- tibble (
team_short_display_name= "Baylor" ,
opponent_short_display_name= "Purdue"
)
eastround4games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (eastround4games)
eastround4games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (eastround4games)
eastround4log <- log_fit %>% predict (new_data = eastround4games) %>%
bind_cols (eastround4games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
eastround4log <- log_fit %>% predict (new_data = eastround4log, type= "prob" ) %>%
bind_cols (eastround4log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
southround1games <- tibble (
team_short_display_name= "Arizona" ,
opponent_short_display_name= "Wright State"
) %>% add_row (
team_short_display_name= "Seton Hall" ,
opponent_short_display_name= "TCU"
) %>% add_row (
team_short_display_name= "Houston" ,
opponent_short_display_name= "UAB"
) %>% add_row (
team_short_display_name= "Illinois" ,
opponent_short_display_name= "Chattanooga"
) %>% add_row (
team_short_display_name= "Colorado State" ,
opponent_short_display_name= "Michigan"
) %>% add_row (
team_short_display_name= "Tennessee" ,
opponent_short_display_name= "Longwood"
) %>% add_row (
team_short_display_name= "Ohio State" ,
opponent_short_display_name= "Loyola Chicago"
) %>% add_row (
team_short_display_name= "Villanova" ,
opponent_short_display_name= "Delaware"
)
southround1games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (southround1games)
southround1games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (southround1games)
southround1log <- log_fit %>% predict (new_data = southround1games) %>%
bind_cols (southround1games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
southround1log <- log_fit %>% predict (new_data = southround1log, type= "prob" ) %>%
bind_cols (southround1log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
southround2games <- tibble (
team_short_display_name= "Arizona" ,
opponent_short_display_name= "Seton Hall"
) %>% add_row (
team_short_display_name= "Houston" ,
opponent_short_display_name= "Illinois"
) %>% add_row (
team_short_display_name= "Michigan" ,
opponent_short_display_name= "Tennessee"
) %>% add_row (
team_short_display_name= "Ohio State" ,
opponent_short_display_name= "Villanova"
)
southround2games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (southround2games)
southround2games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (southround2games)
southround2log <- log_fit %>% predict (new_data = southround2games) %>%
bind_cols (southround2games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
southround2log <- log_fit %>% predict (new_data = southround2log, type= "prob" ) %>%
bind_cols (southround2log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
southround3games <- tibble (
team_short_display_name= "Arizona" ,
opponent_short_display_name= "Illinois"
) %>% add_row (
team_short_display_name= "Michigan" ,
opponent_short_display_name= "Villanova"
)
southround3games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (southround3games)
southround3games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (southround3games)
southround3log <- log_fit %>% predict (new_data = southround3games) %>%
bind_cols (southround3games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
southround3log <- log_fit %>% predict (new_data = southround3log, type= "prob" ) %>%
bind_cols (southround3log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
southround4games <- tibble (
team_short_display_name= "Michigan" ,
opponent_short_display_name= "Illinois"
)
southround4games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (southround4games)
southround4games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (southround4games)
southround4log <- log_fit %>% predict (new_data = southround4games) %>%
bind_cols (southround4games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
southround4log <- log_fit %>% predict (new_data = southround4log, type= "prob" ) %>%
bind_cols (southround4log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
midwestround1games <- tibble (
team_short_display_name= "Kansas" ,
opponent_short_display_name= "Texas Southern"
) %>% add_row (
team_short_display_name= "San Diego State" ,
opponent_short_display_name= "Creighton"
) %>% add_row (
team_short_display_name= "Iowa" ,
opponent_short_display_name= "Richmond"
) %>% add_row (
team_short_display_name= "Providence" ,
opponent_short_display_name= "S Dakota St"
) %>% add_row (
team_short_display_name= "LSU" ,
opponent_short_display_name= "Iowa State"
) %>% add_row (
team_short_display_name= "Wisconsin" ,
opponent_short_display_name= "Colgate"
) %>% add_row (
team_short_display_name= "USC" ,
opponent_short_display_name= "Miami"
) %>% add_row (
team_short_display_name= "Auburn" ,
opponent_short_display_name= "J'Ville St"
)
midwestround1games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (midwestround1games)
midwestround1games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (midwestround1games)
midwestround1log <- log_fit %>% predict (new_data = midwestround1games) %>%
bind_cols (midwestround1games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
midwestround1log <- log_fit %>% predict (new_data = midwestround1log, type= "prob" ) %>%
bind_cols (midwestround1log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
midwestround2games <- tibble (
team_short_display_name= "Kansas" ,
opponent_short_display_name= "Creighton"
) %>% add_row (
team_short_display_name= "Iowa" ,
opponent_short_display_name= "Providence"
) %>% add_row (
team_short_display_name= "LSU" ,
opponent_short_display_name= "Wisconsin"
) %>% add_row (
team_short_display_name= "USC" ,
opponent_short_display_name= "Auburn"
)
midwestround2games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (midwestround2games)
midwestround2games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (midwestround2games)
midwestround2log <- log_fit %>% predict (new_data = midwestround2games) %>%
bind_cols (midwestround2games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
midwestround2log <- log_fit %>% predict (new_data = midwestround2log, type= "prob" ) %>%
bind_cols (midwestround2log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
midwestround3games <- tibble (
team_short_display_name= "Creighton" ,
opponent_short_display_name= "Iowa"
) %>% add_row (
team_short_display_name= "Wisconsin" ,
opponent_short_display_name= "USC"
)
midwestround3games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (midwestround3games)
midwestround3games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (midwestround3games)
midwestround3log <- log_fit %>% predict (new_data = midwestround3games) %>%
bind_cols (midwestround3games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
midwestround3log <- log_fit %>% predict (new_data = midwestround3log, type= "prob" ) %>%
bind_cols (midwestround3log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
midwestround4games <- tibble (
team_short_display_name= "Creighton" ,
opponent_short_display_name= "USC"
)
midwestround4games <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (midwestround4games)
midwestround4games <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (midwestround4games)
midwestround4log <- log_fit %>% predict (new_data = midwestround4games) %>%
bind_cols (midwestround4games) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
midwestround4log <- log_fit %>% predict (new_data = midwestround4log, type= "prob" ) %>%
bind_cols (midwestround4log) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
finalfourgames <- tibble (
team_short_display_name= "Gonzaga" ,
opponent_short_display_name= "Baylor"
) %>% add_row (
team_short_display_name= "Illinois" ,
opponent_short_display_name= "USC"
)
finalfourgames <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (finalfourgames)
finalfourgames <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (finalfourgames)
finalfourlog <- log_fit %>% predict (new_data = finalfourgames) %>%
bind_cols (finalfourgames) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
finalfourlog <- log_fit %>% predict (new_data = finalfourlog, type= "prob" ) %>%
bind_cols (finalfourlog) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())
champs <- tibble (
team_short_display_name= "Baylor" ,
opponent_short_display_name= "Illinois"
)
champs <- cumulativesimplemodelgames %>% group_by (team_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% select (- TeamResult, - starts_with ("opponent" )) %>% right_join (champs)
champs <- cumulativesimplemodelgames %>% group_by (opponent_short_display_name) %>% filter (game_date == max (game_date) & season == 2022 ) %>% slice (1 ) %>% ungroup () %>% select (- TeamResult, - starts_with ("team" ), - game_id, - game_date, - season) %>% right_join (champs)
champslog <- log_fit %>% predict (new_data = champs) %>%
bind_cols (champs) %>% select (.pred_class, team_short_display_name, opponent_short_display_name, everything ())
champslog <- log_fit %>% predict (new_data = champslog, type= "prob" ) %>%
bind_cols (champslog) %>% select (.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything ())