library(tidyverse)
library(tidymodels)
library(zoo)
library(hoopR)
library(gt)
set.seed(1234)
kenpom <- read_csv("http://mattwaite.github.io/sportsdatafiles/ratings.csv")
namekey <- read_csv("http://mattwaite.github.io/sportsdatafiles/nametable.csv")
simplestats <- read_csv("http://mattwaite.github.io/sportsdatafiles/simplestats.csv")
teamgames <- load_mbb_team_box(seasons = 2015:2022) %>%
filter(game_date < as.Date("2022-03-17")) %>%
separate(field_goals_made_field_goals_attempted, into = c("field_goals_made","field_goals_attempted")) %>%
separate(three_point_field_goals_made_three_point_field_goals_attempted, into = c("three_point_field_goals_made","three_point_field_goals_attempted")) %>%
separate(free_throws_made_free_throws_attempted, into = c("free_throws_made","free_throws_attempted")) %>%
mutate_at(12:34, as.numeric) %>%
mutate(team_id = as.numeric(team_id))
teamgames <- teamgames %>% left_join(namekey) %>% left_join(kenpom, by=c("team" = "team", "season"="year")) %>% left_join(simplestats, by=c("School" = "School", "season" = "Season"))
teamstats <- teamgames %>%
group_by(team_short_display_name, season) %>%
arrange(game_date) %>%
mutate(
team_score = ((field_goals_made-three_point_field_goals_made) * 2) + (three_point_field_goals_made*3) + free_throws_made,
possessions = field_goals_attempted - offensive_rebounds + turnovers + (.475 * free_throws_attempted),
team_offensive_efficiency = (team_score/possessions)*100,
true_shooting_percentage = (team_score / (2*(field_goals_attempted + (.44 * free_throws_attempted)))) * 100,
turnover_pct = turnovers/(field_goals_attempted + 0.44 * free_throws_attempted + turnovers),
free_throw_factor = free_throws_made/field_goals_attempted,
team_rolling_true_shooting_percentage = rollmean(lag(true_shooting_percentage, n=1), k=10, align="right", fill=NA),
team_rolling_turnover_percentage = rollmean(lag(turnover_pct, n=1), k=10, align="right", fill=NA),
team_rolling_free_throw_factor = rollmean(lag(free_throw_factor, n=1), k=10, align="right", fill=NA),
team_cumulative_mean_true_shooting = lag(cummean(true_shooting_percentage), n=1, default=0),
team_cumulative_mean_turnover_percentage = lag(cummean(turnover_pct), n=1, default=0),
team_cumulative_mean_free_throw_factor = lag(cummean(free_throw_factor), n=1, default=0),
team_cumulative_o_eff = lag(cummean(team_offensive_efficiency), n=1, default=0),
team_rolling_o_eff = rollmean(lag(team_offensive_efficiency, n=1), k=10, align="right", fill=NA)
) %>% ungroup() %>%
rename(
team_sos = OverallSOS,
team_srs = OverallSRS,
team_luck = luck
)
teamstats <- teamstats %>%
select(game_id, team_id, team_offensive_efficiency) %>%
mutate(team_id = as.numeric(team_id)) %>%
rename(opponent_id = team_id, opponent_offensive_efficiency=team_offensive_efficiency) %>%
left_join(teamstats) %>%
group_by(team_short_display_name, season) %>%
arrange(game_date) %>%
mutate(
team_cumulative_d_eff = lag(cummean(opponent_offensive_efficiency), n=1, default=0),
team_rolling_d_eff = rollmean(lag(opponent_offensive_efficiency, n=1), k=10, align="right", fill=NA)
) %>% ungroup()
opponent <- teamstats %>% select(game_id, team_id, offensive_rebounds, defensive_rebounds) %>% rename(opponent_id=team_id, opponent_offensive_rebounds = offensive_rebounds, opponent_defensive_rebounds=defensive_rebounds) %>% mutate(opponent_id = as.numeric(opponent_id))
newteamstats <- teamstats %>%
inner_join(opponent) %>%
mutate(
orb = offensive_rebounds / (offensive_rebounds + opponent_defensive_rebounds),
drb = defensive_rebounds / (opponent_offensive_rebounds + defensive_rebounds),
team_rolling_orb = rollmean(lag(orb, n=1), k=10, align="right", fill=NA),
team_rolling_drb = rollmean(lag(drb, n=1), k=10, align="right", fill=NA),
team_cumulative_mean_orb = lag(cummean(orb), n=1, default=0),
team_cumulative_mean_drb = lag(cummean(drb), n=1, default=0),
team_efficiency_margin = team_cumulative_o_eff - team_cumulative_d_eff,
team_recent_efficiency_margin = team_rolling_o_eff - team_rolling_d_eff,
team_recency = team_recent_efficiency_margin - team_efficiency_margin
)
team_side <- newteamstats %>%
select(game_id, team_id, team_short_display_name, opponent_id, game_date, season, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_sos, team_srs, team_luck, team_recency) %>% na.omit()
opponent_side <- newteamstats %>%
select(game_id, team_id, team_short_display_name, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_sos, team_srs, team_luck, team_recency) %>% na.omit() %>%
rename(
opponent_id = team_id,
opponent_short_display_name = team_short_display_name,
opponent_score = team_score,
opponent_rolling_true_shooting_percentage = team_rolling_true_shooting_percentage,
opponent_rolling_free_throw_factor = team_rolling_free_throw_factor,
opponent_rolling_turnover_percentage = team_rolling_turnover_percentage,
opponent_rolling_orb = team_rolling_orb,
opponent_rolling_drb = team_rolling_drb,
opponent_cumulative_mean_true_shooting = team_cumulative_mean_true_shooting,
opponent_cumulative_mean_turnover_percentage = team_cumulative_mean_turnover_percentage,
opponent_cumulative_mean_free_throw_factor = team_cumulative_mean_free_throw_factor,
opponent_cumulative_mean_orb = team_cumulative_mean_orb,
opponent_cumulative_mean_drb = team_cumulative_mean_drb,
opponent_cumulative_o_eff = team_cumulative_o_eff,
opponent_cumulative_d_eff = team_cumulative_d_eff,
opponent_efficiency_margin = team_efficiency_margin,
opponent_srs = team_srs,
opponent_sos = team_sos,
opponent_luck = team_luck,
opponent_recency = team_recency
) %>%
mutate(
opponent_id = as.numeric(opponent_id)
)
games <- team_side %>% inner_join(opponent_side) %>% mutate(
TeamResult = as.factor(case_when(
team_score > opponent_score ~ "W",
opponent_score > team_score ~ "L"
))) %>% na.omit()
games$TeamResult <- relevel(games$TeamResult, ref="W")
cumulativesimplemodelgames <- games %>% select(game_id, game_date, team_short_display_name, opponent_short_display_name, season, opponent_efficiency_margin, team_efficiency_margin, team_sos, team_srs, opponent_sos, opponent_srs, opponent_luck, team_luck, opponent_recency, team_recency, TeamResult) %>% na.omit()
cumulative_split <- initial_split(cumulativesimplemodelgames, prop = .8)
cumulative_train <- training(cumulative_split)
cumulative_test <- testing(cumulative_split)
cumulative_recipe <-
recipe(TeamResult ~ ., data = cumulative_train) %>%
update_role(game_id, game_date, team_short_display_name, opponent_short_display_name, season, new_role = "ID") %>%
step_normalize(all_predictors())
log_mod <-
logistic_reg() %>%
set_engine("glm") %>%
set_mode("classification")
log_workflow <-
workflow() %>%
add_model(log_mod) %>%
add_recipe(cumulative_recipe)
log_fit <-
log_workflow %>%
fit(data = cumulative_train)
teamstats <- teamgames %>%
group_by(team_short_display_name, season) %>%
arrange(game_date) %>%
mutate(
team_score = ((field_goals_made-three_point_field_goals_made) * 2) + (three_point_field_goals_made*3) + free_throws_made,
possessions = field_goals_attempted - offensive_rebounds + turnovers + (.475 * free_throws_attempted),
team_offensive_efficiency = (team_score/possessions)*100,
true_shooting_percentage = (team_score / (2*(field_goals_attempted + (.44 * free_throws_attempted)))) * 100,
turnover_pct = turnovers/(field_goals_attempted + 0.44 * free_throws_attempted + turnovers),
free_throw_factor = free_throws_made/field_goals_attempted,
team_rolling_true_shooting_percentage = rollmean(true_shooting_percentage, k=5, align="right", fill=NA),
team_rolling_turnover_percentage = rollmean(turnover_pct, k=5, align="right", fill=NA),
team_rolling_free_throw_factor = rollmean(free_throw_factor, k=4, align="right", fill=NA),
team_cumulative_mean_true_shooting = cummean(true_shooting_percentage),
team_cumulative_mean_turnover_percentage = cummean(turnover_pct),
team_cumulative_mean_free_throw_factor = cummean(free_throw_factor),
team_cumulative_o_eff = cummean(team_offensive_efficiency),
team_rolling_o_eff = rollmean(team_offensive_efficiency, k=10, align="right", fill=NA)
) %>% ungroup()
teamstats <- teamstats %>%
select(game_id, team_id, team_offensive_efficiency) %>%
mutate(team_id = as.numeric(team_id)) %>%
rename(opponent_id = team_id, opponent_offensive_efficiency=team_offensive_efficiency) %>%
left_join(teamstats) %>%
group_by(team_short_display_name, season) %>%
arrange(game_date) %>%
mutate(
team_cumulative_d_eff = cummean(opponent_offensive_efficiency),
team_rolling_d_eff = rollmean(opponent_offensive_efficiency, k=10, align="right", fill=NA)
) %>% ungroup() %>%
rename(
team_sos = OverallSOS,
team_srs = OverallSRS,
team_luck = luck
)
opponent <- teamstats %>% select(game_id, team_id, offensive_rebounds, defensive_rebounds) %>% rename(opponent_id=team_id, opponent_offensive_rebounds = offensive_rebounds, opponent_defensive_rebounds=defensive_rebounds) %>% mutate(opponent_id = as.numeric(opponent_id))
newteamstats <- teamstats %>%
inner_join(opponent) %>%
mutate(
orb = offensive_rebounds / (offensive_rebounds + opponent_defensive_rebounds),
drb = defensive_rebounds / (opponent_offensive_rebounds + defensive_rebounds),
team_rolling_orb = rollmean(orb, k=5, align="right", fill=NA),
team_rolling_drb = rollmean(drb, k=5, align="right", fill=NA),
team_cumulative_mean_orb = cummean(orb),
team_cumulative_mean_drb = cummean(drb),
team_efficiency_margin = team_cumulative_o_eff - team_cumulative_d_eff,
team_recent_efficiency_margin = team_rolling_o_eff - team_rolling_d_eff,
team_recency = team_recent_efficiency_margin - team_efficiency_margin
)
team_side <- newteamstats %>%
select(game_id, team_id, team_short_display_name, opponent_id, game_date, season, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_rolling_o_eff, team_rolling_d_eff, team_sos, team_srs, team_luck, team_recency) %>% na.omit()
opponent_side <- newteamstats %>%
select(game_id, team_id, team_short_display_name, team_score, team_rolling_true_shooting_percentage, team_rolling_free_throw_factor, team_rolling_turnover_percentage, team_rolling_orb, team_rolling_drb, team_cumulative_mean_true_shooting, team_cumulative_mean_turnover_percentage, team_cumulative_mean_free_throw_factor, team_cumulative_mean_orb, team_cumulative_mean_drb, team_cumulative_o_eff, team_cumulative_d_eff, team_efficiency_margin, team_rolling_o_eff, team_rolling_d_eff, team_sos, team_srs, team_luck, team_recency) %>% na.omit() %>%
rename(
opponent_id = team_id,
opponent_short_display_name = team_short_display_name,
opponent_score = team_score,
opponent_rolling_true_shooting_percentage = team_rolling_true_shooting_percentage,
opponent_rolling_free_throw_factor = team_rolling_free_throw_factor,
opponent_rolling_turnover_percentage = team_rolling_turnover_percentage,
opponent_rolling_orb = team_rolling_orb,
opponent_rolling_drb = team_rolling_drb,
opponent_cumulative_mean_true_shooting = team_cumulative_mean_true_shooting,
opponent_cumulative_mean_turnover_percentage = team_cumulative_mean_turnover_percentage,
opponent_cumulative_mean_free_throw_factor = team_cumulative_mean_free_throw_factor,
opponent_cumulative_mean_orb = team_cumulative_mean_orb,
opponent_cumulative_mean_drb = team_cumulative_mean_drb,
opponent_cumulative_o_eff = team_cumulative_o_eff,
opponent_cumulative_d_eff = team_cumulative_d_eff,
opponent_efficiency_margin = team_efficiency_margin,
opponent_rolling_o_eff = team_rolling_o_eff,
opponent_rolling_d_eff = team_rolling_d_eff,
opponent_srs = team_srs,
opponent_sos = team_sos,
opponent_luck = team_luck,
opponent_recency = team_recency
) %>%
mutate(
opponent_id = as.numeric(opponent_id)
)
games <- team_side %>% inner_join(opponent_side) %>% mutate(
TeamResult = as.factor(case_when(
team_score > opponent_score ~ "W",
opponent_score > team_score ~ "L"
))) %>% na.omit()
games$TeamResult <- relevel(games$TeamResult, ref="W")
cumulativesimplemodelgames <- games %>% select(game_id, game_date, team_short_display_name, opponent_short_display_name, season, opponent_efficiency_margin, team_efficiency_margin, team_sos, team_srs, opponent_sos, opponent_srs, team_luck, opponent_luck, team_recency, opponent_recency, TeamResult)
westround1games <- tibble(
team_short_display_name="Gonzaga",
opponent_short_display_name="Georgia State"
) %>% add_row(
team_short_display_name="Boise State",
opponent_short_display_name="Memphis"
) %>% add_row(
team_short_display_name="UConn",
opponent_short_display_name="New Mexico St"
) %>% add_row(
team_short_display_name="Arkansas",
opponent_short_display_name="Vermont"
) %>% add_row(
team_short_display_name="Alabama",
opponent_short_display_name="Notre Dame"
) %>% add_row(
team_short_display_name="Texas Tech",
opponent_short_display_name="Montana State"
) %>% add_row(
team_short_display_name="Michigan State",
opponent_short_display_name="Davidson"
) %>% add_row(
team_short_display_name="Duke",
opponent_short_display_name="CSU Fullerton"
)
westround1games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(westround1games)
westround1games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(westround1games)
westround1log <- log_fit %>% predict(new_data = westround1games) %>%
bind_cols(westround1games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
westround1log <- log_fit %>% predict(new_data = westround1log, type="prob") %>%
bind_cols(westround1log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
westround2games <- tibble(
team_short_display_name="Gonzaga",
opponent_short_display_name="Memphis"
) %>% add_row(
team_short_display_name="UConn",
opponent_short_display_name="Arkansas"
) %>% add_row(
team_short_display_name="Alabama",
opponent_short_display_name="Texas Tech"
) %>% add_row(
team_short_display_name="Michigan State",
opponent_short_display_name="Duke"
)
westround2games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(westround2games)
westround2games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(westround2games)
westround2log <- log_fit %>% predict(new_data = westround2games) %>%
bind_cols(westround2games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
westround2log <- log_fit %>% predict(new_data = westround2log, type="prob") %>%
bind_cols(westround2log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
westround3games <- tibble(
team_short_display_name="Gonzaga",
opponent_short_display_name="Arkansas"
) %>% add_row(
team_short_display_name="Alabama",
opponent_short_display_name="Duke"
)
westround3games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(westround3games)
westround3games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(westround3games)
westround3log <- log_fit %>% predict(new_data = westround3games) %>%
bind_cols(westround3games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
westround3log <- log_fit %>% predict(new_data = westround3log, type="prob") %>%
bind_cols(westround3log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
westround4games <- tibble(
team_short_display_name="Gonzaga",
opponent_short_display_name="Alabama"
)
westround4games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(westround4games)
westround4games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(westround4games)
westround4log <- log_fit %>% predict(new_data = westround4games) %>%
bind_cols(westround4games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
westround4log <- log_fit %>% predict(new_data = westround4log, type="prob") %>%
bind_cols(westround4log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
eastround1games <- tibble(
team_short_display_name="Baylor",
opponent_short_display_name="Norfolk State"
) %>% add_row(
team_short_display_name="North Carolina",
opponent_short_display_name="Marquette"
) %>% add_row(
team_short_display_name="Saint Mary's",
opponent_short_display_name="Indiana"
) %>% add_row(
team_short_display_name="UCLA",
opponent_short_display_name="Akron"
) %>% add_row(
team_short_display_name="Texas",
opponent_short_display_name="Virginia Tech"
) %>% add_row(
team_short_display_name="Purdue",
opponent_short_display_name="Yale"
) %>% add_row(
team_short_display_name="Murray State",
opponent_short_display_name="San Francisco"
) %>% add_row(
team_short_display_name="Kentucky",
opponent_short_display_name="Saint Peter's"
)
eastround1games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(eastround1games)
eastround1games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(eastround1games)
eastround1log <- log_fit %>% predict(new_data = eastround1games) %>%
bind_cols(eastround1games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
eastround1log <- log_fit %>% predict(new_data = eastround1log, type="prob") %>%
bind_cols(eastround1log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
eastround2games <- tibble(
team_short_display_name="Baylor",
opponent_short_display_name="North Carolina"
) %>% add_row(
team_short_display_name="Indiana",
opponent_short_display_name="UCLA"
) %>% add_row(
team_short_display_name="Texas",
opponent_short_display_name="Purdue"
) %>% add_row(
team_short_display_name="San Francisco",
opponent_short_display_name="Kentucky"
)
eastround2games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(eastround2games)
eastround2games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(eastround2games)
eastround2log <- log_fit %>% predict(new_data = eastround2games) %>%
bind_cols(eastround2games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
eastround2log <- log_fit %>% predict(new_data = eastround2log, type="prob") %>%
bind_cols(eastround2log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
eastround3games <- tibble(
team_short_display_name="Baylor",
opponent_short_display_name="UCLA"
) %>% add_row(
team_short_display_name="Purdue",
opponent_short_display_name="Kentucky"
)
eastround3games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(eastround3games)
eastround3games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(eastround3games)
eastround3log <- log_fit %>% predict(new_data = eastround3games) %>%
bind_cols(eastround3games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
eastround3log <- log_fit %>% predict(new_data = eastround3log, type="prob") %>%
bind_cols(eastround3log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
eastround4games <- tibble(
team_short_display_name="Baylor",
opponent_short_display_name="Purdue"
)
eastround4games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(eastround4games)
eastround4games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(eastround4games)
eastround4log <- log_fit %>% predict(new_data = eastround4games) %>%
bind_cols(eastround4games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
eastround4log <- log_fit %>% predict(new_data = eastround4log, type="prob") %>%
bind_cols(eastround4log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
southround1games <- tibble(
team_short_display_name="Arizona",
opponent_short_display_name="Wright State"
) %>% add_row(
team_short_display_name="Seton Hall",
opponent_short_display_name="TCU"
) %>% add_row(
team_short_display_name="Houston",
opponent_short_display_name="UAB"
) %>% add_row(
team_short_display_name="Illinois",
opponent_short_display_name="Chattanooga"
) %>% add_row(
team_short_display_name="Colorado State",
opponent_short_display_name="Michigan"
) %>% add_row(
team_short_display_name="Tennessee",
opponent_short_display_name="Longwood"
) %>% add_row(
team_short_display_name="Ohio State",
opponent_short_display_name="Loyola Chicago"
) %>% add_row(
team_short_display_name="Villanova",
opponent_short_display_name="Delaware"
)
southround1games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(southround1games)
southround1games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(southround1games)
southround1log <- log_fit %>% predict(new_data = southround1games) %>%
bind_cols(southround1games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
southround1log <- log_fit %>% predict(new_data = southround1log, type="prob") %>%
bind_cols(southround1log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
southround2games <- tibble(
team_short_display_name="Arizona",
opponent_short_display_name="Seton Hall"
) %>% add_row(
team_short_display_name="Houston",
opponent_short_display_name="Illinois"
) %>% add_row(
team_short_display_name="Michigan",
opponent_short_display_name="Tennessee"
) %>% add_row(
team_short_display_name="Ohio State",
opponent_short_display_name="Villanova"
)
southround2games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(southround2games)
southround2games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(southround2games)
southround2log <- log_fit %>% predict(new_data = southround2games) %>%
bind_cols(southround2games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
southround2log <- log_fit %>% predict(new_data = southround2log, type="prob") %>%
bind_cols(southround2log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
southround3games <- tibble(
team_short_display_name="Arizona",
opponent_short_display_name="Illinois"
) %>% add_row(
team_short_display_name="Michigan",
opponent_short_display_name="Villanova"
)
southround3games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(southround3games)
southround3games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(southround3games)
southround3log <- log_fit %>% predict(new_data = southround3games) %>%
bind_cols(southround3games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
southround3log <- log_fit %>% predict(new_data = southround3log, type="prob") %>%
bind_cols(southround3log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
southround4games <- tibble(
team_short_display_name="Michigan",
opponent_short_display_name="Illinois"
)
southround4games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(southround4games)
southround4games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(southround4games)
southround4log <- log_fit %>% predict(new_data = southround4games) %>%
bind_cols(southround4games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
southround4log <- log_fit %>% predict(new_data = southround4log, type="prob") %>%
bind_cols(southround4log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
midwestround1games <- tibble(
team_short_display_name="Kansas",
opponent_short_display_name="Texas Southern"
) %>% add_row(
team_short_display_name="San Diego State",
opponent_short_display_name="Creighton"
) %>% add_row(
team_short_display_name="Iowa",
opponent_short_display_name="Richmond"
) %>% add_row(
team_short_display_name="Providence",
opponent_short_display_name="S Dakota St"
) %>% add_row(
team_short_display_name="LSU",
opponent_short_display_name="Iowa State"
) %>% add_row(
team_short_display_name="Wisconsin",
opponent_short_display_name="Colgate"
) %>% add_row(
team_short_display_name="USC",
opponent_short_display_name="Miami"
) %>% add_row(
team_short_display_name="Auburn",
opponent_short_display_name="J'Ville St"
)
midwestround1games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(midwestround1games)
midwestround1games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(midwestround1games)
midwestround1log <- log_fit %>% predict(new_data = midwestround1games) %>%
bind_cols(midwestround1games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
midwestround1log <- log_fit %>% predict(new_data = midwestround1log, type="prob") %>%
bind_cols(midwestround1log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
midwestround2games <- tibble(
team_short_display_name="Kansas",
opponent_short_display_name="Creighton"
) %>% add_row(
team_short_display_name="Iowa",
opponent_short_display_name="Providence"
) %>% add_row(
team_short_display_name="LSU",
opponent_short_display_name="Wisconsin"
) %>% add_row(
team_short_display_name="USC",
opponent_short_display_name="Auburn"
)
midwestround2games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(midwestround2games)
midwestround2games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(midwestround2games)
midwestround2log <- log_fit %>% predict(new_data = midwestround2games) %>%
bind_cols(midwestround2games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
midwestround2log <- log_fit %>% predict(new_data = midwestround2log, type="prob") %>%
bind_cols(midwestround2log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
midwestround3games <- tibble(
team_short_display_name="Creighton",
opponent_short_display_name="Iowa"
) %>% add_row(
team_short_display_name="Wisconsin",
opponent_short_display_name="USC"
)
midwestround3games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(midwestround3games)
midwestround3games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(midwestround3games)
midwestround3log <- log_fit %>% predict(new_data = midwestround3games) %>%
bind_cols(midwestround3games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
midwestround3log <- log_fit %>% predict(new_data = midwestround3log, type="prob") %>%
bind_cols(midwestround3log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
midwestround4games <- tibble(
team_short_display_name="Creighton",
opponent_short_display_name="USC"
)
midwestround4games <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(midwestround4games)
midwestround4games <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(midwestround4games)
midwestround4log <- log_fit %>% predict(new_data = midwestround4games) %>%
bind_cols(midwestround4games) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
midwestround4log <- log_fit %>% predict(new_data = midwestround4log, type="prob") %>%
bind_cols(midwestround4log) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
finalfourgames <- tibble(
team_short_display_name="Gonzaga",
opponent_short_display_name="Baylor"
) %>% add_row(
team_short_display_name="Illinois",
opponent_short_display_name="USC"
)
finalfourgames <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(finalfourgames)
finalfourgames <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(finalfourgames)
finalfourlog <- log_fit %>% predict(new_data = finalfourgames) %>%
bind_cols(finalfourgames) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
finalfourlog <- log_fit %>% predict(new_data = finalfourlog, type="prob") %>%
bind_cols(finalfourlog) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())
champs <- tibble(
team_short_display_name="Baylor",
opponent_short_display_name="Illinois"
)
champs <- cumulativesimplemodelgames %>% group_by(team_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% select(-TeamResult, -starts_with("opponent")) %>% right_join(champs)
champs <- cumulativesimplemodelgames %>% group_by(opponent_short_display_name) %>% filter(game_date == max(game_date) & season == 2022) %>% slice(1) %>% ungroup() %>% select(-TeamResult, -starts_with("team"), -game_id, -game_date, -season) %>% right_join(champs)
champslog <- log_fit %>% predict(new_data = champs) %>%
bind_cols(champs) %>% select(.pred_class, team_short_display_name, opponent_short_display_name, everything())
champslog <- log_fit %>% predict(new_data = champslog, type="prob") %>%
bind_cols(champslog) %>% select(.pred_class, .pred_W, .pred_L, team_short_display_name, opponent_short_display_name, everything())