WHKYHAC: End Of Possession Viz
The Viz Launchpad Competition
WHKYHAC and Sportlogiq announced a visualization competition using data from the 2023 Season of the Professional Women’s Hockey Players Association. The entry deadline for the competition is July 2, 2023. This will be my final “trial” data visualization before the entry deadline.
WHKYHAC and Sportlogiq announced a visualization competition using data from the 2023 Season of the Professional Women’s Hockey Players Association. The entry deadline for the competition is July 2, 2023. This will be my final “trial” data visualization before the entry deadline.
How Did Teams Lose Possession Of The Puck?
This viz is intended to be clear and concise. The viewer should immediately understand that successful passing is important when it comes to maintaining possession of the puck.
I know some people don’t like this type of data viz (i.e., anything that resembles a pie chart). In this case I think it works to deliver the simple message that passing is important when it comes to maintaining puck possession.
Open the image in a new tab if you want to see a larger version.
This simple data viz doesn’t provide any useful information about how to improve passing. For a look at the success rate of a pass attempt based on its angle and the zone in which it originated check out my previous post.
This viz is intended to be clear and concise. The viewer should immediately understand that successful passing is important when it comes to maintaining possession of the puck.
I know some people don’t like this type of data viz (i.e., anything that resembles a pie chart). In this case I think it works to deliver the simple message that passing is important when it comes to maintaining puck possession.
Open the image in a new tab if you want to see a larger version.
This simple data viz doesn’t provide any useful information about how to improve passing. For a look at the success rate of a pass attempt based on its angle and the zone in which it originated check out my previous post.
The Code
Here’s the code for this data viz.
# SETUP ########################################################################
setwd("~/18_skaters/r_studio/whkyhac")
library(tidyverse)
library(stringr)
library(lubridate)
library(geomtextpath)
# LOAD DATA ####################################################################
raw_pbp_data <- read_csv("23_PBP_WHKYHAC_SPORTLOGIQ.csv",
locale = locale(encoding = "ISO-8859-1"))
# EXPLORE DATA #################################################################
#print(str(raw_pbp_data))
# Players
player_names <- unique(raw_pbp_data$player)
# Events
event_names <- unique(raw_pbp_data$eventname)
event_outcomes <- unique(raw_pbp_data$outcome)
event_types <- unique(raw_pbp_data$type)
# Strength states
strength_states <- unique(raw_pbp_data$strengthstate)
# CLEAN AND MANIPULATE DATA (AREAS OF INTEREST ONLY) ###########################
# Note: much of this script was originally written for a different viz that looked at all shot attempt assists
clean_pbp_data <- raw_pbp_data
# Fix name for Kristin O’Neill
clean_pbp_data$player <- str_replace_all(clean_pbp_data$player, "\\031", "’")
# Add game_id
clean_pbp_data$game_id <- paste(clean_pbp_data$game, clean_pbp_data$date)
# Add event_id
clean_pbp_data$event_id <- seq(1:length(clean_pbp_data$seasonstage))
# Add empty_net to opposing_goalie variable
clean_pbp_data$opposing_goalie <- ifelse(
is.na(clean_pbp_data$opposing_goalie),
"empty_net",
clean_pbp_data$opposing_goalie)
# Reorganize
clean_pbp_data <- select(clean_pbp_data, c(30:31,1:29))
# EXPLORE POSSESSION DATA ######################################################
# Isolate a single game
exploration_data <- filter(clean_pbp_data,
game_id == "ADI-HAR 2022-10-15")
# Get final possession event_ids and add them to play-by-play data
end_possession_data <- exploration_data %>%
group_by(currentpossession) %>%
summarise(end_possession_event_id = last(event_id)) %>%
ungroup()
end_possession_ids <- end_possession_data$end_possession_event_id
exploration_data$end_possession <- ifelse(
exploration_data$event_id %in% end_possession_ids,
TRUE,
FALSE)
exploration_data <- exploration_data %>%
select(c(1:10, 32, 11:31))
# Filter for end possession events and summarize
end_possession_events_explore <- exploration_data %>%
filter(end_possession == TRUE) %>%
group_by(eventname) %>%
summarize(end_events = n(),
fails = sum(outcome == "failed"),
successes = sum(outcome == "successful"),
undetermined = sum (outcome == "undetermined")) %>%
arrange(desc(end_events)) %>%
ungroup()
# Shrink data to make it easier to explore relationship between shot/pass/block
exploration_data <- exploration_data %>%
select(end_possession,
eventname,
type,
outcome)
# Extend this approach to all play-by-play data and summarize
all_end_possession_data <- clean_pbp_data %>%
group_by(game_id, currentpossession) %>%
summarise(end_possession_event_id = last(event_id)) %>%
ungroup()
all_end_possession_ids <- all_end_possession_data$end_possession_event_id
clean_pbp_data$end_possession <- ifelse(
clean_pbp_data$event_id %in% all_end_possession_ids,
TRUE,
FALSE)
clean_pbp_data <- clean_pbp_data %>%
select(c(1:10, 32, 11:31))
end_possession_events_all <- clean_pbp_data %>%
filter(end_possession == TRUE) %>%
group_by(eventname) %>%
summarize(end_events = n(),
fails = sum(outcome == "failed"),
successes = sum(outcome == "successful"),
undetermined = sum (outcome == "undetermined")) %>%
arrange(desc(end_events)) %>%
ungroup()
# Make some adjustments to this summary
# Start with the 151 FAILED blocks classified as end_events
failed_blocks <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "block",
outcome == "failed")
failed_block_types <- unique(failed_blocks$type)
# It looks like the failed blocks should be attributed to shots and dump outs
failed_block_ids <- failed_blocks$event_id
failed_block_prior_events_id <- c(failed_block_ids -1,
failed_block_ids -2,
failed_block_ids -3)
failed_block_prior_events <- clean_pbp_data %>%
filter(event_id %in% failed_block_prior_events_id) %>%
group_by(eventname) %>%
summarize (prior_events = n()) %>%
arrange(desc(prior_events)) %>%
ungroup()
# There were 115 shots and 36 dump outs in prior events, which equals 151 :)
# Examine events around successful lpr and puck protection
success_lpr_pp_ids <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "lpr" | eventname == "puckprotection",
outcome == "successful")
success_lpr_pp_ids <- success_lpr_pp_ids$event_id
success_lpr_pp_other_event_ids <- c(success_lpr_pp_ids -3,
success_lpr_pp_ids -2,
success_lpr_pp_ids -1,
success_lpr_pp_ids,
success_lpr_pp_ids +1,
success_lpr_pp_ids + 2,
success_lpr_pp_ids +3)
success_lpr_pp_other_event <- clean_pbp_data %>%
filter(event_id %in% success_lpr_pp_other_event_ids) %>%
select(event_id,
end_possession,
eventname,
type,
outcome) %>%
arrange()
# It looks like most of the successful events were followed by a faceoff
success_lpr_pp_next_event <- clean_pbp_data %>%
filter(event_id %in% (success_lpr_pp_ids +1)) %>%
group_by(eventname) %>%
summarize(next_events = n()) %>%
ungroup()
# For current purposes I will simply assume that the successful events should not be treated as a loss of possession
# There are 18 passes with an undetermined outcome
# According to the data dictionary this happens when there is a game stoppage - I'll assume this should not be treated as a loss of possession
# Breakdown the successful blocks
blocks_breakdown <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "block",
outcome == "successful") %>%
group_by(type) %>%
summarize(count = n()) %>%
arrange(desc(count)) %>%
ungroup()
blocks_breakdown <- blocks_breakdown %>%
mutate(proportion = count / sum(count))
# Roughly 75% of blocks are attributable to blocked passes
# Manually adjust some of the end_event totals:
# Subtract failed blocks (added to shot and dumpout)
# Add 115 shots
# Add 36 dump outs
# Subtract successful lpr outcomes
# Subtract successful puck protection outcomes
# Subtract undetermined pass by outcomes
end_possession_events_all_adjusted <- end_possession_events_all %>%
mutate(end_events = case_when(
eventname == "block" ~ successes,
eventname == "shot" ~ end_events + 115,
eventname == "dumpout" ~ end_events + 36,
eventname == "lpr" ~ fails,
eventname == "puckprotection" ~ fails,
eventname == "pass" ~ fails,
TRUE ~ end_events))
# Breakdown successful blocks by type and remove generic blocks
blocks_bolt_on <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "block",
outcome == "successful") %>%
group_by(type) %>%
summarize(end_events = n(),
fails = sum(outcome == "failed"),
successes = sum(outcome == "successful"),
undetermined = sum (outcome == "undetermined")) %>%
arrange(desc(end_events)) %>%
ungroup() %>%
rename(eventname = type)
blocks_bolt_on$eventname <- paste0("blocked_", blocks_bolt_on$eventname)
end_possession_events_all_adjusted <- end_possession_events_all_adjusted %>%
bind_rows(blocks_bolt_on) %>%
arrange(desc(end_events))
end_possession_events_all_adjusted <- end_possession_events_all_adjusted %>%
filter(eventname != "block")
# Finalize the data
# I'll exclude shots from the list because they're usually a "good" way to end possession
# I'll also filter for events with 1,000+ instances
plot_data <- end_possession_events_all_adjusted %>%
filter(end_events >= 1000,
eventname != "shot") %>%
select(c(1:2))
plot_data <- plot_data %>%
mutate(perc = round(end_events / sum(end_events),2) * 100) %>%
mutate(label = paste0(perc, "%"))
plot_data[1,1] <- "FAILED PASS"
plot_data[2,1] <- "BLOCKED PASS"
plot_data[3,1] <- "FAILED \n PUCK RECOVERY"
plot_data[4,1] <- "FAILED \n PUCK PROTECTION"
plot_data[5,1] <- "DUMP IN"
plot_data[6,1] <- "DUMP OUT"
plot_data[7,1] <- "OPPONENT \n CHECK"
plot_data <- plot_data %>%
mutate(eventname = fct_reorder(eventname, end_events))
# PLOT THE DATA ################################################################
plot <- ggplot(plot_data,
aes(x = eventname,
y = end_events,
fill = eventname,
label = label)) +
geom_col(show.legend = FALSE,
width = 1,
colour = "grey38") +
theme_minimal() +
geom_text(aes(colour = eventname),
nudge_y = -450,
nudge_x = 0.03,
show.legend = FALSE,
fontface = "bold",
size = 5) +
theme(aspect.ratio = 1,
axis.text.x = element_text(size = 18),
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(size = 20,
face = "bold"),
plot.subtitle = element_text(size = 16),
plot.title.position = "plot",
plot.caption = element_text(size = 14)) +
labs(x = NULL,
y = NULL,
title = "How did teams lose possession of the puck?",
subtitle = "PWHPA 2023 Season",
caption = "Data by Sportlogiq, viz by 18 Skaters, #WHKYHAC") +
scale_fill_manual(values = c("grey90", "grey90", "grey90", "grey90", "grey90", "purple", "purple3")) +
scale_colour_manual(values = c("grey20", "grey20", "grey20", "grey20", "grey20", "yellow", "yellow")) +
coord_curvedpolar()
#plot
Here’s the code for this data viz.
# SETUP ########################################################################
setwd("~/18_skaters/r_studio/whkyhac")
library(tidyverse)
library(stringr)
library(lubridate)
library(geomtextpath)
# LOAD DATA ####################################################################
raw_pbp_data <- read_csv("23_PBP_WHKYHAC_SPORTLOGIQ.csv",
locale = locale(encoding = "ISO-8859-1"))
# EXPLORE DATA #################################################################
#print(str(raw_pbp_data))
# Players
player_names <- unique(raw_pbp_data$player)
# Events
event_names <- unique(raw_pbp_data$eventname)
event_outcomes <- unique(raw_pbp_data$outcome)
event_types <- unique(raw_pbp_data$type)
# Strength states
strength_states <- unique(raw_pbp_data$strengthstate)
# CLEAN AND MANIPULATE DATA (AREAS OF INTEREST ONLY) ###########################
# Note: much of this script was originally written for a different viz that looked at all shot attempt assists
clean_pbp_data <- raw_pbp_data
# Fix name for Kristin O’Neill
clean_pbp_data$player <- str_replace_all(clean_pbp_data$player, "\\031", "’")
# Add game_id
clean_pbp_data$game_id <- paste(clean_pbp_data$game, clean_pbp_data$date)
# Add event_id
clean_pbp_data$event_id <- seq(1:length(clean_pbp_data$seasonstage))
# Add empty_net to opposing_goalie variable
clean_pbp_data$opposing_goalie <- ifelse(
is.na(clean_pbp_data$opposing_goalie),
"empty_net",
clean_pbp_data$opposing_goalie)
# Reorganize
clean_pbp_data <- select(clean_pbp_data, c(30:31,1:29))
# EXPLORE POSSESSION DATA ######################################################
# Isolate a single game
exploration_data <- filter(clean_pbp_data,
game_id == "ADI-HAR 2022-10-15")
# Get final possession event_ids and add them to play-by-play data
end_possession_data <- exploration_data %>%
group_by(currentpossession) %>%
summarise(end_possession_event_id = last(event_id)) %>%
ungroup()
end_possession_ids <- end_possession_data$end_possession_event_id
exploration_data$end_possession <- ifelse(
exploration_data$event_id %in% end_possession_ids,
TRUE,
FALSE)
exploration_data <- exploration_data %>%
select(c(1:10, 32, 11:31))
# Filter for end possession events and summarize
end_possession_events_explore <- exploration_data %>%
filter(end_possession == TRUE) %>%
group_by(eventname) %>%
summarize(end_events = n(),
fails = sum(outcome == "failed"),
successes = sum(outcome == "successful"),
undetermined = sum (outcome == "undetermined")) %>%
arrange(desc(end_events)) %>%
ungroup()
# Shrink data to make it easier to explore relationship between shot/pass/block
exploration_data <- exploration_data %>%
select(end_possession,
eventname,
type,
outcome)
# Extend this approach to all play-by-play data and summarize
all_end_possession_data <- clean_pbp_data %>%
group_by(game_id, currentpossession) %>%
summarise(end_possession_event_id = last(event_id)) %>%
ungroup()
all_end_possession_ids <- all_end_possession_data$end_possession_event_id
clean_pbp_data$end_possession <- ifelse(
clean_pbp_data$event_id %in% all_end_possession_ids,
TRUE,
FALSE)
clean_pbp_data <- clean_pbp_data %>%
select(c(1:10, 32, 11:31))
end_possession_events_all <- clean_pbp_data %>%
filter(end_possession == TRUE) %>%
group_by(eventname) %>%
summarize(end_events = n(),
fails = sum(outcome == "failed"),
successes = sum(outcome == "successful"),
undetermined = sum (outcome == "undetermined")) %>%
arrange(desc(end_events)) %>%
ungroup()
# Make some adjustments to this summary
# Start with the 151 FAILED blocks classified as end_events
failed_blocks <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "block",
outcome == "failed")
failed_block_types <- unique(failed_blocks$type)
# It looks like the failed blocks should be attributed to shots and dump outs
failed_block_ids <- failed_blocks$event_id
failed_block_prior_events_id <- c(failed_block_ids -1,
failed_block_ids -2,
failed_block_ids -3)
failed_block_prior_events <- clean_pbp_data %>%
filter(event_id %in% failed_block_prior_events_id) %>%
group_by(eventname) %>%
summarize (prior_events = n()) %>%
arrange(desc(prior_events)) %>%
ungroup()
# There were 115 shots and 36 dump outs in prior events, which equals 151 :)
# Examine events around successful lpr and puck protection
success_lpr_pp_ids <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "lpr" | eventname == "puckprotection",
outcome == "successful")
success_lpr_pp_ids <- success_lpr_pp_ids$event_id
success_lpr_pp_other_event_ids <- c(success_lpr_pp_ids -3,
success_lpr_pp_ids -2,
success_lpr_pp_ids -1,
success_lpr_pp_ids,
success_lpr_pp_ids +1,
success_lpr_pp_ids + 2,
success_lpr_pp_ids +3)
success_lpr_pp_other_event <- clean_pbp_data %>%
filter(event_id %in% success_lpr_pp_other_event_ids) %>%
select(event_id,
end_possession,
eventname,
type,
outcome) %>%
arrange()
# It looks like most of the successful events were followed by a faceoff
success_lpr_pp_next_event <- clean_pbp_data %>%
filter(event_id %in% (success_lpr_pp_ids +1)) %>%
group_by(eventname) %>%
summarize(next_events = n()) %>%
ungroup()
# For current purposes I will simply assume that the successful events should not be treated as a loss of possession
# There are 18 passes with an undetermined outcome
# According to the data dictionary this happens when there is a game stoppage - I'll assume this should not be treated as a loss of possession
# Breakdown the successful blocks
blocks_breakdown <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "block",
outcome == "successful") %>%
group_by(type) %>%
summarize(count = n()) %>%
arrange(desc(count)) %>%
ungroup()
blocks_breakdown <- blocks_breakdown %>%
mutate(proportion = count / sum(count))
# Roughly 75% of blocks are attributable to blocked passes
# Manually adjust some of the end_event totals:
# Subtract failed blocks (added to shot and dumpout)
# Add 115 shots
# Add 36 dump outs
# Subtract successful lpr outcomes
# Subtract successful puck protection outcomes
# Subtract undetermined pass by outcomes
end_possession_events_all_adjusted <- end_possession_events_all %>%
mutate(end_events = case_when(
eventname == "block" ~ successes,
eventname == "shot" ~ end_events + 115,
eventname == "dumpout" ~ end_events + 36,
eventname == "lpr" ~ fails,
eventname == "puckprotection" ~ fails,
eventname == "pass" ~ fails,
TRUE ~ end_events))
# Breakdown successful blocks by type and remove generic blocks
blocks_bolt_on <- clean_pbp_data %>%
filter(end_possession == TRUE,
eventname == "block",
outcome == "successful") %>%
group_by(type) %>%
summarize(end_events = n(),
fails = sum(outcome == "failed"),
successes = sum(outcome == "successful"),
undetermined = sum (outcome == "undetermined")) %>%
arrange(desc(end_events)) %>%
ungroup() %>%
rename(eventname = type)
blocks_bolt_on$eventname <- paste0("blocked_", blocks_bolt_on$eventname)
end_possession_events_all_adjusted <- end_possession_events_all_adjusted %>%
bind_rows(blocks_bolt_on) %>%
arrange(desc(end_events))
end_possession_events_all_adjusted <- end_possession_events_all_adjusted %>%
filter(eventname != "block")
# Finalize the data
# I'll exclude shots from the list because they're usually a "good" way to end possession
# I'll also filter for events with 1,000+ instances
plot_data <- end_possession_events_all_adjusted %>%
filter(end_events >= 1000,
eventname != "shot") %>%
select(c(1:2))
plot_data <- plot_data %>%
mutate(perc = round(end_events / sum(end_events),2) * 100) %>%
mutate(label = paste0(perc, "%"))
plot_data[1,1] <- "FAILED PASS"
plot_data[2,1] <- "BLOCKED PASS"
plot_data[3,1] <- "FAILED \n PUCK RECOVERY"
plot_data[4,1] <- "FAILED \n PUCK PROTECTION"
plot_data[5,1] <- "DUMP IN"
plot_data[6,1] <- "DUMP OUT"
plot_data[7,1] <- "OPPONENT \n CHECK"
plot_data <- plot_data %>%
mutate(eventname = fct_reorder(eventname, end_events))
# PLOT THE DATA ################################################################
plot <- ggplot(plot_data,
aes(x = eventname,
y = end_events,
fill = eventname,
label = label)) +
geom_col(show.legend = FALSE,
width = 1,
colour = "grey38") +
theme_minimal() +
geom_text(aes(colour = eventname),
nudge_y = -450,
nudge_x = 0.03,
show.legend = FALSE,
fontface = "bold",
size = 5) +
theme(aspect.ratio = 1,
axis.text.x = element_text(size = 18),
panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(size = 20,
face = "bold"),
plot.subtitle = element_text(size = 16),
plot.title.position = "plot",
plot.caption = element_text(size = 14)) +
labs(x = NULL,
y = NULL,
title = "How did teams lose possession of the puck?",
subtitle = "PWHPA 2023 Season",
caption = "Data by Sportlogiq, viz by 18 Skaters, #WHKYHAC") +
scale_fill_manual(values = c("grey90", "grey90", "grey90", "grey90", "grey90", "purple", "purple3")) +
scale_colour_manual(values = c("grey20", "grey20", "grey20", "grey20", "grey20", "yellow", "yellow")) +
coord_curvedpolar()
#plot
The End
That’s it. I’ll post my submission to the Viz Launchpad Competition in the next few days.
Mark (18 Skaters)
That’s it. I’ll post my submission to the Viz Launchpad Competition in the next few days.
Mark (18 Skaters)