WHKYHAC: Pass Direction Success Rate Viz
The Viz Launchpad Competition
WHKYHAC and Sportlogiq announced a visualization competition using data from the 2023 Season of the Professional Women’s Hockey Players Association. The entry deadline for the competition is July 2, 2023. Prior to that deadline I’ll post some “trial” data visualizations here.
Today’s Data Viz
This post is about successful (and failed) passing plays. I’ll look at the success rate for a pass attempt based on its angle and the zone in which it originated.
Successful Pass Attempts Based On Pass Angle and Zone
This viz shows successful pass attempts (blue) and failed pass attempts (red) based on the angle of the pass attempt. For this purpose each pass attempt was put into one of eighteen groups based on its angle, and each group was separated into the zone from which the pass attempt originated. The pass attempts on the far right of each plot were “straight forward” from the position of the person making the pass towards the far end of the attacking zone. The pass attempts rotate from “straight forward” to “sideways” to “straight backward” as you move from the right to the left side of each zone plot.
Open the image in a new tab if you want to see a larger version.
It was essentially a coin flip as to whether a forward pass attempt was completed in many cases. Sideways and backward pass attempts succeeded more often, though backward pass attempts obviously do not have the merit of being in the direction of the opponent’s net.
The Code
Here’s the code for this data viz.
# SETUP ########################################################################
setwd("~/18_skaters/r_studio/whkyhac")
library(tidyverse)
library(stringr)
library(lubridate)
# LOAD DATA ####################################################################
raw_pbp_data <- read_csv("23_PBP_WHKYHAC_SPORTLOGIQ.csv",
locale = locale(encoding = "ISO-8859-1"))
# EXPLORE DATA #################################################################
# Players
player_names <- unique(raw_pbp_data$player)
# Events
event_names <- unique(raw_pbp_data$eventname)
event_outcomes <- unique(raw_pbp_data$outcome)
event_types <- unique(raw_pbp_data$type)
# Strength states
strength_states <- unique(raw_pbp_data$strengthstate)
# CLEAN AND MANIPULATE DATA (AREAS OF INTEREST ONLY) ###########################
clean_pbp_data <- raw_pbp_data
# Fix name for Kristin O’Neill
clean_pbp_data$player <- str_replace_all(clean_pbp_data$player, "\\031", "’")
# Add game_id
clean_pbp_data$game_id <- paste(clean_pbp_data$game, clean_pbp_data$date)
# Add event_id
clean_pbp_data$event_id <- seq(1:length(clean_pbp_data$seasonstage))
# Add empty_net to opposing_goalie variable
clean_pbp_data$opposing_goalie <- ifelse(
is.na(clean_pbp_data$opposing_goalie),
"empty_net",
clean_pbp_data$opposing_goalie)
# Reorganize a little
clean_pbp_data <- select(clean_pbp_data, c(30:31,1:29))
# GET PASS DATA ################################################################
# Shrink the data need for this viz
clean_pbp_data_viz <- clean_pbp_data %>%
select(event_id,
teamname,
eventname,
outcome,
type,
xadjcoord,
yadjcoord)
# Get successful pass / reception data (pass -> reception)
pass_data <- clean_pbp_data_viz %>%
filter(eventname == "pass")
successful_pass_data <- pass_data %>%
filter(outcome == "successful")
successful_pass_ids <- successful_pass_data$event_id
loop_list <- list()
for (i in 1:4) {
loop_data <- clean_pbp_data_viz %>%
filter(event_id %in% (successful_pass_ids + i)) %>%
filter(eventname == "reception")
loop_list[[i]] <- loop_data$event_id
successful_pass_ids <- setdiff(successful_pass_ids,
(loop_data$event_id - i))
}
successful_reception_ids <- unlist(loop_list)
successful_pass_ids <- successful_pass_data$event_id
target_successful_data <- clean_pbp_data_viz %>%
filter(event_id %in% successful_pass_ids |
event_id %in% successful_reception_ids) %>%
arrange(event_id)
# Quick check for pass > reception pattern in the target data
# This should print a repeating pattern of "1 2"
#options(max.print=100000)
#match(target_successful_data$eventname,
#c("pass", "reception"))
#options(max.print=1000)
# Add new event_ids
target_successful_data <- target_successful_data %>%
mutate(new_event_id = seq(1:length(target_successful_data$event_id)))
# Add zones based on location of "pass"
target_successful_data <- target_successful_data %>%
mutate(zone = case_when(
xadjcoord <= -25 ~ "DEFENSIVE ZONE",
xadjcoord >= 25 ~ "ATTACKING ZONE",
TRUE ~ "NEUTRAL ZONE"))
# Add the angle of the pass
# Angle = 0 for straight ahead
# Angle = 90 for straight sideways (either direction)
# Angle = 180 for straight backwards
target_successful_data <- target_successful_data %>%
mutate(angle = ifelse(
eventname == "pass",
abs((180 / pi) * atan((yadjcoord - lead(yadjcoord, n = 1)) / (xadjcoord - lead(xadjcoord, n = 1)))),
NA))
target_successful_data <- target_successful_data %>%
mutate(angle = ifelse(
eventname == "pass" &
xadjcoord > lead(xadjcoord, n = 1),
180 - angle,
angle))
# In cases where the pass and reception coordinates are identical the angle is NaN
# There are relatively few cases of this (32) so for current purposes I will simply filter them out
nan_data <- target_successful_data %>%
filter(angle == "NaN")
nan_ids <- nan_data$new_event_id
nan_filter_ids <- c(nan_ids, (nan_ids + 1))
target_successful_data <- target_successful_data %>%
filter(!new_event_id %in% nan_filter_ids)
# Group pass angles into 18 bins
target_successful_data <- target_successful_data %>%
mutate(angle_group = case_when(
angle > 170 ~ "group_A",
angle > 160 ~ "group_B",
angle > 150 ~ "group_C",
angle > 140 ~ "group_D",
angle > 130 ~ "group_E",
angle > 120 ~ "group_F",
angle > 110 ~ "group_G",
angle > 100 ~ "group_H",
angle > 90 ~ "group_I",
angle > 80 ~ "group_J",
angle > 70 ~ "group_K",
angle > 60 ~ "group_L",
angle > 50 ~ "group_M",
angle > 40 ~ "group_N",
angle > 30 ~ "group_O",
angle > 20 ~ "group_P",
angle > 10 ~ "group_Q",
angle >= 0 ~ "group_R"))
# Now do something similar for failed pass attempts
failed_pass_data <- pass_data %>%
filter(outcome == "failed")
failed_pass_ids <- failed_pass_data$event_id
# Explore subsequent events
failed_pass_subsequent_events <- clean_pbp_data_viz %>%
filter(event_id %in% (failed_pass_ids + 1))
summary_failed_pass_subsequent_events <- failed_pass_subsequent_events %>%
group_by(eventname) %>%
summarize(events = n()) %>%
ungroup() %>%
arrange(desc(events))
# Look a little more closely at controlled exits
failed_controlled_exits <- failed_pass_subsequent_events %>%
filter(eventname == "controlledexit")
failed_controlled_exits_ids <- failed_controlled_exits$event_id
failed_controlled_exits_data <- clean_pbp_data_viz %>%
filter(event_id %in% c((failed_controlled_exits_ids - 1),
(failed_controlled_exits_ids),
(failed_controlled_exits_ids + 1),
failed_controlled_exits_ids + 2))
# This is a mixed bag - I will make the following assumptions:
# The location of blocks and failed receptions can be used to find pass angle
# The location of failed controlled exits is the same as failed receptions and therefore can be used to find pass angle
# The location of lpr is less reliable but I'll treat it as "good enough" - it's too bad this makes up the majority of events as it introduces more uncertainty about the pass angle
# The above items represent most of the failed pass attempts - I'll simply filter out the remaining items for current purposes
failed_pass_subsequent_events <- failed_pass_subsequent_events %>%
filter(eventname == "lpr" |
eventname == "block" |
eventname == "reception" |
eventname == "controlledexit")
# Revised summary just to make sure things went smoothly
rev_summary_failed_pass_subsequent_events <- failed_pass_subsequent_events %>%
group_by(eventname) %>%
summarize(events = n()) %>%
ungroup() %>%
arrange(desc(events))
# Build out target failed pass data similar to successful pass data
target_failed_pass_ids <- failed_pass_subsequent_events$event_id
target_failed_data <- clean_pbp_data_viz %>%
filter(event_id %in% target_failed_pass_ids |
event_id %in% (target_failed_pass_ids - 1)) %>%
arrange(event_id)
# Add new event_ids
target_failed_data <- target_failed_data %>%
mutate(new_event_id = seq(1:length(target_failed_data$event_id)))
# Add zones based on location of "pass"
target_failed_data <- target_failed_data %>%
mutate(zone = case_when(
xadjcoord <= -25 ~ "DEFENSIVE ZONE",
xadjcoord >= 25 ~ "ATTACKING ZONE",
TRUE ~ "NEUTRAL ZONE"))
# Need to flip coordinates when the subsequent event team is not the same as the passing team
target_failed_data <- target_failed_data %>%
mutate(flip = ifelse(
eventname != "pass" &
teamname != lag(teamname),
TRUE,
FALSE))
target_failed_data <- target_failed_data %>%
mutate(xadjcoord = ifelse(
flip == TRUE,
xadjcoord * -1,
xadjcoord))
target_failed_data <- target_failed_data %>%
mutate(yadjcoord = ifelse(
flip == TRUE,
yadjcoord * -1,
yadjcoord))
# Add the angle of the pass
# Angle = 0 for straight ahead
# Angle = 90 for straight sideways (either direction)
# Angle = 180 for straight backwards
target_failed_data <- target_failed_data %>%
mutate(angle = ifelse(
eventname == "pass",
abs((180 / pi) * atan((yadjcoord - lead(yadjcoord, n = 1)) / (xadjcoord - lead(xadjcoord, n = 1)))),
NA))
target_failed_data <- target_failed_data %>%
mutate(angle = ifelse(
eventname == "pass" &
xadjcoord > lead(xadjcoord, n = 1),
180 - angle,
angle))
# Group pass angles into 18 bins
target_failed_data <- target_failed_data %>%
mutate(angle_group = case_when(
angle > 170 ~ "group_A",
angle > 160 ~ "group_B",
angle > 150 ~ "group_C",
angle > 140 ~ "group_D",
angle > 130 ~ "group_E",
angle > 120 ~ "group_F",
angle > 110 ~ "group_G",
angle > 100 ~ "group_H",
angle > 90 ~ "group_I",
angle > 80 ~ "group_J",
angle > 70 ~ "group_K",
angle > 60 ~ "group_L",
angle > 50 ~ "group_M",
angle > 40 ~ "group_N",
angle > 30 ~ "group_O",
angle > 20 ~ "group_P",
angle > 10 ~ "group_Q",
angle >= 0 ~ "group_R"))
# Filter out NaN cases (3)
nan_data_2 <- target_failed_data %>%
filter(angle == "NaN")
nan_ids_2 <- nan_data_2$new_event_id
nan_filter_ids_2 <- c(nan_ids_2, (nan_ids_2 + 1))
target_failed_data <- target_failed_data %>%
filter(!new_event_id %in% nan_filter_ids_2)
# PLOT #########################################################################
# Prep the plot data
plot_data_successful <- target_successful_data %>%
filter(eventname == "pass") %>%
group_by(angle_group, zone) %>%
summarize(successful_passes = n()) %>%
ungroup()
plot_data_failed <- target_failed_data %>%
filter(eventname == "pass") %>%
group_by(angle_group, zone) %>%
summarize(failed_passes = n()) %>%
ungroup()
plot_data <- plot_data_successful
plot_data$failed_passes <- plot_data_failed$failed_passes
plot_data <- plot_data %>%
mutate(success_rate = round((successful_passes / (successful_passes + failed_passes)) * 100))
# Plot the results
plot <- ggplot() +
geom_col(data = group_by(plot_data, angle_group),
aes(x = angle_group,
y = successful_passes),
fill = "blue3") +
geom_col(data = group_by(plot_data, angle_group),
aes(x = angle_group,
y = (failed_passes * -1)),
fill = "red3") +
geom_point(data = group_by(plot_data, angle_group),
aes(x = angle_group,
y = 1200,
colour = success_rate),
shape = "square",
size = 8) +
geom_text(data = group_by(plot_data, angle_group),
aes(x = angle_group,
y = 1205,
label = success_rate),
colour = "white",
size = 3.8,
fontface = "bold") +
annotate("text",
x = 16.5,
y = 1300,
label = "FORWARD") +
annotate("text",
x = 9.5,
y = 1300,
label = "SIDEWAYS") +
annotate("text",
x = 2.5,
y = 1300,
label = "BACKWARD") +
annotate("text",
x = 9.5,
y = 1120,
label = "Success Rate (%)") +
theme_minimal() +
theme(panel.grid = element_blank(),
plot.title = element_text(size = 20,
face = "bold"),
plot.subtitle = element_text(size = 16),
plot.caption = element_text(size = 14),
axis.title.x = element_blank(),
axis.text = element_blank(),
legend.position = "none",
strip.background = element_rect(fill = "grey18"),
strip.text = element_text(colour = "white",
size = 10,
face = "bold")) +
labs(title = "Direction Of Successful And Failed Pass Attempts",
subtitle = "The pass direction rotates from stright forward (on the right) to straight backward (on the left) in each zone plot",
y = "Number of Pass Attempts",
caption = "Data by Sportlogiq, viz by 18 Skaters, #WHKYHAC") +
scale_colour_gradient(low = "red", high = "blue") +
facet_wrap(vars(zone))
#plot
The End
That’s it. I’ll post more data visualizations in the days leading up to the July 2 deadline for the Viz Launchpad Competition.
Mark (18 Skaters)