Hey everyone, I want to label the outliers in my geom_boxplot with their unique id. In general I use this formula (whatever is the data column, id is the label I want):
# Template: label every point beyond the 1.5 * IQR fences with its `id`;
# all other points get an empty label.
# IQR() takes no probability argument (its second positional argument is
# `na.rm`), so it is called with the data column only.
plot +
  geom_text(
    aes(
      label = ifelse(
        (whatever > (quantile(whatever, 0.75) + 1.5 * IQR(whatever))) |
          (whatever < (quantile(whatever, 0.25) - 1.5 * IQR(whatever))),
        id,
        ""
      )
    ),
    hjust = -1
  )
This works fine if I only have one boxplot (plot_all). However, if I separate the boxplots by a value (e.g. sex), the outliers are calculated based on the whole data and not on the subset of data. Is there any way I can forward the selected data so that the outliers are calculated for each subset only?
# Reproducible example setup. The workspace is intentionally not cleared
# with rm(list = ls()): an example should not delete the objects of
# whoever runs it.
library(tidyverse)
library(ggplot2)
library(ggbeeswarm)

set.seed(1)

# 1000 rows per group: 998 uniform draws plus two hand-placed extreme
# values (one low, one high) that should end up labelled as outliers.
# sample() has a single value to draw from, so every row gets that value.
male <- tibble(
  sex = sample("male", 1000, replace = TRUE),
  id = as.character(1:1000),
  whatever = c(runif(998, min = .75, max = 1.25), 0, 2)
)
female <- tibble(
  sex = sample("female", 1000, replace = TRUE),
  id = as.character(1001:2000),
  whatever = c(runif(998, min = 1.25, max = 1.75), .5, 2.5)
)

# Stack both groups into one table with columns sex, id, whatever.
data <- bind_rows(male, female)
# Single boxplot over all rows; points beyond the 1.5 * IQR fences are
# labelled with their `id`, every other point gets an empty label.
# IQR() takes no probability argument (its second positional argument is
# `na.rm`), so it is called with the data column only.
plot_all <- ggplot(data, aes("all", whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(
    aes(
      label = ifelse(
        (whatever > (quantile(whatever, 0.75) + 1.5 * IQR(whatever))) |
          (whatever < (quantile(whatever, 0.25) - 1.5 * IQR(whatever))),
        id,
        ""
      )
    ),
    hjust = -1
  )
# One boxplot per `sex`, with the same labelling rule as for a single box.
# NOTE: quantile() and IQR() inside aes() see the whole `whatever` column,
# so the fences are NOT computed per `sex` group -- this is the behaviour
# the question is about.
# IQR() takes no probability argument (its second positional argument is
# `na.rm`), so it is called with the data column only.
plot_sex <- ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(
    aes(
      label = ifelse(
        (whatever > (quantile(whatever, 0.75) + 1.5 * IQR(whatever))) |
          (whatever < (quantile(whatever, 0.25) - 1.5 * IQR(whatever))),
        id,
        ""
      )
    ),
    hjust = -1
  )
It is possible to do this by manipulating the layer data, but why would you? Just modify the data on the way into ggplot:
# Build the label column per `sex` group before plotting: a row keeps its
# `id` as label only when its value lies outside the whisker ends reported
# by boxplot.stats(); every other row gets an empty label.
data %>%
  group_by(sex) %>%
  mutate(
    label = {
      # Smallest and largest of the five boxplot statistics for this group.
      whisker_limits <- range(boxplot.stats(whatever)$stats)
      ifelse(
        whatever > whisker_limits[2] | whatever < whisker_limits[1],
        id,
        ''
      )
    }
  ) %>%
  ggplot(aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(aes(label = label), hjust = -1)
This is tidier than altering the layer data (which gives the same result):
# Alternative: give geom_text() its own layer data. `. %>% ...` builds a
# function of the plot data; it keeps only the rows that lie beyond the
# 1.5 * IQR fences of their own `sex` group, and those rows are labelled
# with `id`.
# IQR() takes no probability argument (its second positional argument is
# `na.rm`), so it is called with the data column only.
ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(
    data = . %>%
      group_by(sex) %>%
      filter(
        (whatever > (quantile(whatever, 0.75) + 1.5 * IQR(whatever))) |
          (whatever < (quantile(whatever, 0.25) - 1.5 * IQR(whatever)))
      ),
    aes(label = id),
    hjust = -1
  )