I want to define a generic function
# Build a boxplot of `yvar` against `xvar`, one box per `groupvar` level, with
# whiskers intended to reach min(y) and max(y) even when a group's IQR is 0.
#
# Arguments:
#   tmp      - data frame to plot; when it has a `yield` column, `yvar` is set
#              to NA on rows whose rounded yield is below 85.
#   xvar     - unquoted column mapped to the x aesthetic.
#   yvar     - unquoted column mapped to the y aesthetic.
#   groupvar - unquoted column used to group the IQR summary and the boxes.
# Returns the ggplot object `p`.
func_boxplot2 <- function(tmp, xvar, yvar, groupvar)
{
# Capture the unquoted column arguments as quosures so they can be unquoted with `!!` below
xvar <- enquo(xvar)
yvar <- enquo(yvar)
groupvar <- enquo(groupvar)
# If variable yield exists, put concentrations to NA for all yields < annual_yield_thres
# (the threshold is hard-coded as 85 here)
if( "yield" %in% names(tmp) )
{
tmp <- tmp %>%
mutate_at(vars(!!yvar), ~ifelse(round(yield, 0) < 85, NA, .))
}
# Compute IQR for each year (one row per `groupvar` level, column `iqr`)
tmp_iqr <- tmp %>%
group_by(!!groupvar) %>%
summarise(iqr=IQR(!!yvar, na.rm = TRUE))
# Intent: add a tiny random offset to `yvar` wherever the group's IQR is 0.
# NOTE(review): `tmp_iqr[[!!groupvar]]` uses a whole column as the `[[` subscript,
# which matches the reported "Subscript `date` has size 60 but must be size 1" error.
# NOTE(review): the ifelse() test has one element per matched row of `tmp_iqr`, not
# one per row of `tmp`, and runif(1, ...) draws a single offset -- confirm intent.
p <- ggplot(data = tmp %>% mutate_at(vars(!!yvar), ~ifelse(tmp_iqr[which(tmp_iqr[[!!groupvar]] %in% (!!xvar)),]$iqr == 0, . + runif(1, -0.01, 0.01), . )), aes(x = !!xvar, y = !!yvar))
p <- p + stat_boxplot(aes(group = !!groupvar), na.rm = TRUE, coef = 10000) # Trick (large unrealistic coef value) so whiskers end at min(y) & max(y)
# Second boxplot layer with outlier points hidden (outlier.shape = NA)
p <- p + geom_boxplot(na.rm = TRUE, outlier.shape = NA)
return(p)
}
which is able to plot boxplot whiskers extending to min/max even when the IQR is 0. I am trying to achieve this by adding tiny random numbers (below the significance level) to the offending data, so that the IQR is no longer 0.
However, I must have missed something in the syntax of quosures, because running this function
# Failing call: tmp = data, xvar = date, yvar = days, groupvar = date
func_boxplot2(data, date, days, date)
with the dataset
structure(list(date = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L), .Label = c("2010", "2011", "2012",
"2013", "2014", "2015", "2016", "2017", "2018", "2019"), class = c("ordered",
"factor")), station = c("41B011", "41MEU1", "41N043", "41R001",
"41R012", "41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012",
"41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1",
"41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1", "41B011",
"41MEU1", "41N043", "41R001", "41R012", "41WOL1", "41B011", "41MEU1",
"41N043", "41R001", "41R012", "41WOL1", "41B011", "41MEU1", "41N043",
"41R001", "41R012", "41WOL1", "41B011", "41MEU1", "41N043", "41R001",
"41R012", "41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012",
"41WOL1", "41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1"
), days = c(16, 15, 45, 26, 14, 14, 32, 7, 87, 42, 24, 23, 25,
25, 55, 29, 29, 16, 11, 14, 58, 21, 19, 10, 10, 14, 33, 18, 10,
7, 9, 10, 19, 7, 8, 7, 1, 5, 15, 8, 1, 4, 5, 6, 14, 6, 5, 5,
3, 5, 19, 8, 4, 5, 3, 4, 16, 3, 1, 3), yield = c(98.4817351598173,
49.4520547945205, 95.8561643835616, 97.6712328767123, 98.2648401826484,
95.1598173515982, 97.8767123287671, 27.9109589041096, 98.310502283105,
98.972602739726, 97.203196347032, 96.2100456621005, 98.7818761384335,
96.7554644808743, 97.4954462659381, 98.8046448087432, 98.747723132969,
98.3037340619308, 99.0525114155251, 96.1986301369863, 97.1004566210046,
96.4954337899543, 96.3698630136986, 98.2077625570776, 96.62100456621,
98.3675799086758, 95.6963470319635, 96.8835616438356, 93.5844748858447,
87.8196347031963, 91.2328767123288, 92.5570776255708, 81.5182648401827,
82.7739726027397, 90.1826484018265, 87.1461187214612, 87.2153916211293,
92.9986338797814, 94.6948998178506, 85.5760473588342, 92.3611111111111,
96.2204007285975, 86.3698630136986, 86.4269406392694, 87.796803652968,
93.2762557077626, 96.6438356164384, 95.6164383561644, 71.3812785388128,
93.7442922374429, 96.3698630136986, 97.2602739726027, 95.7876712328767,
94.7146118721461, 87.6141552511416, 43.0821917808219, 88.6872146118722,
92.6826484018265, 90.365296803653, 86.541095890411), environ = structure(c(5L,
4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L,
3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L,
3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L, 5L,
4L, 6L, 3L, 5L, 3L, 5L, 4L, 6L, 3L, 5L, 3L), .Label = c("Urbain avec très forte influence du trafic",
"Urbain avec forte influence du trafic", "Urbain avec influence modérée du trafic",
"Urbain avec faible influence du trafic", "Urbain avec très faible influence du trafic",
"Industriel avec influence modérée du trafic"), class = "factor")), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"))
gives me the following errors
Error: Problem with `mutate()` input `days`.
x Must extract column with a single valid subscript.
x Subscript `date` has size 60 but must be size 1.
ℹ Input `days` is `(structure(function (..., .x = ..1, .y = ..2, . = ..1) ...`.
What is wrong with my syntax, please?
Many thanks,
A.
======== UPDATE ==========
Using the suggested updated function
# Suggested rewrite of func_boxplot2 using the `{{ }}` (embrace) operator.
#
# Arguments:
#   tmp      - data frame to plot; when it has a `yield` column, `yvar` is set
#              to NA on rows whose rounded yield is below 85.
#   xvar     - unquoted column mapped to the x aesthetic.
#   yvar     - unquoted column mapped to the y aesthetic.
#   groupvar - unquoted column used for grouping.
# Returns the ggplot object built by the last expression.
func_boxplot2 <- function(tmp, xvar, yvar, groupvar)
{
# If variable yield exists, put concentrations to NA for all yields < annual_yield_thres
# (the threshold is hard-coded as 85 here)
if("yield" %in% names(tmp)) {
tmp <-
tmp %>%
mutate(across({{yvar}}, ~ifelse(round(yield, 0) < 85, NA, .)))
}
# Per `groupvar` level: intended to add a tiny random offset to `yvar` when the
# group's IQR is 0.
tmp <-
tmp %>%
group_by({{groupvar}}) %>%
mutate(
across({{yvar}}, function (x) {
ifelse(
# NOTE(review): this test is a single value per group, and ifelse() returns a
# result shaped like its test, so only the first element of the chosen branch
# is kept. That is consistent with the reported plot where every row of a year
# carries that year's first value.
IQR({{yvar}}, na.rm = TRUE) == 0,
# NOTE(review): runif(1, ...) draws one offset, shared by the whole vector
x + runif(1, -0.01,0.01),
x
)
})
)
# `tmp` is still grouped by `groupvar` at this point (no ungroup() call above)
ggplot(tmp, aes(x = {{xvar}}, y = {{yvar}})) +
# Large unrealistic coef value, meant to make whiskers end at min(y) & max(y)
stat_boxplot(aes(group = {{groupvar}}), na.rm = TRUE, coef = 10000) +
geom_boxplot(na.rm = TRUE, outlier.shape = NA)
}
results in the following plot
As stated in my comment, it seems that the processing of `tmp` outputs, for all rows of the same year, the first value of `yvar` for that year, which explains the plot. Indeed, commenting out this block gives the following figure
The condition in `ifelse` wasn't the correct length. You can change the `data` argument of `ggplot` to this:
data =
  tmp %>%
  group_by(!!groupvar) %>%
  mutate_at(
    vars(!!yvar),
    # The leading `~` turns this into a lambda applied to the column within each
    # group; without it the expression is evaluated once, eagerly, as an argument.
    # isTRUE() guards against IQR() being NA when a group holds only NA values.
    ~ if (isTRUE(IQR(., na.rm = TRUE) == 0)) {
      # One independent draw per value: a single shared offset would shift the
      # whole group and leave its IQR at 0.
      . + runif(length(.), -0.01, 0.01)
    } else {
      .
    }
  )
Your usage of quosures and `!!` is correct; however, you should use the more recent `{{ }}` (embrace) operator instead.
This is the updated function
#' Boxplot whose whiskers span the full data range, even when the IQR is 0
#'
#' Optionally blanks low-yield observations, then adds a tiny random offset to
#' `yvar` in every group whose interquartile range is exactly 0, and finally
#' draws the boxplot.
#'
#' @param tmp Data frame to plot. If it contains a `yield` column, `yvar` is
#'   set to `NA` on rows whose rounded yield is below `annual_yield_thres`.
#' @param xvar Unquoted column mapped to the x aesthetic.
#' @param yvar Unquoted column mapped to the y aesthetic.
#' @param groupvar Unquoted column defining the groups (one box per level).
#' @param annual_yield_thres Minimum rounded yield for an observation to be
#'   kept. Defaults to 85, the value previously hard-coded.
#' @return A ggplot object.
func_boxplot2 <- function(tmp, xvar, yvar, groupvar, annual_yield_thres = 85) {
  # If variable yield exists, put concentrations to NA for all yields < annual_yield_thres
  if ("yield" %in% names(tmp)) {
    tmp <- tmp %>%
      mutate(
        across(
          {{ yvar }},
          ~ ifelse(round(yield, 0) < annual_yield_thres, NA, .)
        )
      )
  }

  tmp <- tmp %>%
    group_by({{ groupvar }}) %>%
    mutate(
      across({{ yvar }}, function(x) {
        # IQR() is NA when every value of the group is NA (e.g. after the yield
        # filter above); isTRUE() treats that case as "no jitter needed" instead
        # of failing with "missing value where TRUE/FALSE needed".
        if (isTRUE(IQR(x, na.rm = TRUE) == 0)) {
          # One independent draw per value so the group's IQR becomes non-zero
          x + runif(length(x), -0.01, 0.01)
        } else {
          x
        }
      })
    ) %>%
    # Grouping was only needed for the per-group jitter
    ungroup()

  ggplot(tmp, aes(x = {{ xvar }}, y = {{ yvar }})) +
    # Large unrealistic coef value, meant to make whiskers end at min(y) & max(y)
    stat_boxplot(aes(group = {{ groupvar }}), na.rm = TRUE, coef = 10000) +
    geom_boxplot(na.rm = TRUE, outlier.shape = NA)
}
# Example call: tmp = data, xvar = date, yvar = days, groupvar = date
func_boxplot2(data, date, days, date)