I'd love to place letters representing the results of a Tukey's HSD test on boxplots I make in ggplot. I am aware of other posts (Tukeys post-hoc on ggplot boxplot, Tukey's results on boxplot in R), but in my case I work with facet_grid and I am not sure how to proceed. As I am creating a lot of boxplots, I would love to have something automated. Any tips to help me get started would be greatly appreciated. Thanks a lot.
library(multcompView)
library(ggplot2)
# Sample data: 20 observations, 10 per facet; within each facet the classes
# appear as 3 x Q1, 3 x Q2 and 4 x Q3.
df <- data.frame(
  var_facet = rep(c("facet1", "facet2"), each = 10),
  class_x = rep(rep(c("Q1", "Q2", "Q3"), times = c(3, 3, 4)), times = 2),
  value_y = c(
    12, 21, 35, 42, 56, 1, 4, 3, 4, 5,
    2, 2, 2, 2, 2, 4, 4, 4, 4, 4
  )
)
# Build compact-letter-display labels from one term of a TukeyHSD() result.
#
# Arguments:
#   TUKEY    - the TukeyHSD object. It is not used by the body; it is kept so
#              that existing calls generate_label_df(TUKEY, TUKEY$term) work.
#   variable - the comparison matrix of a single term (e.g. TUKEY$class_x),
#              with one named row per pairwise comparison and the adjusted
#              p-value in column 4.
#
# Returns a data frame with the columns `Letters` (as produced by
# multcompLetters()) and `treatment` (the group name), sorted by `treatment`.
generate_label_df <- function(TUKEY, variable) {
  # Column 4 holds the adjusted p-values. Re-attach the comparison names
  # explicitly: when the matrix has a single row (only two groups), `[` drops
  # them, and multcompLetters() needs the names to recover the groups.
  Tukey.levels <- variable[, 4]
  names(Tukey.levels) <- rownames(variable)

  Tukey.labels <- data.frame(multcompLetters(Tukey.levels)["Letters"])

  # Sort by group name so the labels follow the order of the boxes on the
  # x axis of the boxplot.
  Tukey.labels$treatment <- rownames(Tukey.labels)
  Tukey.labels[order(Tukey.labels$treatment), ]
}
# One-way ANOVA of value_y by class_x on the pooled data (all facets
# together), followed by Tukey's HSD.
model <- lm(value_y ~ class_x, data = df)
ANOVA <- aov(model)
TUKEY <- TukeyHSD(ANOVA)
plot(TUKEY, las = 1, col = "brown")

# Letter labels, renamed so they can be merged with the y positions.
labels <- generate_label_df(TUKEY, TUKEY$`class_x`)
names(labels) <- c("Letters", "class_x")

# y position of each letter: the mean of value_y per class. Only value_y is
# aggregated; `. ~ class_x` would also try to average the non-numeric
# var_facet column, which yields NA values and warnings.
yvalue <- aggregate(value_y ~ class_x, data = df, FUN = mean)
final <- merge(labels, yvalue)

# ggplot with facet_grid
p <- ggplot(data = df, aes(class_x, value_y, fill = class_x)) +
  geom_boxplot(outlier.shape = NA) +
  facet_grid(~var_facet)
# Zoom with coord_cartesian() rather than scale limits: scale limits discard
# the observations outside the range before the boxplot statistics are
# computed, whereas coord_cartesian() only changes the visible window.
p <- p + coord_cartesian(
  ylim = unname(quantile(df$value_y, c(0.1, 0.9), na.rm = TRUE))
)
p <- p + theme_minimal()
p <- p + scale_fill_brewer(palette = "Greens")
p <- p + theme(legend.position = "none")
p <- p + theme(axis.text.x = element_text(angle = 45, hjust = 1))
# `final` has no var_facet column, so the same letters are drawn in every
# panel.
p <- p + geom_text(
  data = final,
  aes(x = class_x, y = value_y, label = Letters),
  vjust = -3.5, hjust = -0.5
)
p
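Maybe this is what you are looking for: run the ANOVA and Tukey's HSD separately for each facet, and add the resulting letters to the matching panel.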
# Base plot: one boxplot per class, one panel per facet.
p <- ggplot(data = df, aes(x = class_x, y = value_y, fill = class_x)) +
  geom_boxplot(outlier.shape = NA) +
  facet_grid(~var_facet) +
  scale_fill_brewer(palette = "Greens") +
  theme_minimal() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Run ANOVA + Tukey's HSD separately within each facet and collect the letter
# labels, tagged with their facet, into one list entry per facet.
facets <- as.character(unique(df$var_facet))
label_frames <- vector("list", length(facets))
names(label_frames) <- facets
for (facetk in facets) {
  subdf <- subset(df, var_facet == facetk)
  model <- lm(value_y ~ class_x, data = subdf)
  ANOVA <- aov(model)
  TUKEY <- TukeyHSD(ANOVA)
  labels <- generate_label_df(TUKEY, TUKEY$`class_x`)
  names(labels) <- c("Letters", "class_x")
  # y position of each letter: the upper quartile of value_y per class. Only
  # value_y is aggregated; `. ~ class_x` would also pass the non-numeric
  # var_facet column to quantile().
  yvalue <- aggregate(value_y ~ class_x, data = subdf, FUN = quantile,
                      probs = 0.75)
  final <- merge(labels, yvalue)
  # The var_facet column routes each label to its own panel.
  final$var_facet <- facetk
  label_frames[[facetk]] <- final
}

# Add all labels as a single text layer instead of one layer per facet.
if (length(label_frames) > 0) {
  final <- do.call(rbind, label_frames)
  p <- p + geom_text(
    data = final,
    aes(x = class_x, y = value_y, label = Letters),
    vjust = -1.5, hjust = -0.5
  )
}
p