Search code examples
r ggplot2 facet-grid tukey

Tukey test results on geom_boxplot with facet_grid


I'd love to place letters representing the results of a Tukey's HSD test on boxplots I make in ggplot. I am aware of other posts (Tukeys post-hoc on ggplot boxplot, Tukey's results on boxplot in R), but in my case I work with facet_grid and I am not sure how to proceed. As I am creating a lot of boxplots, I would love to have something automated. Any tips to help me get started would be greatly appreciated. Thanks a lot.

library(multcompView)
library(ggplot2)
# Sample data: two facets of ten observations each; within every facet the
# classes Q1, Q2 and Q3 hold 3, 3 and 4 observations respectively.
facet_names <- c("facet1", "facet2")
classes_one_facet <- rep(c("Q1", "Q2", "Q3"), times = c(3, 3, 4))

df <- data.frame(
  var_facet = rep(facet_names, each = 10),
  class_x = rep(classes_one_facet, times = 2),
  value_y = c(
    12, 21, 35, 42, 56, 1, 4, 3, 4, 5, # facet1
    rep(2, 5), rep(4, 5)               # facet2
  )
)

# Build the letter labels (compact letter display) for one term of a Tukey
# HSD result.
#
# Arguments:
#   TUKEY    - the object returned by TukeyHSD(). Only consulted when
#              `variable` is given as a term name.
#   variable - either the comparison matrix of one term (e.g. TUKEY$class_x),
#              whose rows are named "B-A" and whose 4th column is passed to
#              multcompLetters() (the adjusted p-value column in TukeyHSD
#              output), or the name of that term as a single string.
#
# Returns a data frame with the columns `Letters` and `treatment`, sorted by
# `treatment` so the rows follow the default alphabetical order of the x axis.
generate_label_df <- function(TUKEY, variable) {
  # Allow the term to be named instead of passing its matrix directly.
  if (is.character(variable) && length(variable) == 1L) {
    term <- variable
    variable <- TUKEY[[term]]
    if (is.null(variable)) {
      stop("no term named '", term, "' in TUKEY", call. = FALSE)
    }
  }

  # Re-attach the comparison names explicitly: indexing a one-row matrix
  # (only two groups compared) returns an unnamed value, and
  # multcompLetters() needs the "B-A" names to know which groups are compared.
  p_adj <- variable[, 4]
  names(p_adj) <- rownames(variable)

  tukey_labels <- data.frame(multcompLetters(p_adj)["Letters"])

  # Put the labels in the same order as the boxes in the boxplot.
  tukey_labels$treatment <- rownames(tukey_labels)
  tukey_labels[order(tukey_labels$treatment), ]
}

# One-way ANOVA on the pooled data (all facets together), followed by
# Tukey's HSD on the class_x term.
model <- lm(value_y ~ class_x, data = df)
ANOVA <- aov(model)
TUKEY <- TukeyHSD(ANOVA)
plot(TUKEY, las = 1, col = "brown")

# Letter label for every class_x level.
labels <- generate_label_df(TUKEY, TUKEY$class_x)
names(labels) <- c("Letters", "class_x") # rename columns for merging

# Vertical position of each letter: the mean of value_y per class.
# Only value_y is aggregated; `. ~ class_x` would also pull the non-numeric
# var_facet column into the aggregation, which corrupts the result and leaves
# a bogus var_facet column in `final`.
yvalue <- aggregate(value_y ~ class_x, data = df, FUN = mean)

final <- merge(labels, yvalue) # one row per class_x: Letters + y position

# ggplot with facet_grid.
# NOTE: `final` has no var_facet column, so the same pooled letters are drawn
# in every panel; they are not computed per facet.
p <- ggplot(data = df, aes(class_x, value_y, fill = class_x)) +
  geom_boxplot(outlier.shape = NA) +
  facet_grid(~var_facet)
p <- p + scale_y_continuous(
  limits = quantile(df$value_y, c(0.1, 0.9), na.rm = TRUE)
)
p <- p + theme_minimal()
p <- p + scale_fill_brewer(palette = "Greens")
p <- p + theme(legend.position = "none")
p <- p + theme(axis.text.x = element_text(angle = 45, hjust = 1))
p <- p + geom_text(
  data = final,
  aes(x = class_x, y = value_y, label = Letters),
  vjust = -3.5, hjust = -.5
)
p

Solution

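  • Maybe this is what you are looking for. The idea is to run the ANOVA and Tukey test separately on the subset of data belonging to each facet, tag the resulting letters with that facet's name, and add them to the plot with geom_text so that each panel shows only its own letters.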

    # Base plot: one box per class_x, one panel per var_facet.
    p <- ggplot(data = df, aes(x = class_x, y = value_y, fill = class_x)) +
      geom_boxplot(outlier.shape = NA) +
      facet_grid(~var_facet) +
      scale_fill_brewer(palette = "Greens") +
      theme_minimal() +
      theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

    # Run the ANOVA and Tukey's HSD separately on each facet's rows and
    # collect the letter labels, tagged with the facet they belong to.
    facet_labels <- lapply(as.character(unique(df$var_facet)), function(facetk) {
      subdf <- subset(df, var_facet == facetk)
      model <- lm(value_y ~ class_x, data = subdf)
      ANOVA <- aov(model)
      TUKEY <- TukeyHSD(ANOVA)

      labels <- generate_label_df(TUKEY, TUKEY$class_x)
      names(labels) <- c("Letters", "class_x")

      # Letter height: upper quartile of value_y per class. Only value_y is
      # aggregated; `. ~ class_x` would also feed the non-numeric var_facet
      # column to quantile().
      yvalue <- aggregate(value_y ~ class_x, data = subdf, FUN = quantile,
                          probs = .75)

      final <- merge(labels, yvalue)
      final$var_facet <- facetk # lets facet_grid route the row to its panel
      final
    })
    final <- do.call(rbind, facet_labels)

    # A single text layer carries the labels of all panels.
    p <- p + geom_text(
      data = final,
      aes(x = class_x, y = value_y, label = Letters),
      vjust = -1.5, hjust = -.5
    )
    p
    

    enter image description here