Search code examples
r ggplot2 tukey

How to combine geom_text and boxplot color with discrete x-axis?


I have individuals in an experiment, and I wish to show graphically which of them belong to the same group as defined by a Tukey test. Here is an example of the full dataset:

structure(list(Nom = structure(c(18L, 7L, 27L, 39L, 6L, 27L,39L, 18L, 39L, 27L, 10L, 13L, 25L, 10L, 13L, 10L, 13L, 10L, 21L, 13L, 21L, 39L, 7L, 25L, 18L, 39L, 21L, 18L, 39L, 25L, 6L, 25L, 7L, 25L, 6L, 21L, 25L, 27L,6L, 10L, 25L, 7L, 27L, 7L, 13L, 25L, 27L, 25L, 21L, 13L, 27L, 18L, 7L, 6L, 39L, 27L, 6L, 18L, 39L, 6L, 25L, 18L, 7L, 39L, 27L, 25L, 18L, 25L, 39L, 25L, 13L, 10L, 7L, 25L, 7L, 21L, 18L, 21L, 13L, 18L, 10L, 7L, 25L, 6L, 39L, 7L, 39L, 18L, 6L, 21L, 27L, 6L, 25L, 6L, 39L, 25L, 27L, 18L,13L, 39L, 25L, 27L, 27L, 10L, 18L, 39L, 7L, 7L, 6L, 39L, 7L, 25L, 39L,25L, 27L, 25L, 21L, 10L, 39L, 18L, 27L, 13L, 21L, 39L, 25L, 18L, 25L, 21L, 21L, 39L, 25L, 18L, 7L, 10L, 18L, 7L, 21L, 39L, 6L, 21L, 27L, 10L, 25L,18L,10L, 25L, 13L, 27L, 25L, 39L,39L, 39L, 39L, 39L, 39L, 39L, 39L, 25L, 21L, 25L, 7L, 18L, 18L, 39L, 25L, 7L, 25L, 6L, 21L, 10L, 27L, 13L, 25L, 25L, 18L, 21L, 39L, 27L, 6L, 39L, 6L, 6L, 39L, 6L, 39L, 6L, 39L, 39L, 6L, 39L,6L, 39L, 6L, 39L, 6L, 39L, 39L, 6L, 39L, 6L, 39L, 6L, 21L, 39L, 6L, 7L, 25L, 13L, 25L, 6L, 10L, 18L, 39L, 25L, 13L, 7L, 27L, 25L, 18L, 7L, 39L, 25L, 27L, 6L, 25L, 21L, 39L, 21L, 13L, 10L, 18L, 7L, 6L, 21L, 27L, 39L, 13L, 6L, 7L, 21L, 18L, 6L, 18L, 25L, 10L, 39L, 25L, 7L, 25L, 13L, 21L, 27L, 10L, 18L, 7L, 21L, 10L, 10L, 39L, 25L, 18L, 7L, 6L, 39L, 25L, 27L, 25L, 13L, 25L, 25L, 25L, 7L, 18L, 27L, 39L, 6L, 39L, 25L, 7L, 27L, 25L, 13L, 18L, 25L, 39L, 10L, 25L, 25L, 18L, 39L, 25L, 6L, 7L, 39L, 25L, 27L, 10L, 18L, 13L, 18L, 18L, 25L, 7L, 18L, 39L, 6L, 39L, 13L, 18L,10L, 18L, 18L, 25L, 7L, 27L, 13L, 18L, 27L, 39L, 13L, 10L, 25L,39L, 25L, 6L, 7L, 27L, 13L, 10L, 18L, 13L), .Label = c("ARC", "CARE", "SUMO", "BELLA", "BOURREE", "BRISE", "LAND", "GAN", "FREE", "MELISSE", "DECIDE", "QUISS", "LINE", "DOLENKA", "DOLLY", "DOPA", "DOUCE", "DOURI", "DUNE", "QUISS2", "DOREE", "RENCONTRE", "RONCE", "MALICIEUSE", "SIMBA", "FORETS", "TENTH", "TROPIC", "KNOW", "UMUST","UPLAT", "SWEETY", "ORIGAN", "DEDANS", "VEGA", "CORRAZON", "VERTUS", "VIRE", "VISCASHE"), class = 
"factor"),
Qte_conso = c(573L, 1438L, 196L, 79L, 1501L, 34L, 85L, 10L, 
497L, 807L, 369L, 64L, 11L, 30L, 22L, 159L, 150L, 943L, 230L, 
1265L, 721L, 3L, 64L, 1L, 1L, 3L, 1L, 1501L, 1500L, 37L, 
1057L, 6L, 933L, 16L, 279L, 1501L, 4L, 119L, 165L, 1275L, 
467L, 118L, 1111L, 449L, 1418L, 305L, 273L, 23L, 1L, 1L, 
1L, 1L, 413L, 727L, 1275L, 1071L, 24L, 108L, 56L, 749L, 5L, 
374L, 454L, 168L, 430L, 7L, 666L, 24L, 1L, 35L, 46L, 530L, 
468L, 11L, 165L, 182L, 352L, 1319L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1500L, 514L, 667L, 789L, 1502L, 11L, 254L, 7L, 458L, 
181L, 277L, 800L, 2L, 1501L, 805L, 1048L, 246L, 5L, 5L, 1L, 
3L, 1L, 230L, 1504L, 548L, 1270L, 70L, 272L, 8L, 935L, 201L, 
595L, 822L, 630L, 350L, 455L, 1501L, 29L, 50L, 20L, 1061L, 
65L, 655L, 221L, 3L, 1L, 1L, 1L, 3L, 928L, 1500L, 88L, 285L, 
1499L, 1412L, 354L, 220L, 17L, 573L, 1280L, 16L, 1501L, 1102L, 
352L, 1L, 9L, 1L, 11L, 5L, 1L, 4L, 1L, 232L, 1L, 1L, 1L, 
1L, 1261L, 897L, 107L, 1501L, 558L, 1503L, 1500L, 1501L, 
108L, 1500L, 21L, 65L, 1L, 1L, 1L, 1L, 300L, 5L, 11L, 12L, 
1L, 1L, 33L, 3L, 7L, 5L, 5L, 7L, 1L, 3L, 29L, 18L, 11L, 42L, 
3L, 61L, 3L, 17L, 41L, 744L, 1501L, 880L, 174L, 1284L, 194L, 
122L, 35L, 130L, 1503L, 1503L, 453L, 660L, 1133L, 217L, 1501L, 
612L, 1L, 1500L, 1485L, 160L, 1503L, 1500L, 464L, 8L, 17L, 
683L, 1501L, 672L, 1L, 1L, 1L, 1L, 933L, 751L, 634L, 633L, 
924L, 1486L, 12L, 867L, 1488L, 581L, 1242L, 548L, 68L, 576L, 
852L, 866L, 14L, 566L, 261L, 1L, 1L, 1L, 1L, 1501L, 896L, 
551L, 1500L, 1500L, 1501L, 605L, 72L, 1500L, 74L, 1125L, 
73L, 176L, 1L, 1L, 1L, 1L, 783L, 1501L, 670L, 205L, 1501L, 
1501L, 230L, 1500L, 1500L, 1L, 47L, 1501L, 496L, 3L, 1L, 
1L, 555L, 1501L, 1501L, 1501L, 945L, 1501L, 1501L, 520L, 
1501L, 71L, 3L, 1L, 959L, 542L, 56L, 1501L, 1444L, 1094L, 
20L, 1500L, 29L, 910L, 1501L, 542L, 1500L, 406L, 1L, 1L, 
1L, 7L, 1L, 460L, 1500L, 1040L, 1500L, 1501L, 1500L, 42L, 
1500L, 897L, 302L)), row.names = c(NA, -331L), class = "data.frame")

So far, I have been able to perform the Tukey test (after an ANOVA to assess significance) using the agricolae package and to extract the output into a data frame. My issue is that, while the labels containing the Tukey letter groups are attached to my individuals in the right order, the colors are not.

# Build the label table used by the plot: one row per individual with its
# summary consumption value, its Tukey letter group and the fill colour chosen
# for that group.
# NOTE(review): `tukey_case1` is presumably the result of agricolae::HSD.test();
# its `groups` element is expected to hold two columns (value, letters) with the
# individuals as row names -- confirm against the code that creates it.
tukey_groups <- data.frame(tukey_case1["groups"])
draw_plot <- data.frame(
  Nom = rownames(tukey_groups),
  Qte_conso = tukey_groups[[1]],
  Letters = tukey_groups[[2]]
)

# One colour per Tukey letter group. A named lookup keeps each letter next to
# its colour and fills the whole column in a single step; a letter without an
# entry here gets NA instead of being silently skipped.
letter_colors <- c(
  a = "skyblue",
  ab = "pink",
  abc = "orange",
  bc = "purple",
  c = "grey"
)
draw_plot$color <- unname(letter_colors[as.character(draw_plot$Letters)])

draw_plot ends up with this kind of structure :

# dput() of the label table: individual, summary consumption value, Tukey
# letter group and fill colour.
# NOTE(review): the last colour is "skyblue4" here, whereas the assignment code
# maps the letter "c" to "grey" -- confirm which one is intended.
structure(list(
  Nom = structure(c(4L, 3L, 2L, 8L, 1L, 6L, 5L, 9L, 7L),
                  .Label = c("BRISE", "LAND", "MELISSE", "LINE", "DOURI",
                             "DOREE", "SIMBA", "TENTH", "VISCASHE"),
                  class = "factor"),
  Qte_conso = c(768.12, 763.375, 703.59375, 668.866666666667,
                608.486486486486, 568.875, 435.85, 328.266666666667,
                237.779661016949),
  Letters = structure(c(1L, 1L, 1L, 1L, 2L, 3L, 3L, 4L, 5L),
                      .Label = c("a", "ab", "abc", "bc", "c"),
                      class = "factor"),
  color = c("skyblue", "skyblue", "skyblue", "skyblue", "pink", "orange",
            "orange", "purple", "skyblue4")),
  row.names = c(NA, -9L), class = "data.frame")

Then I used ggplot2 to create a plot with : point for each measurement, boxplot and labels+colors corresponding to tukey's groups.

# Plot every measurement as a point, overlay one boxplot per individual and
# print each individual's Tukey letters at the value stored in `draw_plot`.
ggplot(case1, aes(x = Nom, y = Qte_conso)) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 90, vjust = 0.5)
  ) +
  scale_y_continuous(limits = c(-10, 1600)) +
  labs(x = "Individus", y = "Quantité consommée (g)") +
  ggtitle("Title") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
  # NOTE(review): `fill` is passed as a plain vector outside aes(), so the
  # colours are presumably applied to the boxes by position rather than being
  # matched to `Nom` -- this looks like the cause of the mismatch described in
  # the question; confirm against the ggplot2 documentation.
  geom_boxplot(fill = draw_plot$color, stat = "boxplot") +
  # NOTE(review): the first argument of scale_discrete_manual() is documented
  # as a character vector of aesthetic names, not an aes() mapping -- verify
  # that this scale has any effect.
  scale_discrete_manual(aes(draw_plot$Nom), values = draw_plot$color) +
  geom_text(
    data = draw_plot,
    aes(x = Nom, y = Qte_conso, label = Letters),
    angle = 90,
    vjust = .3
  )

The resulting graph is close to what I would like to achieve: I get a boxplot with the individuals' names on the x-axis and their corresponding labels in the plot. However, the colors do not correspond to the letters; instead, they appear in the order given in the draw_plot table created previously.

I can't figure out how to properly assign colors to letters (i.e. Tukey groups). I have looked at the questions "Tukey's results on boxplot in R" and "Tukeys post-hoc on ggplot boxplot" to try to find a solution on my own. I see that I am not using any function to "recreate" the label order as they do, but I was not able to adapt their solution to my code.

Hope I gave enough explanations and thank you in advance to those who will take some time to read this.


Solution

  • I have finally come up with something. I assume there is a better solution than the one I used, and I would be glad to read about it if someone posts it.

    I used the dplyr package to join my initial dataset with the one containing the letters, and then added those letters as a grouping (fill) variable in the aesthetics of my plot. I used the code below:

    # Attach the Tukey letters (and the other `draw_plot` columns) to every
    # measurement. Both tables contain `Qte_conso`, so the join suffixes the
    # raw measurements as `Qte_conso.x`.
    case1_plot <- left_join(case1, draw_plot, by = "Nom")

    # Points for each measurement, one boxplot per individual filled by its
    # Tukey letter group, and the letters printed from `draw_plot`.
    ggplot(case1_plot, aes(x = Nom, y = Qte_conso.x, fill = Letters)) +
      geom_point() +
      theme(
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        plot.title = element_text(hjust = 0.5, face = "bold")
      ) +
      scale_y_continuous(limits = c(-10, 1600)) +
      labs(x = "Individus", y = "Quantité consommée (g)") +
      ggtitle("Title") +
      geom_boxplot(stat = "boxplot") +
      # The label layer uses its own data, so y is overridden here with the
      # un-suffixed `Qte_conso` column of `draw_plot`.
      geom_text(
        data = draw_plot,
        aes(x = Nom, y = Qte_conso, label = Letters),
        angle = 90,
        vjust = .3
      )