Search code examples
Tags: r, ggplot2, geom, scale-color-manual

R boxplot ggplot2 4 group but 6 parameter


I am trying to create a customized boxplot with ggplot2 from this data.

> dput(Family.boxplot)
structure(list(X.Datasets = c(7845L, 7846L, 7847L, 7848L, 7849L, 
7866L, 7867L, 7868L, 7869L, 7857L, 7859L, 7875L, 7877L, 7878L, 
7879L, 7855L, 7856L, 7858L, 7850L, 7851L, 7852L, 7853L, 7854L, 
7870L, 7871L, 7872L, 7873L, 7874L, 7860L, 7861L, 7880L, 7862L, 
7863L, 7864L, 7881L, 7882L, 7883L, 7884L), Akkermansiaceae = c(255L, 
407L, 736L, 270L, 333L, 137L, 200L, 188L, 474L, 560L, 90L, 788L, 
66L, 58L, 157L, 148L, 359L, 162L, 174L, 546L, 270L, 623L, 186L, 
457L, 416L, 347L, 1483L, 353L, 597L, 229L, 714L, 409L, 701L, 
269L, 860L, 1091L, 2873L, 1536L), Bacteroidaceae = c(992L, 908L, 
651L, 171L, 442L, 188L, 596L, 340L, 474L, 268L, 137L, 866L, 687L, 
782L, 861L, 332L, 372L, 275L, 945L, 906L, 1068L, 1460L, 546L, 
1279L, 2626L, 765L, 1457L, 679L, 1532L, 729L, 1286L, 1460L, 1416L, 
1093L, 1818L, 1564L, 663L, 342L), Christensenellaceae = c(0L, 
0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 
0L, 0L, 0L, 0L, 0L), Clostridiaceae = c(33L, 50L, 97L, 91L, 254L, 
353L, 159L, 315L, 149L, 139L, 200L, 99L, 101L, 160L, 317L, 240L, 
382L, 46L, 122L, 141L, 314L, 87L, 244L, 179L, 115L, 270L, 80L, 
168L, 88L, 143L, 120L, 154L, 28L, 93L, 64L, 89L, 30L, 83L), Coriobacteriaceae = c(85L, 
264L, 114L, 287L, 77L, 0L, 0L, 97L, 138L, 177L, 91L, 291L, 146L, 
122L, 138L, 41L, 0L, 234L, 34L, 123L, 99L, 116L, 63L, 81L, 0L, 
97L, 120L, 73L, 162L, 126L, 268L, 146L, 165L, 144L, 221L, 370L, 
552L, 482L), Deferribacteraceae = c(68L, 45L, 70L, 163L, 773L, 
934L, 43L, 443L, 188L, 88L, 176L, 46L, 65L, 119L, 0L, 195L, 260L, 
20L, 67L, 36L, 312L, 0L, 153L, 179L, 343L, 129L, 28L, 58L, 210L, 
192L, 82L, 0L, 0L, 130L, 0L, 84L, 0L, 14L), Eggerthellaceae = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 24L, 233L, 0L, 115L, 44L, 0L, 38L, 
95L, 71L, 279L, 53L, 224L, 53L, 167L, 79L, 0L, 143L, 0L, 146L, 
0L, 98L, 42L, 138L, 121L, 192L, 84L, 255L, 326L, 588L, 469L), 
    Enterobacteriaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 141L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), 
    Erysipelotrichaceae = c(0L, 12L, 28L, 37L, 39L, 8L, 10L, 
    7L, 24L, 22L, 12L, 25L, 0L, 0L, 18L, 0L, 25L, 0L, 14L, 21L, 
    0L, 21L, 0L, 0L, 9L, 0L, 49L, 8L, 0L, 0L, 7L, 0L, 11L, 0L, 
    16L, 17L, 28L, 10L), Eubacteriaceae = c(91L, 71L, 157L, 35L, 
    124L, 86L, 148L, 37L, 32L, 487L, 228L, 176L, 223L, 31L, 94L, 
    149L, 82L, 54L, 78L, 109L, 96L, 47L, 80L, 197L, 256L, 153L, 
    219L, 25L, 23L, 64L, 69L, 149L, 559L, 27L, 53L, 106L, 32L, 
    118L), Eubacteriales.Family.XIII..Incertae.Sedis = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 16L, 25L, 0L, 0L, 
    0L, 10L, 0L, 5L, 13L, 6L, 13L, 0L, 0L, 31L, 0L, 11L, 0L, 
    10L, 0L, 0L, 11L, 20L, 0L, 20L, 23L, 0L, 0L), Lachnospiraceae = c(744L, 
    1032L, 2506L, 1161L, 4272L, 5544L, 4230L, 5646L, 2896L, 2312L, 
    6130L, 1890L, 4315L, 2651L, 3829L, 5143L, 4639L, 1784L, 2701L, 
    2878L, 3208L, 1822L, 4891L, 3340L, 1423L, 5104L, 1220L, 3319L, 
    2546L, 4928L, 3637L, 2315L, 815L, 2746L, 1581L, 1750L, 928L, 
    2125L), Lactobacillaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 57L, 0L, 34L, 0L, 0L, 222L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L), Muribaculaceae = c(89L, 121L, 42L, 0L, 80L, 34L, 
    63L, 119L, 23L, 758L, 768L, 150L, 348L, 204L, 64L, 355L, 
    620L, 915L, 0L, 18L, 0L, 22L, 0L, 554L, 473L, 0L, 29L, 0L, 
    0L, 0L, 0L, 170L, 238L, 0L, 0L, 25L, 286L, 86L), Oscillospiraceae = c(282L, 
    176L, 507L, 133L, 1103L, 1180L, 662L, 809L, 590L, 541L, 736L, 
    462L, 771L, 795L, 1054L, 906L, 1332L, 197L, 714L, 497L, 1143L, 
    449L, 918L, 720L, 437L, 972L, 342L, 726L, 790L, 1002L, 444L, 
    391L, 234L, 919L, 193L, 762L, 121L, 491L), Peptococcaceae = c(0L, 
    0L, 0L, 0L, 9L, 0L, 0L, 12L, 7L, 0L, 10L, 7L, 0L, 0L, 0L, 
    11L, 14L, 0L, 0L, 0L, 11L, 7L, 11L, 4L, 0L, 12L, 0L, 0L, 
    0L, 15L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Peptostreptococcaceae = c(104L, 
    57L, 421L, 431L, 71L, 42L, 187L, 70L, 400L, 673L, 201L, 383L, 
    211L, 500L, 325L, 126L, 37L, 420L, 0L, 9L, 7L, 62L, 0L, 0L, 
    23L, 0L, 73L, 366L, 0L, 29L, 0L, 52L, 117L, 7L, 0L, 0L, 0L, 
    0L), Spiroplasmataceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 34L, 
    0L, 41L, 0L, 0L, 82L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L), Staphylococcaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 9L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), Sutterellaceae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 82L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 14L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L), Tannerellaceae = c(1298L, 673L, 870L, 0L, 275L, 457L, 
    734L, 647L, 654L, 171L, 116L, 226L, 226L, 1206L, 398L, 482L, 
    423L, 139L, 975L, 1010L, 1499L, 1347L, 1179L, 409L, 662L, 
    726L, 1018L, 165L, 1634L, 970L, 1057L, 1123L, 943L, 2329L, 
    1362L, 1081L, 390L, 378L), Responders = c("NR", "NR", "NR", 
    "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", 
    "NR", "NR", "CR", "CR", "CR", "NR", "NR", "NR", "NR", "NR", 
    "NR", "NR", "NR", "NR", "NR", "NR", "NR", "NR", "CR", "CR", 
    "CR", "CR", "CR", "CR", "CR"), treatment = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, NA, 
    NA, NA, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
    NA, NA, NA, NA, NA, NA, NA), levels = c("A", "B", 
    "C", "D"), class = c("ordered", "factor")), Order = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L), treatment.withoutresponse.indicator = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L), levels = c("A", "B", 
    "C", "D"), class = c("ordered", "factor"))), row.names = c(NA, 
-38L), class = "data.frame")

Instead of a plain 4-group boxplot, I would like the points coloured according to the 'Responders' column, but I am not sure whether there is a way to do this directly.

The way I am currently doing it is not straightforward:

# Outlined boxplots, one colour per treatment group, with the raw points on top.
# Uses Family.boxplot, the data frame shown in the dput() output, so the
# snippet runs against the data that is actually provided.
p <- ggplot(
  data = Family.boxplot,
  aes(
    x = treatment.withoutresponse.indicator,
    y = Clostridiaceae,
    colour = treatment.withoutresponse.indicator
  )
) +
  # outlier.shape = NA hides the boxplot's own outlier markers; the jitter
  # layer below already draws every observation.
  geom_boxplot(outlier.shape = NA) +
  scale_colour_manual(
    values = c("blue3", "dark green", "Dark orange", "brown3")
  ) +
  xlab("") +
  ylab("Clostridiaceae") +
  geom_jitter(width = 0, size = 1.5) # width 0: no horizontal spread
# Outlined boxplots + jitter + responder points: overlay the rows whose
# Responders value is "CR" in a contrasting colour.
# The group label of each "CR" row is read from the data instead of being
# hard-coded as rep('B', 3) / rep('D', 7), so labels and values cannot drift
# apart if the rows change.
cr_rows <- Family.boxplot[Family.boxplot$Responders %in% "CR", ]
new_data <- data.frame(
  name = as.character(cr_rows$treatment.withoutresponse.indicator),
  value = cr_rows$Clostridiaceae
)
# No fill mapping: pch 20 is a solid dot without a fill, so mapping fill would
# only add an extra legend. The fixed colour marks the "CR" points.
p + geom_jitter(
  data = new_data,
  aes(x = name, y = value),
  position = position_jitter(0),
  color = "Misty Rose", pch = 20
)

This is the plot produced by my current code: [image: enter image description here]

Ideally, I want the responders in the green box (2nd) and the red box (4th) shown in different colours.

In other words, 4 boxes but 6 colours. I know I could split the data further and plot the points separately, but it seems there should be an easier way.


Solution

  • We can use ggh4x::scale_listed:

    library(ggplot2)
    library(ggh4x)
    
    # Four boxes coloured by treatment group, with the individual points
    # coloured by responder status. `boxC` and `pointC` are placeholder
    # aesthetics (hence the "unknown aesthetics" warnings); scale_listed()
    # makes each of them stand in for colour on its own layer.
    ggplot(
      data = Family.boxplot,
      aes(x = treatment.withoutresponse.indicator, y = Clostridiaceae)
    ) +
      # outlier.shape = NA hides the boxplot's own outlier markers; every
      # observation is drawn by geom_point() below.
      geom_boxplot(
        aes(boxC = treatment.withoutresponse.indicator),
        outlier.shape = NA
      ) +
      geom_point(aes(pointC = Responders), size = 1.5) +
      scale_listed(
        list(
          # One manual colour scale per placeholder aesthetic.
          scale_colour_manual(
            values = c("blue3", "dark green", "Dark orange", "brown3"),
            aesthetics = "boxC"
          ),
          scale_colour_manual(
            values = c("purple", "deeppink"),
            aesthetics = "pointC"
          )
        ),
        replaces = c("color", "color")
      ) +
      xlab("") +
      ylab("Clostridiaceae")
    #> Warning in geom_boxplot(aes(boxC = treatment.withoutresponse.indicator), :
    #> Ignoring unknown aesthetics: boxC
    #> Warning in geom_point(aes(pointC = Responders), size = 1.5): Ignoring unknown
    #> aesthetics: pointC
    

    Created on 2024-04-12 with reprex v2.0.2