Search code examples
r, ggplot2, facet

ggplot2 facetting with multiple layers


I'm having trouble getting the correct facetting behavior when overlaying two geoms on a single plot.

My input data consists of two data frames. df contains taxon abundance data that is grouped by variable and stacked by short_taxa, then facetted by SampleType.

# Taxon abundance in long format: one row per subject (`variable`) and taxon
# family (`short_taxa`), with the abundance in `value` and the facetting
# group in `SampleType` (18 subjects x 2 taxa = 36 rows).
df <- structure(list(variable = c("Subject1", "Subject1", "Subject2", 
"Subject2", "Subject5", "Subject5", "Subject7", "Subject7", "Subject8", 
"Subject8", "Subject11", "Subject11", "Subject12", "Subject12", 
"Subject14", "Subject14", "Subject15", "Subject15", "Subject18", 
"Subject18", "Subject20", "Subject20", "Subject22", "Subject22", 
"Subject24", "Subject24", "Subject25", "Subject25", "Subject28", 
"Subject28", "Subject30", "Subject30", "Subject31", "Subject31", 
"Subject32", "Subject32"), value = c(32.4137931034483, 0, 13.6363636363636, 
19.435736677116, 16.3304514889529, 27.4735830931796, 38.4180790960452, 
0.564971751412429, 33.9024390243902, 1.46341463414634, 37.2093023255814, 
5.42635658914729, 23.3175355450237, 32.7962085308057, 35.8024691358025, 
0, 14.1210374639769, 48.4149855907781, 1.86516853932584, 1.50561797752809, 
43.9490445859873, 0.955414012738854, 3.76932989690722, 88.8208762886598, 
29.6511627906977, 4.36046511627907, 32.4742268041237, 19.5876288659794, 
2.03570310053241, 91.1055433761353, 40.9356725146199, 0, 31.2335958005249, 
6.2992125984252, 35.1084812623274, 7.88954635108481), short_taxa = c("f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae"), SampleType = c("Group2", 
"Group2", "Group3", "Group3", "Group1", "Group1", "Group2", "Group2", 
"Group3", "Group3", "Group2", "Group2", "Group1", "Group1", "Group2", 
"Group2", "Group3", "Group3", "Group1", "Group1", "Group2", "Group2", 
"Group3", "Group3", "Group3", "Group3", "Group1", "Group1", "Group1", 
"Group1", "Group2", "Group2", "Group3", "Group3", "Group1", "Group1"
)), .Names = c("variable", "value", "short_taxa", "SampleType"
), row.names = c(17L, 21L, 43L, 47L, 121L, 125L, 173L, 177L, 
199L, 203L, 277L, 281L, 303L, 307L, 355L, 359L, 381L, 385L, 459L, 
463L, 511L, 515L, 563L, 567L, 615L, 619L, 641L, 645L, 719L, 723L, 
771L, 775L, 797L, 801L, 823L, 827L), class = "data.frame")

I can get this to plot nicely like so:

# Stacked bars of taxon abundance per subject, one panel per SampleType.
# Every `+` must end its line: a line that *starts* with `+` is parsed as a
# new statement, so the layers that follow would never be added to the plot.
ggplot(df, aes(x = variable, y = value, fill = short_taxa,
               group = short_taxa)) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  facet_wrap(~SampleType, scales = "free") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )

enter image description here

Now what I'd like to do is to add a geom_rect below each stacked barplot (corresponding to each Subject) that represents a density measurement for that subject.

# One row per rectangle (18 rows): the subject label (`variable`), the
# rectangle corners (`xmin`, `xmax`, `ymin`, `ymax`), the facetting group
# (`SampleType`), and the `density` value that is mapped to the outline color.
# NOTE(review): several labels here (e.g. Subject29, Subject13) do not occur
# in df -- confirm the intended subject/SampleType pairing.
df.coloring <- structure(list(variable = c("Subject24", "Subject25", "Subject7", 
"Subject28", "Subject29", "Subject13", "Subject9", "Subject32", 
"Subject33", "Subject11", "Subject20", "Subject14", "Subject21", 
"Subject5", "Subject1", "Subject17", "Subject18", "Subject3"), 
    xmin = c(0.5, 1.5, 4.5, 6.5, 7.5, 10.5, 11.5, 13.5, 14.5, 
    17.5, 19.5, 21.5, 23.5, 24.5, 27.5, 29.5, 30.5, 31.5), xmax = c(1.5, 
    2.5, 5.5, 7.5, 8.5, 11.5, 12.5, 14.5, 15.5, 18.5, 20.5, 22.5, 
    24.5, 25.5, 28.5, 30.5, 31.5, 32.5), ymin = c(-6, -6, -6, 
    -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6
    ), ymax = c(-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 
    -4, -4, -4, -4, -4, -4), SampleType = c("Group2", "Group3", 
    "Group1", "Group2", "Group3", "Group2", "Group1", "Group2", 
    "Group3", "Group1", "Group2", "Group3", "Group3", "Group1", 
    "Group1", "Group2", "Group3", "Group1"), density = c(0.640242130728438, 
    0.116821877425537, 0.0310043091885746, 0.0189890721812844, 
    0.974712340626866, 0.421599371824414, 0.169613848207518, 
    0.76187791978009, 0.69058098597452, 0.600862825522199, 0.671995443990454, 
    0.225653737317771, 0.911656582495198, 0.342635749839246, 
    0.138989825500175, 0.987418259494007, 0.739982327679172, 
    0.241753033129498)), .Names = c("variable", "xmin", "xmax", 
"ymin", "ymax", "SampleType", "density"), row.names = c(1L, 2L, 
5L, 7L, 8L, 11L, 12L, 14L, 15L, 18L, 20L, 22L, 24L, 25L, 28L, 
30L, 31L, 32L), class = "data.frame")

I've tried various iterations of:

# Attempt at drawing one outlined rectangle per subject below the stacked
# bars, colored by density. The rectangles come from df.coloring, whose
# ymin/ymax of -6/-4 fall inside the ylim() range below the bars.
# Every `+` must end its line: a line that *starts* with `+` is parsed as a
# new statement, so the layers that follow would never be added to the plot.
ggplot(df, aes(x = variable, y = value, fill = short_taxa,
               order = short_taxa)) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  geom_rect(
    data = df.coloring,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, color = density),
    fill = NA,
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
    inherit.aes = FALSE
  ) +
  scale_color_distiller() +
  facet_wrap(~SampleType, scales = "free") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )

But what ends up happening is that each SampleType panel plots all 18 of the subjects, instead of just the 6 that belong to that facet.

enter image description here

What am I doing wrong? Thanks!


Solution

  • I think this is a situation where geom_tile will work better than geom_rect.

    From the help page:

    geom_rect uses the locations of the four corners (xmin, xmax, ymin and ymax).

    geom_tile uses the center of the tile and its size (x, y, width, height).

    Because each tile is centered on variable, the x axis of both geom_bar and geom_tile is based on the same discrete variable, so with a free x scale each facet shows only the subjects that belong to it. To use geom_tile, you'll need to define y, width, and height. You can add these as columns of df.coloring in place of xmin, xmax, ymin, and ymax.

    # Tile geometry for geom_tile(): a center at y = -5 with a height of 2
    # spans -6 to -4, and a width of 1 covers one x position.
    df.coloring$y <- -5
    df.coloring$height <- 2
    df.coloring$width <- 1
    

    On a side note, the variable values in df.coloring do not appear to be matched correctly with SampleType (compared to what's in df), which makes things more complicated. To get things in the correct order for the example code, I did:

    # Relabel the rows with the subjects that actually occur in df, in order of
    # first appearance. This relies on df.coloring having exactly one row per
    # unique subject in df (18 each), already in the matching order.
    df.coloring$variable <- unique(df$variable)
    

    Now use geom_tile in place of geom_rect to get the plot you want:

    # Stacked bars per subject plus one outlined tile per subject, colored by
    # density. The tiles are positioned by the same discrete `variable` as
    # the bars, and only the x scale is freed between facets.
    ggplot(
      df,
      aes(x = variable, y = value, fill = short_taxa, order = short_taxa)
    ) +
      geom_bar(stat = "identity", position = "stack") +
      ylim(-10, 100) +
      geom_tile(
        data = df.coloring,
        aes(x = variable, y = y, width = width, height = height,
            color = density),
        fill = NA,
        inherit.aes = FALSE
      ) +
      scale_color_distiller() +
      facet_wrap(~ SampleType, scales = "free_x") +
      theme_classic() +
      theme(
        legend.position = "right",
        legend.key.size = unit(1, "lines"),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
      )
    

    enter image description here

    Because you want all your tiles to be the same size, you can get away with giving fixed values for y, width, and height directly in the layer instead of adding them as columns to the dataset. The geom_tile layer of the plot then becomes:

    # Tile layer with the position and size given as constants, so
    # df.coloring needs no y/width/height columns.
    geom_tile(
      data = df.coloring,
      aes(x = variable, y = -5, width = 1, height = 2, color = density),
      fill = NA,
      inherit.aes = FALSE
    )