I'm having trouble getting the correct faceting behavior when overlaying two geoms
on a single plot.
My input data consists of two data frames. `df` contains taxon abundance data that is grouped by `variable` and stacked by `short_taxa`, then faceted by `SampleType`.
# Abundance data in long format: 36 rows = 18 subjects x 2 taxa.
# Columns: variable (subject id), value (abundance), short_taxa (taxon
# family), SampleType (facet group: Group1 / Group2 / Group3).
df <- structure(list(variable = c("Subject1", "Subject1", "Subject2",
"Subject2", "Subject5", "Subject5", "Subject7", "Subject7", "Subject8",
"Subject8", "Subject11", "Subject11", "Subject12", "Subject12",
"Subject14", "Subject14", "Subject15", "Subject15", "Subject18",
"Subject18", "Subject20", "Subject20", "Subject22", "Subject22",
"Subject24", "Subject24", "Subject25", "Subject25", "Subject28",
"Subject28", "Subject30", "Subject30", "Subject31", "Subject31",
"Subject32", "Subject32"), value = c(32.4137931034483, 0, 13.6363636363636,
19.435736677116, 16.3304514889529, 27.4735830931796, 38.4180790960452,
0.564971751412429, 33.9024390243902, 1.46341463414634, 37.2093023255814,
5.42635658914729, 23.3175355450237, 32.7962085308057, 35.8024691358025,
0, 14.1210374639769, 48.4149855907781, 1.86516853932584, 1.50561797752809,
43.9490445859873, 0.955414012738854, 3.76932989690722, 88.8208762886598,
29.6511627906977, 4.36046511627907, 32.4742268041237, 19.5876288659794,
2.03570310053241, 91.1055433761353, 40.9356725146199, 0, 31.2335958005249,
6.2992125984252, 35.1084812623274, 7.88954635108481), short_taxa = c("f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae",
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae"), SampleType = c("Group2",
"Group2", "Group3", "Group3", "Group1", "Group1", "Group2", "Group2",
"Group3", "Group3", "Group2", "Group2", "Group1", "Group1", "Group2",
"Group2", "Group3", "Group3", "Group1", "Group1", "Group2", "Group2",
"Group3", "Group3", "Group3", "Group3", "Group1", "Group1", "Group1",
"Group1", "Group2", "Group2", "Group3", "Group3", "Group1", "Group1"
)), .Names = c("variable", "value", "short_taxa", "SampleType"
), row.names = c(17L, 21L, 43L, 47L, 121L, 125L, 173L, 177L,
199L, 203L, 277L, 281L, 303L, 307L, 355L, 359L, 381L, 385L, 459L,
463L, 511L, 515L, 563L, 567L, 615L, 619L, 641L, 645L, 719L, 723L,
771L, 775L, 797L, 801L, 823L, 827L), class = "data.frame")
I can get this to plot nicely like so:
# Stacked bar chart of taxon abundance per subject, one facet per SampleType.
# Every `+` has to END a line: a line that starts with `+` is parsed as a new
# statement (unary plus), so the layers after it are never added to the plot.
# ylim starts at -10 to leave room below the bars for a per-subject annotation.
ggplot(
  df,
  aes(x = variable, y = value, fill = short_taxa, group = short_taxa)
) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  facet_wrap(~SampleType, scales = "free") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )
Now what I'd like to do is to add a `geom_rect` below each stacked barplot (corresponding to each `Subject`) that represents a density measurement for that subject.
# Per-subject annotation data: 18 rows, one rectangle each.
# Columns: variable (subject id), xmin/xmax (numeric x extent of the
# rectangle), ymin/ymax (constant -6 / -4, i.e. a band below the bars),
# SampleType (facet group), density (value mapped to the outline colour).
df.coloring <- structure(list(variable = c("Subject24", "Subject25", "Subject7",
"Subject28", "Subject29", "Subject13", "Subject9", "Subject32",
"Subject33", "Subject11", "Subject20", "Subject14", "Subject21",
"Subject5", "Subject1", "Subject17", "Subject18", "Subject3"),
xmin = c(0.5, 1.5, 4.5, 6.5, 7.5, 10.5, 11.5, 13.5, 14.5,
17.5, 19.5, 21.5, 23.5, 24.5, 27.5, 29.5, 30.5, 31.5), xmax = c(1.5,
2.5, 5.5, 7.5, 8.5, 11.5, 12.5, 14.5, 15.5, 18.5, 20.5, 22.5,
24.5, 25.5, 28.5, 30.5, 31.5, 32.5), ymin = c(-6, -6, -6,
-6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6
), ymax = c(-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-4, -4, -4, -4, -4, -4), SampleType = c("Group2", "Group3",
"Group1", "Group2", "Group3", "Group2", "Group1", "Group2",
"Group3", "Group1", "Group2", "Group3", "Group3", "Group1",
"Group1", "Group2", "Group3", "Group1"), density = c(0.640242130728438,
0.116821877425537, 0.0310043091885746, 0.0189890721812844,
0.974712340626866, 0.421599371824414, 0.169613848207518,
0.76187791978009, 0.69058098597452, 0.600862825522199, 0.671995443990454,
0.225653737317771, 0.911656582495198, 0.342635749839246,
0.138989825500175, 0.987418259494007, 0.739982327679172,
0.241753033129498)), .Names = c("variable", "xmin", "xmax",
"ymin", "ymax", "SampleType", "density"), row.names = c(1L, 2L,
5L, 7L, 8L, 11L, 12L, 14L, 15L, 18L, 20L, 22L, 24L, 25L, 28L,
30L, 31L, 32L), class = "data.frame")
I've tried various iterations of:
# Attempt that reproduces the problem: the geom_rect layer is positioned by
# the numeric xmin/xmax columns of df.coloring, and every facet ends up
# showing all 18 subjects instead of only its own 6.
# Every `+` has to END a line; a line that starts with `+` is parsed as a new
# statement, so the layers after it would never be added to the plot.
ggplot(
  df,
  aes(x = variable, y = value, fill = short_taxa, order = short_taxa)
) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  geom_rect(
    data = df.coloring,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, color = density),
    fill = NA,
    inherit.aes = FALSE # spelled out: `F` is an ordinary, reassignable variable
  ) +
  scale_color_distiller() +
  facet_wrap(~SampleType, scales = "free") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )
But what ends up happening is that each `SampleType` facet plots all 18 of the subjects, instead of just the 6 that belong to that facet.
What am I doing wrong? Thanks!
I think this is a situation where `geom_tile` will work better than `geom_rect`.
From the help page:
geom_rect uses the locations of the four corners (xmin, xmax, ymin and ymax).
geom_tile uses the center of the tile and its size (x, y, width, height).
By centering the tile on `variable`, your faceting should work better because the x axis of `geom_bar` and `geom_tile` will be based on the same variable. To use `geom_tile`, you'll need to define `y`, `width`, and `height`. You can put them into `df.coloring` instead of `ymin`, `ymax`, etc.
# Tile geometry for geom_tile(): a tile centred at y = -5 with height 2 spans
# -6 to -4, the same band the ymin/ymax columns described; width 1 covers one
# position on the discrete x axis.
df.coloring$y <- -5
df.coloring$height <- 2
df.coloring$width <- 1
On a side note, your `variable` values in `df.coloring` do not appear to be matched correctly with `SampleType` (compared to what's in `df`). This is making things more complicated. To get things in the correct order for the example code, I did:
# Relabel the rows of df.coloring with the subjects that actually occur in df.
# This positional assignment only works for the example data: df.coloring has
# 18 rows, df has 18 distinct subjects, and their order of first appearance
# lines up row by row with df.coloring$SampleType.
df.coloring$variable <- unique(df$variable)
Now use `geom_tile` in place of `geom_rect` to get the plot you want:
# Stacked bars plus one outlined tile under each bar. The tile is centred on
# the same discrete `variable` used for the bars, and its outline colour is
# mapped to `density`.
ggplot(
  df,
  aes(x = variable, y = value, fill = short_taxa, order = short_taxa)
) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  geom_tile(
    data = df.coloring,
    aes(x = variable, y = y, width = width, height = height, color = density),
    fill = NA,
    inherit.aes = FALSE
  ) +
  scale_color_distiller() +
  facet_wrap(~SampleType, scales = "free_x") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )
Because you want all your tiles to be the same size, you can get away with giving fixed values for `y`, `width`, etc. instead of adding these to the dataset.
# Same tile layer, with the constant centre (y = -5) and size (width 1,
# height 2) written inline rather than read from columns of df.coloring.
geom_tile(
  data = df.coloring,
  aes(x = variable, y = -5, width = 1, height = 2, color = density),
  fill = NA,
  inherit.aes = FALSE
)