I have two datasets that I need to visualise on the same figure. Each dataset is split in ggplot by nested facet wrap with levels species > region > project number > year. The first gives a heatmap of percentage of positive samples while the second is the number of samples collected each month.
I'm looking for advice on how to stack the line graph of the second dataset above the heatmap for the first, for each year (similar to adding a density curve above a heatmap, but with count data in a separate dataset). I have a very large dataset, so finding a way to automate this is crucial - I really hope it is possible.
What I am aiming for:
I am not sure how to merge the two datasets, if that is what is necessary to proceed, but here is a subset of my data and minimal code for each of the ggplots. I included the minimum data needed to show the extent of the faceting, so I hope this is appropriate!
library(ggplot2)
library(viridis)
library(ggExtra)
devtools::install_github("teunbrand/ggh4x")
library(ggh4x)
col.range=c(0, 1) # common colourbar among figures
# dataset 1 = percent detections per month
dat1=structure(list(projectID = c("5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5"), ecoregion = structure(c(4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Grand Banks-12",
"Magdalen Shallows-16", "Scotian Shelf-17", "Bay of Fundy-18"
), class = "factor"), year = c(2018, 2018, 2018, 2018, 2018,
2018, 2018, 2017, 2017, 2017, 2017, 2017, 2017, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018), month = c(8, 9, 10, 8, 8, 9, 9,
6, 6, 7, 7, 8, 9, 5, 5, 6, 6, 7, 8, 8, 9), spp.labs = c("italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')"), percent_pos = c(NA,
NA, NA, NA, 0.5, NA, 0.5, NA, 0.6, NA, 0.6, 1, 1, NA, 0.5, NA,
0.833333333333333, 1, NA, 0.761904761904762, 1)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -21L), groups = structure(list(
projectID = c("5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5"), ecoregion = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L), levels = c("Grand Banks-12",
"Magdalen Shallows-16", "Scotian Shelf-17", "Bay of Fundy-18"
), class = "factor"), year = c(2017, 2017, 2017, 2017, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018), month = c(6,
7, 8, 9, 5, 6, 7, 8, 9, 8, 9, 8, 9, 10), spp.labs = c("italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')"), .rows = structure(list(8:9, 10:11,
12L, 13L, 14:15, 16:17, 18L, 19:20, 21L, 4:5, 6:7, 1L,
2L, 3L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -14L), .drop = TRUE))
ggplot() +
geom_tile(dat1,
mapping=aes(
as.factor(month),as.factor(year),
fill=percent_pos),size=0.1, colour="white") +
scale_fill_gradientn(name="Percent (%) \npositive samples",
colours=rev(viridis(10)), limits=col.range,
labels=c("1%","25%","50%","75%","100%"),
breaks=c(0.01,0.25,0.5,0.75,1.0),
na.value="grey85") +
guides(fill = guide_colourbar(ticks = FALSE, label.vjust = 0.5,
label.position = "right",
title.position="top",
title.vjust = 2.5))+
facet_nested_wrap(spp.labs ~ ecoregion + projectID + year,
ncol=1, strip.position = "left",
labeller= labeller(spp.labs=label_parsed),
scales="free_y") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(limits=as.factor(c(1:12)),
breaks = c(1,2,3,4,5,6,
7,8,9,10,11,12),
labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))+
theme_minimal(base_size = 10) +
labs(x="Month", y="", title="") +
theme(panel.border = element_rect(colour="black", fill=NA),
axis.title.y = element_text(margin = margin(r=10)),
axis.title.x = element_text(margin=margin(t=5)),
axis.text.y=element_blank(),
axis.text.x=element_text(angle=60, hjust=1),
legend.position = "right",
strip.background=element_rect(fill="grey95",colour="white"),
strip.text.y.left = element_text(angle=0),
strip.placement="outside",
plot.margin = margin(t=30),
panel.spacing = unit(2.6,"lines")) +
removeGrid()
# dataset 2 = total number of samples each month
dat2=structure(list(month = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), levels = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"),
projectID = c("5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5",
"5", "5", "5"), ecoregion = structure(c(4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Grand Banks-12",
"Magdalen Shallows-16", "Scotian Shelf-17", "Bay of Fundy-18"
), class = "factor"), year = c(2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2017,
2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017,
2017, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,
2018, 2018, 2018), spp.labs = c("italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')",
"italic('A. aspersa')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')",
"italic('B. schlosseri')"), cnt = c(0, 0, 0, 0, 0, 0, 0,
4, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0,
0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 22, 6, 6, 21, 6,
0, 0, 0)), row.names = c(NA, -48L), class = c("tbl_df", "tbl",
"data.frame"))
ggplot() +
geom_path(dat2,
mapping=aes(y=cnt, x=as.factor(month), group=1)) +
scale_y_continuous() +
scale_x_discrete(limits=as.factor(c(1:12)),
breaks = c(1,2,3,4,5,6,
7,8,9,10,11,12),
labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) +
facet_nested_wrap(.~ spp.labs + ecoregion + projectID + year,
ncol=1, strip.position = "left") +
theme_minimal(base_size = 10) +
labs(x=NULL, y=NULL, title=NULL) +
theme(panel.border = element_rect(colour="black", fill=NA),
axis.text.x=element_blank(),
axis.text.y=element_text(size=7),
strip.background=element_blank(),
strip.text.y.left = element_blank(),
panel.spacing = unit(2.6,"lines")) +
removeGrid()
I have also tried to split the facets into individual plots and then combine the two with cowplot, but have only been successful in splitting by species, which still gives the problem of needing to facet and combine by year. For example:
p.list = lapply(sort(unique(dat1$spp.labs)), function(i) {
ggplot(dat1[dat1$spp.labs==i,],
mapping=aes(
as.factor(month),as.factor(year),
fill=percent_pos))+
geom_tile(size=0.1, colour="white") +
scale_fill_gradientn(name="Percent (%) \npositive samples",
colours=rev(viridis(10)), limits=col.range,
labels=c("1%","25%","50%","75%","100%"),
breaks=c(0.01,0.25,0.5,0.75,1.0),
na.value="grey85") +
guides(fill = guide_colourbar(ticks = FALSE, label.vjust = 0.5,
label.position = "right",
title.position="top",
title.vjust = 2.5))+
facet_nested_wrap(spp.labs ~ ecoregion + projectID + year,
ncol=1, strip.position = "left",
labeller= labeller(spp.labs=label_parsed),
scales="free_y") +
scale_y_discrete(expand=c(0,0)) +
scale_x_discrete(limits=as.factor(c(1:12)),
breaks = c(1,2,3,4,5,6,
7,8,9,10,11,12),
labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) +
theme_minimal(base_size = 10) +
labs(x="Month", y="", title="") +
theme(panel.border = element_rect(colour="black", fill=NA),
axis.title.y = element_text(margin = margin(r=10)),
axis.title.x = element_text(margin=margin(t=5)),
axis.text.y=element_blank(),
axis.text.x=element_text(angle=60, hjust=1),
legend.position = "right",
strip.background=element_rect(fill="grey95",colour="white"),
strip.text.y.left = element_text(angle=0),
strip.placement="outside",
plot.margin = margin(t=30),
panel.spacing = unit(2.6,"lines")) +
removeGrid()
})
Thank you cowplot! I have made the second ggplot into a grob using as_grob
, specifying the dimensions and coordinates in draw_grob
.
library(cowplot)
ggdraw(p) + draw_grob(as_grob(p1), x = 0.55, y = 0.02, width = 0.288, height = 1)