Search code examples
rggplot2facetfacet-wrap

Combine facets from two datasets to overlay in a single ggplot


I have two datasets that I need to visualise on the same figure. Each dataset is split in ggplot by nested facet wrap with levels species > region > project number > year. The first gives a heatmap of percentage of positive samples while the second is the number of samples collected each month.

I'm looking for advice on how to stack the line graph of the second dataset above the heatmap for the first, for each year (similar to adding a density curve above a heatmap, but with count data in a separate dataset). I have a very large dataset, so finding a way to automate this is crucial - I really hope it is possible.

What I am aiming for:

Ideal result combining both datasets

I am not sure how to merge the two datasets, if that is what is necessary to proceed, but here is a subset of my data and minimal code for each of the ggplots. I included the minimum data needed to show the extent of the faceting, so I hope this is appropriate!

library(ggplot2)
library(viridis)
library(ggExtra)

devtools::install_github("teunbrand/ggh4x")
library(ggh4x)

col.range=c(0, 1) # common colourbar among figures

# dataset 1 = percent detections per month
dat1=structure(list(projectID = c("5", "5", "5", "5", "5", "5", "5", 
                                  "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", 
                                  "5"), ecoregion = structure(c(4L, 4L, 4L, 3L, 3L, 3L, 3L, 2L, 
                                                                2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Grand Banks-12", 
                                                                                                                                "Magdalen Shallows-16", "Scotian Shelf-17", "Bay of Fundy-18"
                                                                ), class = "factor"), year = c(2018, 2018, 2018, 2018, 2018, 
                                                                                               2018, 2018, 2017, 2017, 2017, 2017, 2017, 2017, 2018, 2018, 2018, 
                                                                                               2018, 2018, 2018, 2018, 2018), month = c(8, 9, 10, 8, 8, 9, 9, 
                                                                                                                                        6, 6, 7, 7, 8, 9, 5, 5, 6, 6, 7, 8, 8, 9), spp.labs = c("italic('A. aspersa')", 
                                                                                                                                                                                                "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                                                                                "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                                                                                "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                                                                                "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                                                                                "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                                                                                "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                                                                                "italic('B. schlosseri')", "italic('B. schlosseri')"), percent_pos = c(NA, 
                                                                                                                                                                                                                                                                       NA, NA, NA, 0.5, NA, 0.5, NA, 0.6, NA, 0.6, 1, 1, NA, 0.5, NA, 
                                                                                                                                                                                                                                                                       0.833333333333333, 1, NA, 0.761904761904762, 1)), class = c("grouped_df", 
                                                                                                                                                                                                                                                                                                                                   "tbl_df", "tbl", "data.frame"), row.names = c(NA, -21L), groups = structure(list(
                                                                                                                                                                                                                                                                                                                                     projectID = c("5", "5", "5", "5", "5", "5", "5", "5", "5", 
                                                                                                                                                                                                                                                                                                                                                   "5", "5", "5", "5", "5"), ecoregion = structure(c(2L, 2L, 
                                                                                                                                                                                                                                                                                                                                                                                                     2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L), levels = c("Grand Banks-12", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                 "Magdalen Shallows-16", "Scotian Shelf-17", "Bay of Fundy-18"
                                                                                                                                                                                                                                                                                                                                                                                                     ), class = "factor"), year = c(2017, 2017, 2017, 2017, 2018, 
                                                                                                                                                                                                                                                                                                                                                                                                                                    2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018), month = c(6, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     7, 8, 9, 5, 6, 7, 8, 9, 8, 9, 8, 9, 10), spp.labs = c("italic('B. schlosseri')", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('A. aspersa')", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           "italic('A. aspersa')"), .rows = structure(list(8:9, 10:11, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           12L, 13L, 14:15, 16:17, 18L, 19:20, 21L, 4:5, 6:7, 1L, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           2L, 3L), ptype = integer(0), class = c("vctrs_list_of", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  ), row.names = c(NA, -14L), .drop = TRUE))

ggplot() + 
  geom_tile(dat1,
            mapping=aes( 
              as.factor(month),as.factor(year), 
              fill=percent_pos),size=0.1, colour="white") +
  scale_fill_gradientn(name="Percent (%) \npositive samples",
                       colours=rev(viridis(10)), limits=col.range,
                       labels=c("1%","25%","50%","75%","100%"),
                       breaks=c(0.01,0.25,0.5,0.75,1.0),
                       na.value="grey85") + 
  guides(fill = guide_colourbar(ticks = FALSE, label.vjust = 0.5,
                                label.position = "right",
                                title.position="top",
                                title.vjust = 2.5))+
  facet_nested_wrap(spp.labs ~ ecoregion + projectID + year, 
                    ncol=1, strip.position = "left",
                    labeller= labeller(spp.labs=label_parsed),
                    scales="free_y") +  
  scale_y_discrete(expand=c(0,0)) +  
  scale_x_discrete(limits=as.factor(c(1:12)),
                   breaks = c(1,2,3,4,5,6,
                              7,8,9,10,11,12),
                   labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", 
                              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))+
  theme_minimal(base_size = 10) +
  labs(x="Month", y="", title="") + 
  theme(panel.border = element_rect(colour="black", fill=NA),
        axis.title.y = element_text(margin = margin(r=10)),
        axis.title.x = element_text(margin=margin(t=5)),
        axis.text.y=element_blank(),
        axis.text.x=element_text(angle=60, hjust=1),
        legend.position = "right",
        strip.background=element_rect(fill="grey95",colour="white"),
        strip.text.y.left = element_text(angle=0), 
        strip.placement="outside",
        plot.margin = margin(t=30),
        panel.spacing = unit(2.6,"lines")) +
  removeGrid()

# dataset 2 = total number of samples each month
dat2=structure(list(month = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
                                        8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
                                        11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
                                        1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), levels = c("1", 
                                                                                                       "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"), 
                    projectID = c("5", "5", "5", "5", "5", "5", "5", "5", "5", 
                                  "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", 
                                  "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", 
                                  "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", "5", 
                                  "5", "5", "5"), ecoregion = structure(c(4L, 4L, 4L, 4L, 4L, 
                                                                          4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
                                                                          3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
                                                                          2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Grand Banks-12", 
                                                                                                                                          "Magdalen Shallows-16", "Scotian Shelf-17", "Bay of Fundy-18"
                                                                          ), class = "factor"), year = c(2018, 2018, 2018, 2018, 2018, 
                                                                                                         2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 
                                                                                                         2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2017, 
                                                                                                         2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 
                                                                                                         2017, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 
                                                                                                         2018, 2018, 2018), spp.labs = c("italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('A. aspersa')", "italic('A. aspersa')", 
                                                                                                                                         "italic('A. aspersa')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')", "italic('B. schlosseri')", "italic('B. schlosseri')", 
                                                                                                                                         "italic('B. schlosseri')"), cnt = c(0, 0, 0, 0, 0, 0, 0, 
                                                                                                                                                                             4, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 
                                                                                                                                                                             0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 22, 6, 6, 21, 6, 
                                                                                                                                                                             0, 0, 0)), row.names = c(NA, -48L), class = c("tbl_df", "tbl", 
                                                                                                                                                                                                                           "data.frame"))

ggplot() +
  geom_path(dat2, 
            mapping=aes(y=cnt, x=as.factor(month), group=1)) +
  scale_y_continuous() +
  scale_x_discrete(limits=as.factor(c(1:12)),
                   breaks = c(1,2,3,4,5,6,
                              7,8,9,10,11,12),
                   labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", 
                              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) +
  facet_nested_wrap(.~ spp.labs + ecoregion + projectID + year,
                    ncol=1, strip.position = "left") +  
  theme_minimal(base_size = 10) +
  labs(x=NULL, y=NULL, title=NULL) + 
  theme(panel.border = element_rect(colour="black", fill=NA),
        axis.text.x=element_blank(),
        axis.text.y=element_text(size=7),
        strip.background=element_blank(),
        strip.text.y.left = element_blank(), 
        panel.spacing = unit(2.6,"lines")) +
  removeGrid()

I have also tried to split the facets into individual plots and then combine the two with cowplot, but have only been successful in splitting by species, which still gives the problem of needing to facet and combine by year. For example:

p.list = lapply(sort(unique(dat1$spp.labs)), function(i) {
  ggplot(dat1[dat1$spp.labs==i,],
         mapping=aes( 
           as.factor(month),as.factor(year), 
           fill=percent_pos))+
    geom_tile(size=0.1, colour="white") +
    scale_fill_gradientn(name="Percent (%) \npositive samples",
                         colours=rev(viridis(10)), limits=col.range,
                         labels=c("1%","25%","50%","75%","100%"),
                         breaks=c(0.01,0.25,0.5,0.75,1.0),
                         na.value="grey85") + 
    guides(fill = guide_colourbar(ticks = FALSE, label.vjust = 0.5,
                                  label.position = "right",
                                  title.position="top",
                                  title.vjust = 2.5))+
    facet_nested_wrap(spp.labs ~ ecoregion + projectID + year, 
                      ncol=1, strip.position = "left",
                      labeller= labeller(spp.labs=label_parsed),
                      scales="free_y") +  
    scale_y_discrete(expand=c(0,0)) +  
    scale_x_discrete(limits=as.factor(c(1:12)),
                     breaks = c(1,2,3,4,5,6,
                                7,8,9,10,11,12),
                     labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", 
                                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")) +
    theme_minimal(base_size = 10) +
    labs(x="Month", y="", title="") + 
    theme(panel.border = element_rect(colour="black", fill=NA),
          axis.title.y = element_text(margin = margin(r=10)),
          axis.title.x = element_text(margin=margin(t=5)),
          axis.text.y=element_blank(),
          axis.text.x=element_text(angle=60, hjust=1),
          legend.position = "right",
          strip.background=element_rect(fill="grey95",colour="white"),
          strip.text.y.left = element_text(angle=0), 
          strip.placement="outside",
          plot.margin = margin(t=30),
          panel.spacing = unit(2.6,"lines")) +
    removeGrid() 
})

Solution

  • Thank you cowplot! I have made the second ggplot into a grob using as_grob, specifying the dimensions and coordinates in draw_grob.

    library(cowplot)
    
    ggdraw(p) + draw_grob(as_grob(p1), x = 0.55, y = 0.02, width = 0.288, height = 1)