Search code examples
rggplot2splitmapplygeom-hline

Error in split for mapply plotting of multiple plots


I am trying to split two dataframes to allow for production of multiple plots with mapply but cant get the outcome i need. I think it's got to do with the length of each split list being different but I cant work out how to fix it.

The first df is Tox_Dat which needs to be split by Tox_Dat$Pesticide (2 levels)

 > str(Tox_Dat)
'data.frame':   86 obs. of  10 variables:
 $ Pesticide : Factor w/ 2 levels "Ametryn","Diuron": 1 1 1 1 2 2 2 2 2 2 ...
 $ Herb      : chr  "H" "H" "H" "H" ...
 $ PSII      : chr  "Y" "Y" "Y" "Y" ...
 $ Taxa      : chr  "Coral" "Microalgae" "Microalgae" "Seagrass" ...
 $ Phyla     : chr  "Cnidaria" "Chlorophyta" "Pyrophyta" "Traecheophyta" ...
 $ Species   : chr  "Seriatopora hystrix " "Dunaliella sp." "Cladocopium goreaui " "Halophila ovalis" ...
 $ Life.Stage: chr  "Adult" "" "" "Adult" ...
 $ Pathway   : chr  "Photsynthesis" "Photsynthesis" "Photsynthesis" "Photsynthesis" ...
 $ Measure   : chr  "a-EC50" "a-EC50" "a-EC50" "a-EC50" ...
 $ Conc_ug   : num  0.07 0.34 0.22 0.35 0.372 ...

and the second df is DGV_Dat which needs to be split by both DGV_Dat$Pesticide (2 levels) and DGV_Dat$DGV.Type (2 levels). In this way, each pesticide (e.g. Diuron) will have two DGV.Type (Fresh and Marine). Note the actual dfs i am working with have 24 levels of Pesticide, each with a Marine and Fresh DGV. I have subset for this example.

 > str(DGV_Dat)
 'data.frame':  16 obs. of  4 variables:
 $ Pesticide       : Factor w/ 2 levels "Ametryn","Diuron": 2 2 2 2 2 2 2 2 1 1 ...
 $ DGV.Type        : Factor w/ 2 levels "Fresh","Marine": 1 1 1 1 2 2 2 2 1 1 ...
 $ Protection.Level: chr  "PC99" "PC95" "PC90" "PC80" ...
 $ Conc_ug         : num  0.08 0.23 0.42 0.9 0.43 0.67 0.86 1.2 0.074 0.33 ...

The data should then be combined into plots where the Tox_Dat data is presented as box plots with jitter, and the DGV_Dat provides the geom_hlines across the top. The outcome in this example would be four plots: Diuron Fresh, Diuron Marine, Ametryn Fresh, Ametryn Marine.

enter image description here

However this is what my plots look like (all DGV_Type lines appear on every plot, i.e. not split properly): enter image description here

Here is the code that I have tried so far:

#Data
#Split 1
L1 <- split(ToxDat_subset,ToxDat_subset$Pesticide)
L2 <- split(DGVDat_subset,DGVDat_subset$Pesticide, DGV_Dat,DGV_Dat$DGV.Type)

#Function to create plots
myplot <- function(x,y)
{
  z <- ggplot(ToxDat_subset, aes(x=factor(Phyla), y=Conc_ug)) +
    geom_boxplot(color="dark gray", fill = 'dark gray', alpha = 0.1) +
    geom_jitter(aes(shape = Pathway), height=.3, width=.3, size = 1.5) +
    scale_y_continuous(trans='log10') +
    theme_bw ()+ 
    scale_x_discrete(guide = guide_axis(angle = 45)) +
    theme(panel.border = element_blank(), #these 3 lines alter the plot to look like the ones that XLStat produces, i.e. with no grid lines, etc
          #panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank(), axis.line = element_line(color = "black"),
          text = element_text(size = 14), 
          axis.title.x = element_text(margin = unit(c(5, 0, 0, 0), "mm")),
          axis.title.y = element_text(margin = unit(c(0, 5, 0, 0), "mm"))) +
    geom_hline(data = DGVDat_subset,aes(yintercept = Conc_ug))+
               #,linetype=DGVDat_subset$Protection.Level)+
               # labs(linetype=DGV_Dat$Protection.Level)) +
    # labs(fill = "Dose (mg)")
    labs(x = "Phyla", y = "Log10 Concentration (µg/L)") +
    labs(title = (unique(ToxDat_subset$Pesticide)),
         subtitle = "Marine ecotox data",
         caption = "Source: Citation, Reliability ?? ")
  # caption = (unique(DGV_Dat$DGV.Type)))
  z 
}

#Create a list of plots
Lplots <- mapply(FUN = myplot,x=L1,y=L2,SIMPLIFY = FALSE)

#Now format names
vnames <- paste0(names(Lplots),'.png')
mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm')

This is the dput for the Tox_Dat df:

structure(list(Pesticide = c("Ametryn", "Ametryn", "Ametryn", 
"Ametryn", "Diuron", "Diuron", "Diuron", "Diuron", "Diuron", 
"Diuron", "Diuron", "Diuron", "Diuron", "Diuron", "Diuron", "Diuron", 
"Diuron", "Diuron", "Diuron", "Diuron"), Herb = c("H", "H", "H", 
"H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", "H", 
"H", "H", "H", "H"), PSII = c("Y", "Y", "Y", "Y", "Y", "Y", "Y", 
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"
), Taxa = c("Coral", "Microalgae", "Microalgae", "Seagrass", 
"Jellyfish", "Jellyfish", "Jellyfish", "Magroalgae", "Aenemone", 
"Aenemone", "Coral", "Coral", "Coral", "Coral", "Coral", "Coral", 
"Coral", "Coral", "Coral", "Coral"), Phyla = c("Cnidaria", "Chlorophyta", 
"Pyrophyta", "Traecheophyta", "Cnidaria", "Cnidaria", "Cnidaria", 
"Chlorophyta", "Cnidaria", "Cnidaria", "Cnidaria", "Cnidaria", 
"Cnidaria", "Cnidaria", "Cnidaria", "Cnidaria", "Cnidaria", "Cnidaria", 
"Cnidaria", "Cnidaria"), Species = c("Seriatopora hystrix ", 
"Dunaliella sp.", "Cladocopium goreaui ", "Halophila ovalis", 
"Cassiopea maremetens", "Cassiopea maremetens", "Cassiopea maremetens", 
"Halimeda sp. ", "Exaiptasia pallida ", "Exaiptasia pallida ", 
"Acropora formosa", "Acropora formosa", "Acropora valida", "Acropora valida", 
"Acropora valida", "Acropora valida", "Montiora digitata", "Pocillopora damicornis", 
"Pocillopora damicornis", "Pocillopora damicornis"), Life.Stage = c("Adult", 
"", "", "Adult", "Polyps", "Polyps", "Polyps", "Adult", "Adult", 
"Adult", "Adult", "Adult ", "Adult", "Adult", "Adult", "Adult", 
"Adult ", "Adult", "Recruit", "Recruit"), Pathway = c("Photsynthesis", 
"Photsynthesis", "Photsynthesis", "Photsynthesis", "Bleaching", 
"Growth", "Photsynthesis", "Photsynthesis", "Photsynthesis", 
"Reproductive", "Photsynthesis", "Photsynthesis", "Bleaching", 
"Mortality", "Photsynthesis", "Reproductive", "Photsynthesis", 
"Bleaching", "Bleaching", "Mortality"), Measure = c("a-EC50", 
"a-EC50", "a-EC50", "a-EC50", "c-EC50", "c-EC50", "c-EC50", "c-LOEC", 
"a-EC50", "a-EC50", "a-EC50", "a-EC50", "c-LOEC", "c-LOEC", "c-LOEC", 
"c-LOEC", "a-EC50", "c-LOEC", "c-LOEC", "c-LOEC"), Conc_ug = c(0.07, 
0.34, 0.22, 0.35, 0.372, 0.0698, 0.284, 2.476, 0.8, 10, 0.27, 
0.51, 3.52, 3.52, 0.364, 3.52, 0.59, 3.52, 4, 40)), row.names = c(7L, 
8L, 9L, 10L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 
61L, 62L, 63L, 64L, 65L, 66L), class = "data.frame")

And here is the dput for the DGV_Dat df:

structure(list(Pesticide = c("Diuron", "Diuron", "Diuron", "Diuron", 
"Diuron", "Diuron", "Diuron", "Diuron", "Ametryn", "Ametryn", 
"Ametryn", "Ametryn", "Ametryn", "Ametryn", "Ametryn", "Ametryn"
), DGV.Type = c("Fresh", "Fresh", "Fresh", "Fresh", "Marine", 
"Marine", "Marine", "Marine", "Fresh", "Fresh", "Fresh", "Fresh", 
"Marine", "Marine", "Marine", "Marine"), Protection.Level = c("PC99", 
"PC95", "PC90", "PC80", "PC99", "PC95", "PC90", "PC80", "PC99", 
"PC95", "PC90", "PC80", "PC99", "PC95", "PC90", "PC80"), Conc_ug = c(0.08, 
0.23, 0.42, 0.9, 0.43, 0.67, 0.86, 1.2, 0.074, 0.33, 0.66, 1.4, 
0.1, 0.61, 1.3, 2.8)), row.names = c(NA, 16L), class = "data.frame")

Thank you in advance for any advice.


Solution

  • Besides some issues with your code you are right. mapply expects list or vectors of the same length. Taking a different approach, instead of splitting your datasets you could use subsetting in your function to achieve your desired result. To this end I first create a dataframe of the combinations of Pesticide and DGV.Type to plot. Then use mapply or Map to loop over the categories and in your function filter your datasets accordingly. And as the arguments of your function are now character strings, doing so also makes it easy to create the (sub-)titles.

    library(ggplot2)
    
    myplot <- function(x, y) {
      ToxDat <- subset(ToxDat_subset, Pesticide == x)
      DGVDat <- subset(DGVDat_subset, Pesticide == x & DGV.Type == y)
    
      ggplot(ToxDat, aes(x = factor(Phyla), y = Conc_ug)) +
        geom_boxplot(color = "dark gray", fill = "dark gray", alpha = 0.1) +
        geom_jitter(aes(shape = Pathway), height = .3, width = .3, size = 1.5) +
        scale_y_continuous(trans = "log10") +
        theme_bw() +
        scale_x_discrete(guide = guide_axis(angle = 45)) +
        theme(
          panel.border = element_blank(),
          panel.grid.minor = element_blank(), axis.line = element_line(color = "black"),
          text = element_text(size = 14),
          axis.title.x = element_text(margin = unit(c(5, 0, 0, 0), "mm")),
          axis.title.y = element_text(margin = unit(c(0, 5, 0, 0), "mm"))
        ) +
        geom_hline(data = DGVDat, aes(yintercept = Conc_ug)) +
        labs(x = "Phyla", y = "Log10 Concentration (µg/L)") +
        labs(
          title = x,
          subtitle = paste0(y, " ecotox data"),
          caption = "Source: Citation, Reliability ?? "
        )
    }
    
    combs <- expand.grid(
      Pesticide = unique(DGVDat_subset$Pesticide),
      DGV.Type = unique(DGVDat_subset$DGV.Type)
    )
    
    combs
    #>   Pesticide DGV.Type
    #> 1    Diuron    Fresh
    #> 2   Ametryn    Fresh
    #> 3    Diuron   Marine
    #> 4   Ametryn   Marine
    
    Map(
      myplot,
      combs$Pesticide, combs$DGV.Type
    )
    #> [[1]]
    

    #> 
    #> [[2]]
    

    #> 
    #> [[3]]
    

    #> 
    #> [[4]]