Search code examples
rfor-loopggplot2plyrstandard-error

Loop through multiple columns and make a plot (with error bars for each plot) in R


I found great utility in the answer to this post (Loop through multiple columns and make a plot for each in R?), but was wondering how to add standard error bars to my plot. This is my attempt, but I'm not sure how to make it work correctly.

data <- structure(list(year = c(2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 
                        2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2020L, 2020L, 
                        2020L, 2020L, 2020L, 2020L, 2020L), season = structure(c(1L, 
                                                                                 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
                                                                                 2L, 2L, 2L), .Label = c("dry", "wet"), class = "factor"), site = c(1L, 
                                                                                                                                                    2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 
                                                                                                                                                    3L, 4L, 5L), temp = c(26.9, 27.8, 27.3, 26.4, 26.8, 29.4, 29.9, 
                                                                                                                                                                          29.9, 29.9, 30, 24, 23.5, 23.9, 23.7, 24.1, 30.2, 30.8, 30.8, 
                                                                                                                                                                          30.3, 30.1), do = c(7.5, 7.2, 7.8, 8.1, 7.4, 3.7, 3.2, 6.4, 4.9, 
                                                                                                                                                                                              5, 6.5, 5.2, 5.9, 6.7, 6.3, 3.63, 1.81, 1.85, 4.25, 0.69), salinity = c(32.29, 
                                                                                                                                                                                                                                                                      30.1, 31.35, 31.93, 30.77, 27.35, 27.34, 28.42, 28.37, 28.24, 
                                                                                                                                                                                                                                                                      32.69, 29.72, 29.15, 28.9, 25.29, 24.37, 23.47, 25.1, 24.79, 
                                                                                                                                                                                                                                                                      23.62), pH = c(8.24, 8.28, 8.37, 8.32, 8.39, 7.85, 7.84, 8.13, 
                                                                                                                                                                                                                                                                                     8.04, 8.06, 8.26, 8.17, 8.18, 8.24, 8.13, 7.8, 7.61, 7.61, 7.95, 
                                                                                                                                                                                                                                                                                     7.41), water_depth = c(95L, 95L, 62L, 63L, 55L, 100L, 107L, 110L, 
                                                                                                                                                                                                                                                                                                            140L, 95L, 85L, 80L, 60L, 53L, 55L, 125L, 135L, 145L, 125L, 100L
                                                                                                                                                                                                                                                                                     ), sed_depth = c(56L, 40L, 22L, 1L, 20L, 60L, 47L, 68L, 40L, 
                                                                                                                                                                                                                                                                                                      20L, 55L, 35L, 20L, 1L, 25L, 45L, 30L, 35L, 1L, 20L), SAV = c(25.5, 
                                                                                                                                                                                                                                                                                                                                                                    41.5, 50, 47.5, 60.1, 53.5, 46.5, 80.5, 20, 32.5, 26.1, 39.5, 
                                                                                                                                                                                                                                                                                                                                                                    29.1, 48.5, 39.6, 63, 50.5, 70, 70, 56)), row.names = c(1L, 155L, 
                                                                                                                                                                                                                                                                                                                                                                                                                            309L, 463L, 617L, 771L, 925L, 1079L, 1233L, 1387L, 1541L, 1695L, 
                                                                                                                                                                                                                                                                                                                                                                                                                            1849L, 2003L, 2157L, 2311L, 2465L, 2619L, 2773L, 2927L), class = "data.frame")

Here is my code. Please note, I'd like to still be able to have the ggtitle and file naming system work:

    col_names <- colnames(data[,-c(1:3)])
    
    
    for (i in col_names) {       # for-loop over columns
    
      cdata2 <- plyr::ddply(i, c("year", "season"), summarise,
                            N    = length(i),
                            n_mean = mean(i),
                            n_median = median(i),
                            sd   = sd(num),
                            se   = sd / sqrt(N))
      
      ggplot(cdata2, aes(x = year, y = n_mean, color = season)) +
        geom_errorbar(aes(ymin=n_mean-se, ymax=n_mean+se), 
                      width=.2, 
                      color = "black") +
        geom_point(color = "black", # Make both seasons have black borders
                   shape = 21, 
                   size = 3,
                   aes(fill = season)) +
        scale_fill_manual(values=c("white", "#C0C0C0")) + 
scale_x_continuous(breaks=c(2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2018,2019,2020)) +
        labs(x= NULL, y = "Mean count") +
        ggtitle(i)
      
      setwd('D:/.../Trend plots')
      
      ggsave(paste0(i, "- ENV_trend_plot.png"), 
             height = 5, width=7, units = "in")
    }

Solution

  • My plyr vocabulary is extremely limited, so I've substituted it with dplyr syntax. After replacing the plyr block, it seemed smooth sailing from there on out. I've included a few lines to show the plots and commented out writing plots to disk; you can of course reverse those changes.

    library(ggplot2)
    library(dplyr)
    
    col_names <- colnames(data[,-c(1:3)])
    
    # A list in which to store plots for reproducibility
    plist <- list()
    
    for (i in col_names) {       # for-loop over columns
      
      cdata2 <- data %>%
        group_by(year, season) %>%
        summarise(N = length(.data[[i]]),
                  n_mean = mean(.data[[i]]),
                  n_median = median(.data[[i]]),
                  sd = sd(.data[[i]]),
                  se = sd / sqrt(N))
    
      ggplot(cdata2, aes(x = year, y = n_mean, color = season)) +
        geom_errorbar(aes(ymin=n_mean-se, ymax=n_mean+se), 
                      width=.2, 
                      color = "black") +
        geom_point(color = "black", # Make both seasons have black borders
                   shape = 21, 
                   size = 3,
                   aes(fill = season)) +
        scale_fill_manual(values=c("white", "#C0C0C0")) + 
        scale_x_continuous(breaks=c(2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2018,2019,2020)) +
        labs(x= NULL, y = "Mean count") +
        ggtitle(i)
      
      # Comment out the line below
      plist[[i]] <- last_plot()
      
      # Uncomment these lines
      # setwd('D:/.../Trend plots')
      # ggsave(paste0(i, "- ENV_trend_plot.png"), 
      #        height = 5, width=7, units = "in")
    }
    
    # Showing plots
    patchwork::wrap_plots(plist)
    

    Created on 2021-09-09 by the reprex package (v2.0.1)