Search code examples
rfunctionggplot2lookuptidyeval

How to include a lookup table in a function for R ggplot graph and/or pass certain strings into graph?


I am trying to write a function in R for re-creating a ggplot graph. The graph is using climate data, and I want to be easily able to remake it for 3 different climate scenarios (rcp column) and multiple different percentiles/smooths.

Code to generate an example dataframe:

df_hist <- data.frame(rcp = c("Hist", "Hist", "Hist", "Hist", "Hist", "Hist"), 
    date = c(1979, 1980, 1981, 1982, 1983, 1984), mean = c(97.1124289537908, 
        135.364260728983, 24.4033167950203, 153.59677124136, 
        177.594607139079, 39.6085327444814), Q05 = c(4.1381868023812e-65, 
        2.94270229560265e-68, 3.86129081174159e-81, 1.06605479821109e-51, 
        1.79404728324904e-40, 8.28390180526523e-36), Q25 = c(1.01269396115261e-41, 
        8.78115797693937e-45, 3.82879556669681e-37, 2.60884128233389e-28, 
        4.39037901508925e-17, 7.68605084861368e-15), median = c(1.85605943345325e-12, 
        1.95575826419004e-15, 1.14253007463387e-13, 7.23576991419774e-05, 
        0.140429987721102, 7.63006636355939e-06), Q75 = c(21.4021262157246, 
        31.3895154168531, 0.0333018038213947, 96.3254717677912, 
        274.5007935262, 1.60034420671794), Q95 = c(615.545600520142, 
        660.3338816459, 199.63296816906, 847.03945259953, 797.645790902726, 
        250.623552018151))

Here is my working function. It takes the dataframe above. However, it is not customized to my liking yet. Right now, the default for ylow and yhigh are Q05 and Q95, which matches the text I put in to scale the color manually ("5th Percentile" and "95th Percentile"). I want there to be some way that this function can detect what percentiles are set for ylow and yhigh and then change the text accordingly. For example, if ylow was set to Q25, it would then change the "5th Percentile" to "25th Percentile". I was thinking potentially some sort of lookup table, but cannot figure out how to implement this.

graph_timeseries_quantile <- function(ts_list,
                                      ylow = Q05,
                                      yhigh = Q95,
                                      hist_rcp_name = "Hist",
                                      xaxis = date, 
                                      ysmooth = mean) {
  df_hist %>% ggplot(aes(x={{xaxis}}, y = {{ysmooth}})) +
    geom_line(aes(y={{ylow}}, color="5th Percentile", lty="5th Percentile"), lwd=1) +
    geom_line(aes(y={{yhigh}}, color="95th Percentile", lty="95th Percentile"), lwd=1) +
    geom_ribbon(aes(x={{xaxis}}, ymin = {{ylow}}, ymax = {{yhigh}}), fill = "#E0EEEE", alpha = 0.5) +
    geom_smooth(method = "loess", se=F, col="gray")+
    geom_line(aes(color = "Annual Mean Historical", lty = "Annual Mean Historical"), lwd=1) +
    # RCP lines and ribbon
    theme_bw() +
    scale_color_manual(name = "Legend", 
                       values = c("5th Percentile" = "dodgerblue4", 
                                  "95th Percentile" = "aquamarine",
                                  "Annual Mean Historical" = "black")) +
    scale_linetype_manual(name = "Legend",
                          values = c("5th Percentile" = 3,
                                     "95th Percentile" = 3,
                                     "Annual Mean Historical" = 1)) + 
    labs(x="Year", y="Annual Flow (cfs)", title=paste0("Annual Historical Streamflow"))
}

graph_timeseries_quantile(df_hist)

Above graph result

Another idea I had was to just input the text manually into the function, but when I tried this it just left that line off. See below example where I add an argument to the function, "ychar_low", and set this as "5th Percentile" and it removes the bottom line:

graph_timeseries_quantile <- function(ts_list,
                                      ylow = Q05,
                                      ychar_low = "5th Percentile",
                                      yhigh = Q95,
                                      hist_rcp_name = "Hist",
                                      xaxis = date, 
                                      ysmooth = mean) {
  df_hist %>% ggplot(aes(x={{xaxis}}, y = {{ysmooth}})) +
    geom_line(aes(y={{ylow}}, color=ychar_low, lty=ychar_low), lwd=1) +
    geom_line(aes(y={{yhigh}}, color="95th Percentile", lty="95th Percentile"), lwd=1) +
    geom_ribbon(aes(x={{xaxis}}, ymin = {{ylow}}, ymax = {{yhigh}}), fill = "#E0EEEE", alpha = 0.5) +
    geom_smooth(method = "loess", se=F, col="gray")+
    geom_line(aes(color = "Annual Mean Historical", lty = "Annual Mean Historical"), lwd=1) +
    # RCP lines and ribbon
    theme_bw() +
    scale_color_manual(name = "Legend", 
                       values = c(ychar_low = "dodgerblue4", 
                                  "95th Percentile" = "aquamarine",
                                  "Annual Mean Historical" = "black")) +
    scale_linetype_manual(name = "Legend",
                          values = c(ychar_low = 3,
                                     "95th Percentile" = 3,
                                     "Annual Mean Historical" = 1)) + 
    labs(x="Year", y="Annual Flow (cfs)", title=paste0("Annual Historical Streamflow"))
}
graph_timeseries_quantile(df_hist)

Graph after adding argument ychar_low

Any suggestions at all would be appreciated. This is my first time trying to use a function with ggplot graphs.


Solution

  • One approach would be to convert the unquoted column names to character strings using rlang::as_label, then use e.g. switch or ... to assign the labels. In the code below I use named vectors for the labels as well as for the vectors of colors and linetypes.

    Note. I also use limits=rev to reverse the order of the items in the legend.

    library(ggplot2)
    
    graph_timeseries_quantile <- function(ts_list,
                                          ylow = Q05,
                                          yhigh = Q95,
                                          hist_rcp_name = "Hist",
                                          xaxis = date,
                                          ysmooth = mean) {
      get_label <- function(x) {
        x <- rlang::as_label(x)
        switch(x,
          "Q05" = "5th Percentile",
          "Q25" = "25th Percentile",
          "Q75" = "75th Percentile",
          "Q95" = "95th Percentile",
          "unknown Percentile"
        )
        # Alternative:
        # paste0(
        #   readr::parse_number(x),
        #   "th Percentile"
        # )
      }
    
      labels <- c(
        ylow = get_label(enquo(ylow)),
        yhigh = get_label(enquo(yhigh)),
        mean = "Annual Mean Historical"
      )
    
      pal_color <- c(
        ylow = "dodgerblue4",
        yhigh = "aquamarine",
        mean = "black"
      )
    
      pal_lty <- c(
        ylow = 3,
        yhigh = 3,
        mean = 1
      )
    
      df_hist |>
        ggplot(aes(x = {{ xaxis }}, y = {{ ysmooth }})) +
        geom_line(aes(y = {{ ylow }}, color = "ylow", lty = "ylow"), lwd = 1) +
        geom_line(aes(y = {{ yhigh }}, color = "yhigh", lty = "yhigh"), lwd = 1) +
        geom_ribbon(aes(x = {{ xaxis }}, ymin = {{ ylow }}, ymax = {{ yhigh }}),
          fill = "#E0EEEE", alpha = 0.5
        ) +
        geom_smooth(method = "loess", se = F, col = "gray") +
        geom_line(aes(color = "mean", lty = "mean"), lwd = 1) +
        # RCP lines and ribbon
        theme_bw() +
        scale_color_manual(
          name = "Legend",
          limits = rev,
          labels = labels,
          values = pal_color
        ) +
        scale_linetype_manual(
          name = "Legend",
          limits = rev,
          labels = labels,
          values = pal_lty
        ) +
        labs(
          x = "Year", y = "Annual Flow (cfs)",
          title = paste0("Annual Historical Streamflow")
        )
    }
    
    graph_timeseries_quantile(df_hist)
    #> `geom_smooth()` using formula = 'y ~ x'
    

    
    graph_timeseries_quantile(df_hist, Q25, Q75)
    #> `geom_smooth()` using formula = 'y ~ x'