Search code examples
r ggplot2

ggplot2: common legend for stacked bars + line combined by functions


I am generating a combination of a stacked bar chart and a line using two functions. I want both the line and the stacked bar colors to be present in the legend. I've taken a look at the examples on Stack Overflow, but can't make them work.

My 'data' dataframe is:

   timeInterval        Group      count total_count
   <dttm>              <chr>      <int>       <int>
 1 2024-06-14 13:17:00 Request1    227        1555
 2 2024-06-14 13:17:00 Request2    504        1555
 3 2024-06-14 13:17:00 Request3    824        1555
 4 2024-06-14 13:18:00 Request1    961        4356
 5 2024-06-14 13:18:00 Request2    1761       4356
 6 2024-06-14 13:18:00 Request3    1002       4356
 7 2024-06-14 13:18:00 Request4    270        4356
 8 2024-06-14 13:18:00 Request5    362        4356
 9 2024-06-14 13:19:00 Request1    1489       7081
10 2024-06-14 13:19:00 Request2    26         7081
...

'workload' dataframe is

            timeInterval count
                  <POSc> <int>
  1: 2024-08-05 00:00:00     0
  2: 2024-08-05 00:01:00  2727
  3: 2024-08-05 00:02:00  5453
...

my functions are:

#' Build a stacked bar chart of counts over time
#'
#' @param data Data frame with the columns `timeInterval` (x axis), `count`
#'   (bar height) and the grouping column named by `column_name`.
#' @param column_name Name, as a string, of the column mapped to the bar fill.
#'   It is also used as the title of the fill legend.
#' @return A ggplot object.
#' @note `time_unit` is not an argument: it is a free variable and must be
#'   defined in the environment where this function is defined (for example
#'   the global environment) before the function is called.
generate_stacked_ggplot <- function(data, column_name = "Group") {
  ggplot(
    data,
    aes(
      x = timeInterval,
      y = count,
      # `.data[[...]]` looks the fill column up by its string name.
      fill = .data[[column_name]]
    )
  ) +
    geom_bar(stat = "identity") +
    scale_fill_viridis_d(option = "D", alpha = 0.9) +
    scale_y_continuous(
      labels = scales::number_format(accuracy = 1, big.mark = ",")
    ) +
    labs(
      x = "Timeline, NZST",
      y = paste0("Number of Requests per ", time_unit),
      title = "Title",
      fill = column_name
    ) +
    theme_minimal()
}

#' Overlay a workload profile line on an existing plot
#'
#' The workload timestamps are shifted so that clock time 00:00:00 lines up
#' with the left edge of the x axis of `original_plot`.
#'
#' @param original_plot A ggplot object; assumed to have a date-time
#'   (POSIXct) x axis.
#' @param workload_profile_data Data frame with a POSIXct `timeInterval`
#'   column and a `count` column.
#' @return `original_plot` with a dashed orange line layer added.
add_workload_profile <- function(original_plot, workload_profile_data) {
  # Lower x-axis limit of the built plot. It is used below as seconds since
  # 1970-01-01 UTC, i.e. the numeric form of a POSIXct value.
  built_plot <- ggplot_build(original_plot)
  x_min <- built_plot$layout$panel_scales_x[[1]]$range$range[1]

  # Seconds elapsed between each workload timestamp and 00:00:00 of the
  # current date. A time-only string parsed with "%H:%M:%S" gets the current
  # date, so this assumes the workload times were parsed on the same day.
  tz <- "Pacific/Auckland"
  midnight <- as.POSIXct("00:00:00", format = "%H:%M:%S", tz = tz)
  offset_secs <- as.numeric(
    difftime(workload_profile_data$timeInterval, midnight, units = "secs")
  )

  # Shift onto the main plot's axis. `origin` is given explicitly because
  # as.POSIXct() on a number requires it before R 4.3.
  workload_profile_data$timeInterval <- as.POSIXct(
    x_min + offset_secs,
    origin = "1970-01-01",
    tz = tz
  )

  # NOTE: the colour is a constant set outside aes(), so this layer does not
  # add an entry to the legend.
  original_plot +
    geom_line(
      data = workload_profile_data,
      aes(x = timeInterval, y = count),
      color = "#FFA500",
      linewidth = 1,
      linetype = "dashed",
      inherit.aes = FALSE
    )
}

# Build the stacked bar chart first, then overlay the workload line on it.
main_plot <- generate_stacked_ggplot(data = data)
main_plot_line_added <- add_workload_profile(
  original_plot = main_plot,
  workload_profile_data = workload
)

I want both line and stacked colors to be present on the legend.

However, when I follow the advice from "creating a common legend for bar plot and line", I get the following warning and no line in the legend.

Warning messages:
1: No shared levels found between `names(values)` of the manual scale and the data's colour values.

data in csv form:

timeInterval,Group,count,total_count
2024-06-14 13:17:00,Request1,227,1555
2024-06-14 13:17:00,Request2,504,1555
2024-06-14 13:17:00,Request3,824,1555
2024-06-14 13:18:00,Request1,961,4356
2024-06-14 13:18:00,Request2,1761,4356
2024-06-14 13:18:00,Request3,1002,4356
2024-06-14 13:18:00,Request4,270,4356
2024-06-14 13:18:00,Request5,362,4356
2024-06-14 13:19:00,Request1,1489,7081
2024-06-14 13:19:00,Request2,26,7081

workload.csv

timeInterval,count
00:00:00,0
00:01:00,2727
00:02:00,5453
00:03:00,8180

Solution

  • As a general rule, if you want a legend, then you have to map on aesthetics. Hence, as in the referenced post, you have to map on the color aesthetic inside aes() in geom_line and set your desired color using scale_color_manual, instead of setting the color as a parameter outside of aes():

    library(ggplot2)
    
    #' Overlay a workload profile line, with a legend entry, on a plot
    #'
    #' The workload timestamps are shifted so that clock time 00:00:00 lines
    #' up with the left edge of the x axis of `original_plot`.
    #'
    #' @param original_plot A ggplot object; assumed to have a date-time
    #'   (POSIXct) x axis.
    #' @param workload_profile_data Data frame with a POSIXct `timeInterval`
    #'   column and a `count` column.
    #' @return `original_plot` with a dashed line layer and a manual colour
    #'   scale added.
    add_workload_profile <- function(original_plot, workload_profile_data) {
      # Lower x-axis limit of the built plot. It is used below as seconds
      # since 1970-01-01 UTC, i.e. the numeric form of a POSIXct value.
      built_plot <- ggplot_build(original_plot)
      x_min <- built_plot$layout$panel_scales_x[[1]]$range$range[1]

      # Seconds elapsed between each workload timestamp and 00:00:00 of the
      # current date. A time-only string parsed with "%H:%M:%S" gets the
      # current date, so the workload times must be parsed on the same day.
      tz <- "Pacific/Auckland"
      midnight <- as.POSIXct("00:00:00", format = "%H:%M:%S", tz = tz)
      offset_secs <- as.numeric(
        difftime(workload_profile_data$timeInterval, midnight, units = "secs")
      )

      # Shift onto the main plot's axis. `origin` is given explicitly because
      # as.POSIXct() on a number requires it before R 4.3.
      workload_profile_data$timeInterval <- as.POSIXct(
        x_min + offset_secs,
        origin = "1970-01-01",
        tz = tz
      )

      original_plot +
        geom_line(
          data = workload_profile_data,
          # Mapping colour to a constant label inside aes() is what creates
          # the "Workload" legend entry.
          aes(x = timeInterval, y = count, color = "Workload"),
          linewidth = 1,
          linetype = "dashed",
          inherit.aes = FALSE
        ) +
        # Sets the actual colour drawn for the single "Workload" entry.
        scale_color_manual(values = "#FFA500")
    }
    
    # Unit shown in the y-axis label; read by generate_stacked_ggplot().
    time_unit <- "Minute"

    # Parse the full timestamps of the request data.
    data[["timeInterval"]] <- as.POSIXct(
      data[["timeInterval"]],
      tz = "Pacific/Auckland"
    )

    # The workload file holds clock times only, so give the format explicitly.
    workload[["timeInterval"]] <- as.POSIXct(
      workload[["timeInterval"]],
      format = "%H:%M:%S",
      tz = "Pacific/Auckland"
    )

    # Build the stacked bars, then overlay the workload line.
    main_plot <- generate_stacked_ggplot(data = data)
    main_plot_line_added <- add_workload_profile(
      original_plot = main_plot,
      workload_profile_data = workload
    )

    # Auto-print the combined plot.
    main_plot_line_added
    
    

    DATA

    # Example request counts: one row per (minute, request group).
    # `timeInterval` is kept as character here; it is converted to POSIXct
    # by the calling script.
    data <- data.frame(
      timeInterval = rep(
        c("2024-06-14 13:17:00", "2024-06-14 13:18:00", "2024-06-14 13:19:00"),
        times = c(3L, 5L, 2L)
      ),
      Group = c(
        "Request1", "Request2", "Request3",
        "Request1", "Request2", "Request3", "Request4", "Request5",
        "Request1", "Request2"
      ),
      count = c(
        227L, 504L, 824L,
        961L, 1761L, 1002L, 270L, 362L,
        1489L, 26L
      ),
      total_count = rep(c(1555L, 4356L, 7081L), times = c(3L, 5L, 2L)),
      stringsAsFactors = FALSE
    )

    # Example workload profile: clock times (character) and request counts.
    workload <- data.frame(
      timeInterval = c("00:00:00", "00:01:00", "00:02:00", "00:03:00"),
      count = c(0L, 2727L, 5453L, 8180L),
      stringsAsFactors = FALSE
    )