Search code examples
rggplot2plotbackgroundbackground-color

How to change the background color of panel based on discrete y-axis labels in ggplot2 in R?


I have a dataframe named plot_df in R with the following structure:

# Import the library
library(ggplot2)

# Build a reproducible sample data set: 10 genes, two rows (groups) per gene
set.seed(123)
plot_df <- data.frame(gene = rep(paste0("ENSG", seq_len(10), ".17"), each = 2))
# Column order matters for reproducibility: the means are drawn before the sds
plot_df$mean <- rnorm(nrow(plot_df), mean = 0.5, sd = 0.1)
plot_df$sd <- rnorm(nrow(plot_df), mean = 0.02, sd = 0.01)
plot_df$group <- rep(c("group_a", "group_b"), times = nrow(plot_df) / 2)

I'm trying to create a ggplot with points and error bars, where the y-axis represents the gene names (as discrete labels), x-axis represents the mean values, and the color represents the group (either "group_a" or "group_b"). Here's the code I'm using:

# Dodged points (mean) with horizontal error bars (mean +/- sd) for every gene
# on a discrete y axis, colored by group, plus a dashed reference line.
ggplot(plot_df, aes(y = gene)) +
  geom_point(aes(x = mean, color = group),
             position = position_dodge(width = 0.9)) +
  geom_errorbar(aes(xmin = mean - sd, xmax = mean + sd, color = group),
                width = 0.2, position = position_dodge(width = 0.9)) +
  # The reference line is a constant, so xintercept is passed as a plain
  # argument; mapping it through aes() would draw one identical line per row
  # of plot_df on top of each other.
  geom_vline(xintercept = 0.5, linetype = 2, color = "gray") +
  theme_bw() +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

enter image description here

However, I want to change the background color of the panel based on the gene names on the y-axis, so that each gene has a different color, or at least so that neighboring genes do not share the same color. As my y-axis labels are discrete, I'm not sure how to achieve this. Any help would be appreciated. Thanks!

I have tried:

Change background colour between day and night in ggplot2 in R


Solution

  • Here is a solution using geom_tile(),

    1. We create a data frame for the background colors
    2. In geom_tile() we set the x and width aesthetics ourselves, so that each gene gets one tile that covers the full width of the plot.
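    3. The tiles are centered horizontally with x = sum(x_limits) / 2 and given width = diff(x_limits), so each tile spans the full range of the data (mean ± sd). Note that the x scale's default expansion leaves a narrow uncolored margin on either side of that range; setting width = Inf instead stretches the tiles all the way to the panel edges.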
    
    library(ggplot2)
    library(dplyr)
    
    # Reproducible example data: 10 genes x 2 groups = 20 rows
    set.seed(123)
    plot_df <- data.frame(
      gene = paste0("ENSG", rep(1:10, each = 2), ".17"),
      # arguments are evaluated in order, so the means are drawn before the sds
      mean = rnorm(n = 20, mean = 0.5, sd = 0.1),
      sd = rnorm(n = 20, mean = 0.02, sd = 0.01),
      group = rep_len(c("group_a", "group_b"), 20)
    )
    
    # df for background colors: one row per gene with alternating fill colors.
    # The rows are ordered through factor(), whose levels are sort(unique(gene))
    # in the session locale -- the ordering ggplot2 applies to a discrete axis
    # built from character data. A bare arrange(gene) sorts in the C locale
    # (dplyr >= 1.1.0), which can rank names differently from the axis and so
    # put the same color on two neighboring rows.
    bg_df <- plot_df %>%
      distinct(gene) %>%
      arrange(factor(gene)) %>%
      mutate(color = ifelse(row_number() %% 2 == 1, "gold", "lightgrey"))
    
    # x axis limits based on the data: the span covered by the error bars
    # (mean - sd up to mean + sd), ignoring missing values
    x_limits <- with(plot_df, range(c(mean - sd, mean + sd), na.rm = TRUE))
    
    # the plot: striped background tiles first, then the data layers on top
    ggplot(plot_df, aes(y = gene, x = mean)) +
      # One background tile per gene, filled with the color stored in bg_df.
      # width = Inf stretches every tile to both panel edges; a finite width of
      # diff(x_limits) stops at the data range and leaves the x scale's default
      # expansion margins uncolored. x only needs to be a finite value inside
      # the data range, so the midpoint of x_limits is kept.
      geom_tile(data = bg_df,
                aes(y = gene, x = sum(x_limits) / 2, width = Inf, fill = color),
                alpha = 0.3, inherit.aes = FALSE) +
      geom_point(aes(color = group), position = position_dodge(width = 0.9)) +
      geom_errorbar(aes(xmin = mean - sd, xmax = mean + sd, color = group),
                    width = 0.2, position = position_dodge(width = 0.9)) +
      scale_color_manual(values = c("steelblue3", "red3")) +
      geom_vline(xintercept = 0.5, linetype = 2, color = "gray80") +
      theme_bw() +
      theme(panel.background = element_blank(),
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.position = "right") +
      # bg_df$color already holds literal color names, so use them as-is
      scale_fill_identity()
    

    enter image description here