Search code examples
r ggplot2 plot boxplot

Adding significance markers for group comparisons in a boxplot using ggplot2


I have data like the following:

# Reproducible toy data set: 100 rows, the first 50 labelled CL1/GSE1/M1 and
# the last 50 labelled CL2/GSE2/M2, each with a standard-normal expression
# value and a randomly assigned age group ("A" or "B").
set.seed(123)
expr_data <- data.frame(
    cell_line_name = rep(c("CL1", "CL2"), times = c(50, 50)),
    GSE_id = rep(c("GSE1", "GSE2"), times = c(50, 50)),
    model = rep(c("M1", "M2"), times = c(50, 50)),
    # rnorm() must be drawn before sample() so the random stream is unchanged.
    expr = rnorm(n = 100),
    age_group = sample(c("A", "B"), size = 100, replace = TRUE)
)
    cell_line_name GSE_id model        expr age_group
1              CL1   GSE1    M1 -0.34391723         B
2              CL1   GSE1    M1  0.09049665         A
3              CL1   GSE1    M1  1.59850877         A
4              CL1   GSE1    M1 -0.08856511         A
5              CL1   GSE1    M1  1.08079950         B
6              CL1   GSE1    M1  0.63075412         B
7              CL1   GSE1    M1 -0.11363990         A
8              CL1   GSE1    M1 -1.53290200         B
9              CL1   GSE1    M1 -0.52111732         A
10             CL1   GSE1    M1 -0.48987045         B
11             CL1   GSE1    M1  0.04715443         A

I want to plot a boxplot with the combination of cell_line_name, GSE_id, and model on the x-axis and expr on the y-axis, and show the comparison between age groups A and B:

# Boxplot of expr for each cell line / data set / model combination, with the
# two age groups dodged side by side and the raw observations overlaid.
# Requires ggplot2 to be attached.
ggplot(
    # Use the data frame built above; `expr_data.gene` is never defined in
    # this example.
    expr_data,
    aes(
        x = paste0(cell_line_name, "_", GSE_id, "_", model),
        y = expr,
        color = age_group
    )
) +
    # Hide the boxplot's own outlier markers: geom_point() below already
    # draws every observation, so outliers would otherwise be shown twice.
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(), alpha = 0.5)

enter image description here

I also want to add a significance label for the A-vs-B comparison at each x value. I have tried ggsignif and ggpubr, but neither worked:

# Attempt to add significance brackets with ggpubr on top of the dodged
# boxplot. Requires ggplot2 to be attached and ggpubr to be installed.
ggplot(
    # Use the data frame built above; `expr_data.gene` is never defined in
    # this example.
    expr_data,
    aes(
        x = paste0(cell_line_name, "_", GSE_id, "_", model),
        y = expr,
        color = age_group
    )
) +
    # Hide the boxplot's own outlier markers: geom_point() below already
    # draws every observation, so outliers would otherwise be shown twice.
    geom_boxplot(outlier.shape = NA) +
    geom_point(position = position_jitterdodge(), alpha = 0.5) +
    # NOTE: this is the call that does not give the desired result.
    # `comparisons` must name (or index) two values on the x-axis, but "A" and
    # "B" are levels of age_group, which is mapped to colour here, not to x.
    ggpubr::stat_compare_means(comparisons = list(c("A", "B")))

expected output: enter image description here


Solution

  • The comparison needs to be either the names or index of 2 values on the x-axis, so you could either:

    • Use facet_wrap rather than dodging and hide the panels, or
    • Include age_group in the x-axis:
    library(tidyverse)
    library(ggsignif)
    
    # Fix the seed so the simulated values below are reproducible.
    set.seed(123)

    # Toy data: 50 rows for CL1/GSE1/M1 followed by 50 rows for CL2/GSE2/M2,
    # with standard-normal expression values and random age groups A/B.
    expr_data <- data.frame(
      cell_line_name = rep(paste0("CL", 1:2), each = 50),
      GSE_id = rep(paste0("GSE", 1:2), each = 50),
      model = rep(paste0("M", 1:2), each = 50),
      # Draw expr before age_group to keep the random stream unchanged.
      expr = rnorm(n = 100),
      age_group = sample(x = c("A", "B"), size = 100, replace = TRUE)
    )
    
    # Using facet_wrap with hidden panels: age_group goes on the x-axis of
    # each facet, so "A" and "B" are real x values that geom_signif() can
    # compare; the facets (one per cell line / data set / model combination)
    # are then styled to look like a single shared axis.
    expr_data |>
      mutate(x = paste0(cell_line_name, "_", GSE_id, "_", model)) |>
      ggplot(aes(x = age_group, y = expr, color = age_group)) +
      # Hide the boxplot's own outlier markers: geom_point() below already
      # draws every observation, so outliers would otherwise be shown twice.
      geom_boxplot(outlier.shape = NA) +
      geom_point(position = position_jitter(), alpha = 0.5) +
      geom_signif(
        comparisons = list(c("A", "B")),
        map_signif_level = TRUE, textsize = 6
      ) +
      # Facet labels are moved to the bottom so they read as x-axis labels.
      facet_wrap(~x, scales = "free_x", strip.position = "bottom") +
      # Lower limit is taken from the data (NA); the fixed upper limit of 3
      # presumably leaves headroom for the significance bracket.
      ylim(c(NA, 3)) +
      labs(x = "Axis Title") +
      theme_bw() +
      # Remove the per-facet decorations so the panels blend together.
      theme(
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.border = element_blank(),
        strip.background = element_blank()
      )
    

    
    # Adding age-group into the x-axis: every cell line / data set / model /
    # age group combination becomes its own x value, so each A-vs-B pair can
    # be named explicitly in `comparisons`.
    expr_data |>
      mutate(
        x = paste0(cell_line_name, "_", GSE_id, "_", model, "_", age_group)
      ) |>
      ggplot(aes(x = x, y = expr, color = age_group)) +
      # Hide the boxplot's own outlier markers: geom_point() below already
      # draws every observation, so outliers would otherwise be shown twice.
      geom_boxplot(outlier.shape = NA) +
      geom_point(position = position_jitter(), alpha = 0.5) +
      geom_signif(
        # One pair of x-axis labels per bracket; the labels must match the
        # values produced by the mutate() call above.
        comparisons = list(
          c("CL1_GSE1_M1_A", "CL1_GSE1_M1_B"),
          c("CL2_GSE2_M2_A", "CL2_GSE2_M2_B")
        ),
        map_signif_level = TRUE, textsize = 6
      ) +
      # Lower limit is taken from the data (NA); the fixed upper limit of 3
      # presumably leaves headroom for the significance brackets.
      ylim(c(NA, 3)) +
      labs(x = "Axis Title") +
      theme_bw() +
      # Rotate the long combined labels so they do not overlap.
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
    

    Created on 2024-04-20 with reprex v2.1.0