Search code examples
r ggplot2 stacked-bar-chart

ggplot2 bar graph with statistics (apoptosis/necrosis assay)


I am building a stacked bar graph using ggplot2. Here is the code I am currently using to generate the plot: it stacks the sum of the values in apop, nec, and late, with differently colored bars so that it is clear how much each category contributes to the sum.

This is a picture of the graph I get when I simply ignore the stats.

enter image description here

This is what I have tried so far

data table

condition rep nec late apop
37_colo_control rep1 0.0209 0.0334 0.0405
37_colo_control rep2 0.0013 0.0402 0.0541
37_colo_control rep3 0.0076 0.0546 0.0707
42_colo_control rep1 0.0147 0.0564 0.0616
42_colo_control rep2 0.0233 0.0596 0.0762
42_colo_control rep3 0.0176 0.0461 0.0507
37_colo_mmc rep1 0.01210 0.0976 0.2370
37_colo_mmc rep2 0.00860 0.1090 0.2410
37_colo_mmc rep3 0.00760 0.1110 0.2890
42_colo_mmc rep1 0.00870 0.1120 0.3020
42_colo_mmc rep2 0.01220 0.1330 0.3270
42_colo_mmc rep3 0.00870 0.1120 0.3020

above sample data as dataframe "the_data":

# Example assay data: four conditions with three replicates each, and one
# numeric column per measured parameter (nec, late, apop).
the_data <- structure(
  list(
    condition = rep(
      c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
      each = 3
    ),
    rep = rep(c("rep1", "rep2", "rep3"), times = 4),
    nec = c(
      0.0209, 0.0013, 0.0076, 0.0147, 0.0233, 0.0176,
      0.0121, 0.0086, 0.0076, 0.0087, 0.0122, 0.0087
    ),
    late = c(
      0.0334, 0.0402, 0.0546, 0.0564, 0.0596, 0.0461,
      0.0976, 0.109, 0.111, 0.112, 0.133, 0.112
    ),
    apop = c(
      0.0405, 0.0541, 0.0707, 0.0616, 0.0762, 0.0507,
      0.237, 0.241, 0.289, 0.302, 0.327, 0.302
    )
  ),
  class = "data.frame",
  row.names = c(NA, 12L)
)

code

library(ggpubr)
library(ggprism)
library(ggplot2)


# NOTE(review): placeholder, not runnable R -- substitute a real CSV path, or
# build the_data from the table / structure() literal given with the data.
the_data <- read.csv(**[[see table for data]]**)

# Condition labels in the order they should appear.
factored_condition <- c(
  "37_colo_control", "37_colo_mmc",
  "42_colo_control", "42_colo_mmc"
)
# Compare the first condition against each of the other three.
comparisons <- lapply(
  factored_condition[-1],
  function(other) c(factored_condition[1], other)
)


# NOTE(review): the grouped result is neither assigned nor piped onward, so
# the_data itself is left unchanged by this statement. group_by() is presumably
# dplyr's; no library(dplyr) call is visible in this snippet -- confirm.
the_data %>%
  group_by(condition)

# Build the bar chart: three mean-summary bar layers are drawn on top of each
# other per condition (apop+nec+late, then nec+late, then nec), so the shorter,
# darker bars cover the lower part of the taller ones. Each bar layer is
# followed by a stat_compare_means() layer for one of the three columns.
# Only x is mapped at plot level; every y mapping is layer-specific.
fig_bar <- ggplot(the_data, aes(x=factor(condition, levels=factored_condition)))+ 
  # Tallest bar: mean of the sum of all three columns.
  geom_bar(aes(y=apop+nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey65") +
  # Paired t-tests on apop for the listed condition pairs, shown as symbols.
  # NOTE(review): the error quoted with this snippet reports that stat_signif
  # is missing the y aesthetic; presumably the y given in this layer's mapping
  # does not reach the significance layer -- confirm against ggpubr.
  stat_compare_means(mapping = aes(y=apop), 
                     comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
                     symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), 
                                        symbols = c("****","***", "**", "*", " "))) +

    # Middle bar: mean of nec + late.
    geom_bar(aes(y=nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey45") +
    # Same comparison settings, applied to late.
    stat_compare_means(mapping = aes(y=late), 
                     comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
                     symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), 
                                        symbols = c("****","***", "**", "*", " "))) +
  
    # Shortest bar: mean of nec alone.
    geom_bar(aes(y=nec),position=position_dodge(), stat="summary", fun="mean", fill = "grey 15") +
    # Same comparison settings, applied to nec.
    stat_compare_means(mapping = aes(y=nec), 
                     comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
                     symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), 
                                        symbols = c("****","***", "**", "*", " "))) +
 
  # Axis labels, title, and a 0-100% y axis with no expansion padding.
  labs(y="Percent of Cells", x="", fill = "") +
  ggtitle("Colo205") +
  scale_y_continuous(expand=c(0,0),limits = c(0,1.0), labels = scales::percent) +
  # NOTE(review): x.names is not defined anywhere in this snippet -- confirm it
  # exists before running.
  scale_x_discrete(labels=x.names) +
  theme_prism()

# Print the assembled plot.
fig_bar

Basically, what I tried is copy-pasting the stat_compare_means section after each individual bar layer. However, I keep getting an error, and I am not sure what is wrong, since I am putting y = apop / nec / late in the aes().

Error in `ggsignif::geom_signif()`:
! Problem while computing stat.
i Error occurred in the 3rd layer.
Caused by error in `compute_layer()`:
! `stat_signif()` requires the following missing aesthetics: y
Backtrace:

Solution

  • Things get easier with the concept of tidy data, which in this case means reshaping your data to long format. Doing so, you don't have to issue the same instruction for each and every column; instead you do it once per group (derived from the initial column names).

    Example:

    • prelude
    library(dplyr)
    library(tidyr) ## to reshape
    
    library(ggplot2)
    library(ggpubr)
    library(ggprism)
    
    # Condition labels; the first one is compared against all the others.
    factored_condition <- c(
      "37_colo_control", "37_colo_mmc",
      "42_colo_control", "42_colo_mmc"
    )

    # One pair per non-first condition: c(first, other).
    comparisons <- lapply(
      factored_condition[-1],
      \(other) c(factored_condition[1], other)
    )
    
    # Wide-format example data: four conditions with three replicates each,
    # and one numeric column per measured parameter (nec, late, apop).
    the_data <- structure(
      list(
        condition = rep(
          c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
          each = 3
        ),
        rep = rep(c("rep1", "rep2", "rep3"), times = 4),
        nec = c(
          0.0209, 0.0013, 0.0076, 0.0147, 0.0233, 0.0176,
          0.0121, 0.0086, 0.0076, 0.0087, 0.0122, 0.0087
        ),
        late = c(
          0.0334, 0.0402, 0.0546, 0.0564, 0.0596, 0.0461,
          0.0976, 0.109, 0.111, 0.112, 0.133, 0.112
        ),
        apop = c(
          0.0405, 0.0541, 0.0707, 0.0616, 0.0762, 0.0507,
          0.237, 0.241, 0.289, 0.302, 0.327, 0.302
        )
      ),
      class = "data.frame",
      row.names = c(NA, 12L)
    )
    
    • reshape and calculate percentages:
    # Reshape to long format (one row per condition / replicate / parameter),
    # then express each value as its share of the sum of all values. No
    # grouping is applied, so the shares add up to 1 over the whole table
    # rather than within each condition.
    the_data <- the_data |>
      pivot_longer(
        cols = nec:apop,
        names_to = "parameter",
        values_to = "value"
      ) |>
      mutate(value_percent = value / sum(value))
    
    > head(the_data, 4)
    # A tibble: 4 x 5
      condition       rep   parameter  value value_percent
      <chr>           <chr> <chr>      <dbl>         <dbl>
    1 37_colo_control rep1  nec       0.0209      0.00661 
    2 37_colo_control rep1  late      0.0334      0.0106  
    3 37_colo_control rep1  apop      0.0405      0.0128  
    4 37_colo_control rep2  nec       0.0013      0.000411 
    
    • plot:
    # Stacked columns of value_percent per condition, filled by parameter,
    # with paired t-test brackets for the requested condition pairs.
    ggplot(
      data = the_data,
      mapping = aes(x = condition, y = value_percent, group = parameter)
    ) +
      geom_col(mapping = aes(fill = parameter), alpha = 0.5) +
      stat_compare_means(
        comparisons = comparisons,
        method = "t.test",
        paired = TRUE,
        label = "p.signif",
        symnum.args = list(
          cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
          # "****", "***", "**", "*" and "" (empty) for the last interval
          symbols = strrep("*", 4:0)
        ),
        step.increase = 0.5 # increase vertical spacing between brackets
      ) +
      scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
      scale_fill_grey()
    

    stacked plot with group comparisons