ggplot2 bar graph with statistics (apoptosis/necrosis assay)

I am building a stacked bar graph using ggplot2. Below is the code I am currently using to generate a plot that stacks the values in `apop`, `nec`, and `late`, with a differently colored bar segment for each, so that it is clear how much each category contributes to the sum.

This is a picture of the graph I get when I simply ignore the stats.

This is what I have tried so far

data table

condition	rep	nec	late	apop
37_colo_control	rep1	0.0209	0.0334	0.0405
37_colo_control	rep2	0.0013	0.0402	0.0541
37_colo_control	rep3	0.0076	0.0546	0.0707
42_colo_control	rep1	0.0147	0.0564	0.0616
42_colo_control	rep2	0.0233	0.0596	0.0762
42_colo_control	rep3	0.0176	0.0461	0.0507
37_colo_mmc	rep1	0.01210	0.0976	0.2370
37_colo_mmc	rep2	0.00860	0.1090	0.2410
37_colo_mmc	rep3	0.00760	0.1110	0.2890
42_colo_mmc	rep1	0.00870	0.1120	0.3020
42_colo_mmc	rep2	0.01220	0.1330	0.3270
42_colo_mmc	rep3	0.00870	0.1120	0.3020

The sample data above as a data frame named `the_data`:

# Sample data: four conditions with three replicates each (12 rows), holding
# the measured nec, late and apop values for every replicate.
the_data <- structure(
  list(
    condition = rep(
      c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
      each = 3
    ),
    rep = rep(c("rep1", "rep2", "rep3"), times = 4),
    nec = c(
      0.0209, 0.0013, 0.0076,
      0.0147, 0.0233, 0.0176,
      0.0121, 0.0086, 0.0076,
      0.0087, 0.0122, 0.0087
    ),
    late = c(
      0.0334, 0.0402, 0.0546,
      0.0564, 0.0596, 0.0461,
      0.0976, 0.109, 0.111,
      0.112, 0.133, 0.112
    ),
    apop = c(
      0.0405, 0.0541, 0.0707,
      0.0616, 0.0762, 0.0507,
      0.237, 0.241, 0.289,
      0.302, 0.327, 0.302
    )
  ),
  class = "data.frame",
  row.names = c(NA, 12L)
)

code

library(ggpubr)
library(ggprism)
library(ggplot2)


# NOTE(review): the argument below is a placeholder, not runnable R; the data
# are the values listed in the table / structure() call shown earlier.
the_data <- read.csv(**[[see table for data]]**)

# Order in which the conditions are used as factor levels for the x axis.
factored_condition <- c("37_colo_control","37_colo_mmc","42_colo_control","42_colo_mmc")
# Pairs to compare: the first condition against each of the other three.
comparisons <- list(c(factored_condition[1],factored_condition[2]),
                    c(factored_condition[1],factored_condition[3]),
                    c(factored_condition[1],factored_condition[4])
                    )


# NOTE(review): the grouped result is not assigned to anything, so `the_data`
# is passed to ggplot() below unchanged.
the_data %>%
  group_by(condition)

# Overlaid bars: each geom_bar() draws the per-condition mean of a cumulative
# sum (apop+nec+late, then nec+late, then nec) in a progressively darker grey,
# and each is followed by a stat_compare_means() layer for one column.
fig_bar <- ggplot(the_data, aes(x=factor(condition, levels=factored_condition)))+ 
  # Tallest bar: mean of apop + nec + late.
  geom_bar(aes(y=apop+nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey65") +
  # Paired t-test for each pair in `comparisons` on apop, labelled with
  # significance symbols; the last interval (0.05, Inf] maps to a blank label.
  stat_compare_means(mapping = aes(y=apop), 
                     comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
                     symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), 
                                        symbols = c("****","***", "**", "*", " "))) +

    # Middle bar: mean of nec + late, followed by the same test on late.
    geom_bar(aes(y=nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey45") +
    stat_compare_means(mapping = aes(y=late), 
                     comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
                     symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), 
                                        symbols = c("****","***", "**", "*", " "))) +
  
    # Shortest bar: mean of nec, followed by the same test on nec.
    geom_bar(aes(y=nec),position=position_dodge(), stat="summary", fun="mean", fill = "grey 15") +
    stat_compare_means(mapping = aes(y=nec), 
                     comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
                     symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), 
                                        symbols = c("****","***", "**", "*", " "))) +
 
  labs(y="Percent of Cells", x="", fill = "") +
  ggtitle("Colo205") +
  # y axis fixed to 0-1 with no expansion padding, labelled as percentages.
  scale_y_continuous(expand=c(0,0),limits = c(0,1.0), labels = scales::percent) +
  # NOTE(review): `x.names` is not defined anywhere in the code shown here.
  scale_x_discrete(labels=x.names) +
  theme_prism()

# Print the assembled plot.
fig_bar

Basically, what I tried is copy-pasting the `stat_compare_means()` section after each individual bar layer. However, I keep getting the error below, and I am not sure what is wrong, since I am putting `y = apop` / `y = late` / `y = nec` in the `aes()`.

Error in `ggsignif::geom_signif()`:
! Problem while computing stat.
i Error occurred in the 3rd layer.
Caused by error in `compute_layer()`:
! `stat_signif()` requires the following missing aesthetics: y
Backtrace:

Solution

Things get easier with the concept of tidy data, which in this case means reshaping your data to long format. Once that is done, you don't have to issue the same instruction for each and every column; instead you issue it once and it is applied per group (the groups being derived from the initial column names).

Example:

prelude

library(dplyr)
library(tidyr) ## to reshape

library(ggplot2)
library(ggpubr)
library(ggprism)

# Condition labels; the first entry is the one every comparison is made
# against.
factored_condition <- c(
  "37_colo_control", "37_colo_mmc", "42_colo_control", "42_colo_mmc"
)

# Pair the first condition with each of the remaining conditions, giving a
# list of length-2 character vectors.
comparisons <- lapply(
  factored_condition[-1],
  \(other) c(factored_condition[1], other)
)

# Sample data: four conditions with three replicates each (12 rows), holding
# the measured nec, late and apop values for every replicate.
the_data <- structure(
  list(
    condition = rep(
      c("37_colo_control", "42_colo_control", "37_colo_mmc", "42_colo_mmc"),
      each = 3
    ),
    rep = rep(c("rep1", "rep2", "rep3"), times = 4),
    nec = c(
      0.0209, 0.0013, 0.0076,
      0.0147, 0.0233, 0.0176,
      0.0121, 0.0086, 0.0076,
      0.0087, 0.0122, 0.0087
    ),
    late = c(
      0.0334, 0.0402, 0.0546,
      0.0564, 0.0596, 0.0461,
      0.0976, 0.109, 0.111,
      0.112, 0.133, 0.112
    ),
    apop = c(
      0.0405, 0.0541, 0.0707,
      0.0616, 0.0762, 0.0507,
      0.237, 0.241, 0.289,
      0.302, 0.327, 0.302
    )
  ),
  class = "data.frame",
  row.names = c(NA, 12L)
)

reshape and calculate percentages:

# Reshape to long format: the nec, late and apop columns are stacked into one
# `value` column, with the originating column name stored in `parameter`.
# `value_percent` is each value divided by the sum of the whole `value` column.
# NOTE(review): there is no grouping here, so the denominator is the grand
# total over all conditions, replicates and parameters — confirm that this
# (rather than a per-condition share) is what should be plotted.
the_data <- the_data |>
  pivot_longer(cols = nec:apop, names_to = "parameter") |>
  mutate(value_percent = value / sum(value))

> head(the_data, 4)
# A tibble: 4 x 5
  condition       rep   parameter  value value_percent
  <chr>           <chr> <chr>      <dbl>         <dbl>
1 37_colo_control rep1  nec       0.0209      0.00661 
2 37_colo_control rep1  late      0.0334      0.0106  
3 37_colo_control rep1  apop      0.0405      0.0128  
4 37_colo_control rep2  nec       0.0013      0.000411

plot:

# Stacked columns of value_percent per condition, filled by parameter in grey
# shades, with paired t-test brackets for each pair in `comparisons`.
ggplot(
  data = the_data,
  mapping = aes(x = condition, y = value_percent, group = parameter)
) +
  geom_col(mapping = aes(fill = parameter), alpha = 0.5) +
  stat_compare_means(
    comparisons = comparisons,
    method = "t.test",
    paired = TRUE,
    label = "p.signif",
    symnum.args = list(
      cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
      # One symbol per p-value interval: "****", "***", "**", "*", "".
      symbols = strrep("*", 4:0)
    ),
    step.increase = 0.5 # increase vertical spacing between brackets
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_fill_grey()