I am building a stacked bar graph using ggplot2. Here is the code I am currently using to generate a plot that shows the sum of the values in apop, nec, and late as a stack, with each category's bar colored differently so that it is clear how much that category contributes to the sum.
This is a picture of the graph I get when I simply ignore the stats.
This is what I have tried so far
data table
condition | rep | nec | late | apop |
---|---|---|---|---|
37_colo_control | rep1 | 0.0209 | 0.0334 | 0.0405 |
37_colo_control | rep2 | 0.0013 | 0.0402 | 0.0541 |
37_colo_control | rep3 | 0.0076 | 0.0546 | 0.0707 |
42_colo_control | rep1 | 0.0147 | 0.0564 | 0.0616 |
42_colo_control | rep2 | 0.0233 | 0.0596 | 0.0762 |
42_colo_control | rep3 | 0.0176 | 0.0461 | 0.0507 |
37_colo_mmc | rep1 | 0.01210 | 0.0976 | 0.2370 |
37_colo_mmc | rep2 | 0.00860 | 0.1090 | 0.2410 |
37_colo_mmc | rep3 | 0.00760 | 0.1110 | 0.2890 |
42_colo_mmc | rep1 | 0.00870 | 0.1120 | 0.3020 |
42_colo_mmc | rep2 | 0.01220 | 0.1330 | 0.3270 |
42_colo_mmc | rep3 | 0.00870 | 0.1120 | 0.3020 |
The sample data above as a data frame named "the_data":
# Sample data as a structure() literal (dput()-style): 12 rows, i.e. four
# conditions with three replicates (rep1-rep3) each, and one numeric column per
# measured category (nec, late, apop).
the_data <- structure(list(condition = c("37_colo_control", "37_colo_control",
"37_colo_control", "42_colo_control", "42_colo_control", "42_colo_control",
"37_colo_mmc", "37_colo_mmc", "37_colo_mmc", "42_colo_mmc", "42_colo_mmc",
"42_colo_mmc"), rep = c("rep1", "rep2", "rep3", "rep1", "rep2",
"rep3", "rep1", "rep2", "rep3", "rep1", "rep2", "rep3"), nec = c(0.0209,
0.0013, 0.0076, 0.0147, 0.0233, 0.0176, 0.0121, 0.0086, 0.0076,
0.0087, 0.0122, 0.0087), late = c(0.0334, 0.0402, 0.0546, 0.0564,
0.0596, 0.0461, 0.0976, 0.109, 0.111, 0.112, 0.133, 0.112), apop = c(0.0405,
0.0541, 0.0707, 0.0616, 0.0762, 0.0507, 0.237, 0.241, 0.289,
0.302, 0.327, 0.302)), class = "data.frame", row.names = c(NA,
12L))
code
library(ggpubr)
library(ggprism)
library(ggplot2)
# Placeholder from the post, not runnable R: the table shown above is what gets
# loaded into the_data here.
the_data <- read.csv(**[[see table for data]]**)
# Conditions in the order used for the x axis (passed as factor levels below).
factored_condition <- c("37_colo_control","37_colo_mmc","42_colo_control","42_colo_mmc")
# Three pairwise comparisons: the first condition against each of the others.
comparisons <- list(c(factored_condition[1],factored_condition[2]),
c(factored_condition[1],factored_condition[3]),
c(factored_condition[1],factored_condition[4])
)
# NOTE(review): the result of this pipe is never assigned, so the_data itself
# is left ungrouped and unchanged for the plot below.
the_data %>%
group_by(condition)
# The plot-level aes() defines only x; every layer below supplies its own y.
fig_bar <- ggplot(the_data, aes(x=factor(condition, levels=factored_condition)))+
# "Stacking" is emulated by overlaying three bars per condition, tallest first:
# mean(apop + nec + late), then mean(nec + late), then mean(nec).
geom_bar(aes(y=apop+nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey65") +
# NOTE(review): y is given only through this layer's `mapping`. The error quoted
# below reports that the signif layer is missing y -- presumably the mapping is
# not forwarded when `comparisons` is set; confirm against the ggpubr docs.
stat_compare_means(mapping = aes(y=apop),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
geom_bar(aes(y=nec+late),position=position_dodge(), stat="summary", fun="mean", fill = "grey45") +
# Same comparison settings as above, copied for the `late` column.
stat_compare_means(mapping = aes(y=late),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
geom_bar(aes(y=nec),position=position_dodge(), stat="summary", fun="mean", fill = "grey 15") +
# Same comparison settings as above, copied for the `nec` column.
stat_compare_means(mapping = aes(y=nec),
comparisons = comparisons, paired = TRUE, method = "t.test", label="p.signif",
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
symbols = c("****","***", "**", "*", " "))) +
labs(y="Percent of Cells", x="", fill = "") +
ggtitle("Colo205") +
# The y axis is fixed to 0-1 and its labels are formatted as percentages.
scale_y_continuous(expand=c(0,0),limits = c(0,1.0), labels = scales::percent) +
# NOTE(review): x.names is not defined anywhere in the code shown.
scale_x_discrete(labels=x.names) +
theme_prism()
# Print the assembled plot.
fig_bar
Basically, what I tried is copy-pasting the `stat_compare_means()` section after each individual bar layer. However, I keep getting the error below, and I am not sure what is wrong, since I am passing `y = apop`, `y = late`, and `y = nec` in the `aes()` of the respective layers.
Error in `ggsignif::geom_signif()`:
! Problem while computing stat.
i Error occurred in the 3rd layer.
Caused by error in `compute_layer()`:
! `stat_signif()` requires the following missing aesthetics: y
Backtrace:
Things get easier with the concept of tidy data, which in this case means reshaping your data to long format. That way, you don't have to repeat the same instruction for each and every column; instead, you issue it once per group (the groups being derived from the initial column names).
Example:
library(dplyr)
library(tidyr) ## to reshape
library(ggplot2)
library(ggpubr)
library(ggprism)
# Conditions, listed in the order they are referred to below.
factored_condition <- c(
  "37_colo_control", "37_colo_mmc",
  "42_colo_control", "42_colo_mmc"
)

# Pair the first condition with each of the remaining ones, giving the list of
# two-element character vectors that `comparisons =` expects.
comparisons <- lapply(
  factored_condition[-1],
  function(other) c(factored_condition[1], other)
)
# Sample data as a structure() literal (dput()-style): 12 rows, i.e. four
# conditions with three replicates (rep1-rep3) each, and one numeric column per
# measured category (nec, late, apop).
the_data <- structure(list(condition = c("37_colo_control", "37_colo_control",
"37_colo_control", "42_colo_control", "42_colo_control", "42_colo_control",
"37_colo_mmc", "37_colo_mmc", "37_colo_mmc", "42_colo_mmc", "42_colo_mmc",
"42_colo_mmc"), rep = c("rep1", "rep2", "rep3", "rep1", "rep2",
"rep3", "rep1", "rep2", "rep3", "rep1", "rep2", "rep3"), nec = c(0.0209,
0.0013, 0.0076, 0.0147, 0.0233, 0.0176, 0.0121, 0.0086, 0.0076,
0.0087, 0.0122, 0.0087), late = c(0.0334, 0.0402, 0.0546, 0.0564,
0.0596, 0.0461, 0.0976, 0.109, 0.111, 0.112, 0.133, 0.112), apop = c(0.0405,
0.0541, 0.0707, 0.0616, 0.0762, 0.0507, 0.237, 0.241, 0.289,
0.302, 0.327, 0.302)), class = "data.frame", row.names = c(NA,
12L))
# Reshape to long format: one row per condition x replicate x parameter, with
# the former column names (nec, late, apop) in `parameter` and the measurements
# in `value`.
# The measurements are plotted as they are: they already lie between 0 and 1
# and the y axis below formats them with scales::percent. An ungrouped
# prop.table(value) would instead divide every measurement by the grand total
# of the whole column, so the axis would no longer show percent of cells.
the_data <-
  the_data |>
  pivot_longer(cols = nec:apop, names_to = "parameter")

# Preview of the reshaped data:
# > head(the_data, 4)
# # A tibble: 4 x 4
#   condition       rep   parameter  value
#   <chr>           <chr> <chr>      <dbl>
# 1 37_colo_control rep1  nec       0.0209
# 2 37_colo_control rep1  late      0.0334
# 3 37_colo_control rep1  apop      0.0405
# 4 37_colo_control rep2  nec       0.0013

# y is part of the plot-level aes(), so every layer, including the comparison
# brackets, inherits it.
# NOTE(review): group = parameter presumably makes the comparisons run
# separately for each parameter -- confirm against the ggpubr/ggsignif docs.
ggplot(the_data, aes(x = condition, y = value, group = parameter)) +
  # One stacked segment per parameter. Each segment is the mean over the
  # replicates of that condition; geom_col() would stack every replicate and
  # therefore show their sum.
  geom_bar(
    aes(fill = parameter),
    stat = "summary", fun = "mean", position = "stack", alpha = .5
  ) +
  stat_compare_means(
    comparisons = comparisons,
    paired = TRUE, method = "t.test", label = "p.signif",
    symnum.args = list(
      cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf),
      # "****", "***", "**", "*", "": one symbol per interval between cutpoints
      symbols = strrep("*", 4:0)
    ),
    step.increase = .5 ## increase vertical spacing between brackets
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_fill_grey()