Search code examples
r ggplot2 errorbar stacked-bar-chart grouped-bar-chart

Stacked bars with 2 groups and error bars


I'm trying to create a stacked bar plot with 2 groups and error bars from a soil data set. I've got the factors Time = c(May 21, July 21, July 22), Treatment = c(T1, T2, T3, T4), and Fraction = c(Fine earth, Plant remains). There are also 'Repl' and 'Parcel', but these are not relevant for plotting. I'd like to plot the dependent variable Corg with Time on the x axis, grouped by Treatment and Fraction, with Fraction stacked. That is, 'Fine earth' should be stacked on top of 'Plant remains'.

Here's the output of dput():

dput(datas)
structure(list(Treatment = c("T1", "T1", "T1", "T1", "T2", "T2", 
"T2", "T2", "T3", "T3", "T3", "T3", "T4", "T4", "T4", "T4", "T1", 
"T1", "T1", "T1", "T2", "T2", "T2", "T2", "T3", "T3", "T3", "T3", 
"T4", "T4", "T4", "T4", "T1", "T1", "T1", "T1", "T2", "T2", "T2", 
"T2", "T3", "T3", "T3", "T3", "T4", "T4", "T4", "T4", "T1", "T1", 
"T1", "T1", "T2", "T2", "T2", "T2", "T3", "T3", "T3", "T3", "T4", 
"T4", "T4", "T4", "T1", "T1", "T1", "T1", "T2", "T2", "T2", "T2", 
"T3", "T3", "T3", "T3", "T4", "T4", "T4", "T4", "T1", "T1", "T1", 
"T1", "T2", "T2", "T2", "T2", "T3", "T3", "T3", "T3", "T4", "T4", 
"T4", "T4"), Repl = c("R1", "R2", "R3", "R4", "R1", "R2", "R3", 
"R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", 
"R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", 
"R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", 
"R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", 
"R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", 
"R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", 
"R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", 
"R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", "R3", 
"R4"), Parcel = structure(c(1L, 5L, 9L, 13L, 2L, 6L, 10L, 14L, 
3L, 7L, 11L, 15L, 4L, 8L, 12L, 16L, 1L, 5L, 9L, 13L, 2L, 6L, 
10L, 14L, 3L, 7L, 11L, 15L, 4L, 8L, 12L, 16L, 1L, 5L, 9L, 13L, 
2L, 6L, 10L, 14L, 3L, 7L, 11L, 15L, 4L, 8L, 12L, 16L, 1L, 5L, 
9L, 13L, 2L, 6L, 10L, 14L, 3L, 7L, 11L, 15L, 4L, 8L, 12L, 16L, 
1L, 5L, 9L, 13L, 2L, 6L, 10L, 14L, 3L, 7L, 11L, 15L, 4L, 8L, 
12L, 16L, 1L, 5L, 9L, 13L, 2L, 6L, 10L, 14L, 3L, 7L, 11L, 15L, 
4L, 8L, 12L, 16L), levels = c("1", "2", "3", "4", "5", "6", "7", 
"8", "9", "10", "11", "12", "13", "14", "15", "16"), class = "factor"), 
    Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("May 21", 
    "July 21", "July 22"), class = "factor"), Fraction = c("Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Fine earth", "Fine earth", "Fine earth", 
    "Fine earth", "Fine earth", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains", "Plant remains", "Plant remains", 
    "Plant remains", "Plant remains"), Corg = c(2.242, 2.3539374, 
    2.24021216, 2.29317597, 2.02245578, 2.19354698, 2.25056174, 
    1.97073662, 2.03775086, 2.182, 2.054, 2.23143852, 2.43, 1.963, 
    1.95676108, 1.91028975, 2.46682164, 2.17334891, 1.95329696, 
    1.92659278, 2.49012358, 2.29460285, 2.14150798, 2.0982069, 
    3.20281214, 3.31694401, 2.94037862, 2.86813115, 2.71119738, 
    2.2551189, 2.58737415, 2.52201226, 2.568840791, 2.268484741, 
    2.276562231, 2.232093049, 2.629438029, 2.130462962, 2.181554457, 
    2.441597329, 2.677526322, 2.793236578, 2.675139082, 2.446825908, 
    2.38063425, 2.452040834, 2.214548263, 2.020306975, 30.58, 
    47.627, 39.743, 40.186, 29.493, 36.148, 35.165, 36.891, 19.502, 
    34.743, 41.201, 36.061, 38.791, 27.523, 41.801, 45.271, 40.159, 
    41.955, 36.073, 38.769, 44.57, 39.843, 42.192, 44.747, 45.983, 
    47.563, 44.659, 49.04, 49.658, 45.392, 48.546, 42.873, 41.302, 
    45.665, 23.643, 32.305, 27.708, 38.898, 32.446, 32.134, 46.13, 
    34.115, 44.719, 46.675, 37.619, 46.065, 39.547, 38.268)

I've tried the function ggbarplot(), but I can't find a way to use more than one grouping variable, so I have to drop one.

library(ggpubr)
library(rstatix)
# Attempt 1 (ggpubr / rstatix): group the data by Treatment, call t_test() on
# the response against Fraction, then pipe through adjust_pvalue() and
# add_significance().
# NOTE(review): `datamay` is not defined in the code shown — presumably a
# subset of `datas` (May 21 only?); confirm.
# NOTE(review): the formula uses `C..org`, while the plot call below and the
# dput() output use `Corg` — confirm which column name is correct.
res.stats <- datamay %>%
  group_by(Treatment) %>%
  t_test(C..org ~ Fraction) %>%
  adjust_pvalue() %>%
  add_significance()
res.stats

# Bar plot of Corg by Treatment, coloured and filled by Fraction, requesting
# add = "mean_se" drawn in black. Time is not passed to ggbarplot() here, so
# Fraction is the only grouping variable shown.
plot<- ggbarplot(datamay, x = "Treatment",
                 y = "Corg", add = "mean_se", add.params=list(color = "black"),
                 color ="Fraction", fill= "Fraction",
                 palette=c("darkgoldenrod4", "darkolivegreen4"),
                 ylim=c(0,60))
plot

Also, I have tried with ggplot2, dropping Treatment as the grouping variable, but I can't manage to get the error bars where they should be.

library(ggplot2)
library(Rmisc)

# Attempt 2 (Rmisc / ggplot2): summarise Corg for every
# Time x Treatment x Fraction combination with summarySE().
# NOTE(review): `na.omit(TRUE)` simply evaluates to TRUE and is passed
# positionally; presumably `na.rm = TRUE` was intended — confirm against the
# summarySE() signature.
summdata <- summarySE(datas, measurevar="Corg", groupvars=c("Time", "Treatment","Fraction"), na.omit(TRUE))

# Plot the summarised values with Time on x and Fraction as fill.
# geom_bar() is given no position, so ggplot2's default (stacking) applies,
# and Treatment is not mapped to any aesthetic. The error bars, however, are
# drawn at the unstacked Corg +/- se values and shifted with
# position_dodge(.9), so they do not line up with the stacked bar tops.
# NOTE(review): assumes summarySE() returns an `se` column — confirm.
plot<-ggplot(summdata, aes(x=Time, y=Corg, fill=Fraction)) + 
  geom_bar(stat="identity") +
  geom_errorbar(aes(ymin=Corg-se, ymax=Corg+se),
                width=.2,                    
                position=position_dodge(.9))
plot

Is it even possible to plot a stacked bar plot with two grouping variables? Any help will be appreciated.

Thanks in advance and let me know if you need any more information about the dataset.


Solution

  • I think it would be easiest to summarize your data frame using dplyr to get the mean values and error bars, then plot that. You can use facets to show the effects of different treatments. Your request to stack bars is possible, but if you also want error bars this is very problematic from a data visualization point of view, and you should stick to dodged rather than stacked bars:

    library(tidyverse)
    
    # One dodge position shared by the bars, the points and the error bars, so
    # all three layers are shifted by exactly the same amount and the error
    # bars sit centred on their bars.
    dodge <- position_dodge(width = 0.9)

    # Summarise Corg for each Time x Fraction x Treatment cell, then draw
    # dodged bars (one per Fraction) with error bars, faceted by Treatment.
    datas %>%
      group_by(Time, Fraction, Treatment) %>%
      # Order matters: `se` must be computed from the raw Corg values before
      # Corg is overwritten by its mean; `upper`/`lower` then use that mean to
      # form an approximate 95% interval (mean +/- 1.96 * se).
      summarise(se = sd(Corg) / sqrt(n()),
                Corg = mean(Corg),
                upper = Corg + 1.96 * se,
                lower = Corg - 1.96 * se,
                .groups = "drop") %>%
      ggplot(aes(Time, Corg, fill = Fraction)) +
      geom_col(position = dodge) +
      # Points mark the cell means (the data here are already summarised).
      geom_point(position = dodge, alpha = 0.2) +
      geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.5,
                    position = dodge, alpha = 0.3) +
      facet_grid(. ~ Treatment) +
      theme_minimal() +
      scale_fill_manual(values = c('deepskyblue4', 'orange'))
    

    enter image description here