Search code examples
r, ggplot2, time-series, reshape

adding standard errors to correct panels on faceted bar chart


I have this dataframe:

# Data frame literal: one row per taxon (9 rows). Columns come in lfc_*/se_*
# pairs: one pair named *_StatusChronic.ACST0. and one pair for each of
# Time.fT1 through Time.fT5.
# NOTE(review): presumably this is the `taxonFC1` object used in the snippets
# that follow -- confirm.
structure(list(taxon = c("Acidaminococcus", "Butyricicoccus", 
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides", 
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957, 
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253, 
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504, 
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816, 
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562, 
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181, 
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188, 
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725, 
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655, 
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087, 
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043, 
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725, 
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535, 
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069, 
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486, 
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693, 
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852, 
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665, 
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657, 
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927, 
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683, 
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519, 
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276, 
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791, 
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")

It is a data frame in which each row is a category (taxon), and the columns hold the log fold change (lfc) and its standard error (se) for each point of a time series (from T0 to T5).

I want to draw a bar chart for each category (taxon) across its time points (T0-T5):

  # Stack every non-taxon column into long format: the former column name goes
  # to `timepoint` and its cell content to `value`.
  melted_df <- reshape2::melt(
    taxonFC1,
    id.vars = "taxon",
    variable.name = "timepoint",
    value.name = "value"
  )

  # One panel per taxon, three panels per row; bar height is the melted value.
  ggplot(
    data = melted_df,
    mapping = aes(x = timepoint, y = value, fill = taxon)
  ) +
    geom_col() +
    facet_wrap(vars(taxon), ncol = 3) +
    ggtitle("Bar Chart for Different Time Series") +
    xlab("Time Point") +
    ylab("Value") +
    labs(fill = "Category")

enter image description here

The question is whether it is possible to attach each standard error (se columns) to its corresponding logFC value (lfc columns) at every time point, so that the error bars appear in the correct panels.

Update:

I did this, but only for T0:

# Bar chart of the lfc_StatusChronic.ACST0. column, one bar per taxon.
ggplot(
  data = taxonFC1,
  mapping = aes(x = taxon, y = lfc_StatusChronic.ACST0., fill = taxon)
) +
  geom_col() +
  # Error bars span the bar value minus/plus the se_StatusChronic.ACST0. column.
  geom_errorbar(
    mapping = aes(
      ymin = lfc_StatusChronic.ACST0. - se_StatusChronic.ACST0.,
      ymax = lfc_StatusChronic.ACST0. + se_StatusChronic.ACST0.
    ),
    width = 0.4
  ) +
  # Rotate the taxon names so they do not overlap on the x-axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Category",
    y = "lfc_StatusChronic.ACST0.",
    title = "Bar Plot of lfc_StatusChronic.ACST0. by Category"
  )

Output expected (the image is from other data):

enter image description here


Solution

  • Is this what you're looking for?

    library(dplyr)
    library(tidyr)
    library(ggplot2)
    # Example data: one row per taxon (9 rows). Columns come in lfc_*/se_*
    # pairs: one pair named *_StatusChronic.ACST0. and one pair for each of
    # Time.fT1 through Time.fT5.
    dat <- structure(list(taxon = c("Acidaminococcus", "Butyricicoccus", 
    "Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides", 
    "Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957, 
    0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253, 
    -0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504, 
    0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816, 
    0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562, 
     0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181, 
     -0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188, 
     0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725, 
     0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655, 
    -0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087, 
    -0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043, 
    0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725, 
    0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535, 
     0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069, 
     -0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486, 
    0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693, 
    0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852, 
     -0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665, 
     0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657, 
     0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927, 
     0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683, 
     0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519, 
     -0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276, 
    0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791, 
    0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
    ), class = "data.frame")
    
      # Reshape to long format so that each row holds one taxon/time pair with
      # its log fold change (`lfc`) and standard error (`se`), then draw one
      # panel per taxon with a bar and a +/- se error bar at every time point.
      dat %>%
        # Give the T0 columns the same `<measure>_Time.f<period>` shape as the
        # T1-T5 columns so a single pattern can split every column name.
        rename(lfc_Time.fT0 = lfc_StatusChronic.ACST0.,
               se_Time.fT0 = se_StatusChronic.ACST0.) %>%
        # ".value" turns the first capture group (lfc / se) into output columns;
        # the second capture group (T0 ... T5) fills the `time` column.
        pivot_longer(-taxon, names_pattern = "(.*)_Time\\.f(.*)",
                     names_to = c(".value", "time")) %>%
        ggplot(aes(x = time, y = lfc, ymin = lfc - se, ymax = lfc + se,
                   fill = taxon)) +
        geom_col() +
        geom_errorbar(width = 0.4) +
        theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
        scale_fill_brewer(palette = "Set1") +
        # Labels describe what is plotted: time on x, lfc on y, one facet per
        # taxon.
        xlab("Time point") +
        ylab("Log fold change (lfc)") +
        facet_wrap(~taxon, ncol = 1) +
        ggtitle("Log fold change (+/- SE) by time point for each taxon")
    

    enter image description here

    If so, the key is to rename the T0 variables to have the same format as the other time-period variables and then use pivot_longer() to put all the lfc measures in a single column and all the se measures in a single column. The rest is accomplished by putting the time variable on the x-axis and faceting on the taxon variable. The pivot_longer() documentation has some good examples of retaining multiple value columns; see in particular the last example on the page.