My data looks like this
dput(rbind.data.frame(head(combined_iresidues), tail(combined_iresidues)))
structure(list(atomnum = c(889, 889, 889, 889, 238, 238, 121,
121, 914, 914, 914, 914), atominfo = c("C115-SG", "C115-SG",
"C115-SG", "C115-SG", "C30-SG", "C30-SG", "G16-C", "G16-C", "D119-CB",
"D119-CB", "D119-CB", "D119-CB"), dx = c("d1", "d2", "d3", "d4",
"d1", "d2", "d3", "d4", "d1", "d2", "d3", "d4"), value = c(0.0736,
0.0912, 0.1027, 0.1046, 0.0734, 0.11, 0.0263, 0.0255, 0.0048,
0.0185, 0.0231, 0.0184), facet_title = c("F[o]^2 - F[o]^1", "F[o]^3 - F[o]^1",
"F[o]^4 - F[o]^1", "F[o]^5 - F[o]^1", "F[o]^2 - F[o]^1", "F[o]^3 - F[o]^1",
"F[o]^4 - F[o]^1", "F[o]^5 - F[o]^1", "F[o]^2 - F[o]^1", "F[o]^3 - F[o]^1",
"F[o]^4 - F[o]^1", "F[o]^5 - F[o]^1"), xstl = c("XS1-4.5", "XS1-4.5",
"XS1-4.5", "XS1-4.5", "XS1-4.5", "XS1-4.5", "XS1-9.5", "XS1-9.5",
"XS1-9.5", "XS1-9.5", "XS1-9.5", "XS1-9.5"), pH = c("4.5", "4.5",
"4.5", "4.5", "4.5", "4.5", "9.5", "9.5", "9.5", "9.5", "9.5",
"9.5"), dataset = c("b3x11", "b3x11", "b3x11", "b3x11", "b3x11",
"b3x11", "b3x14", "b3x14", "b3x14", "b3x14", "b3x14", "b3x14"
), mean = c(0.0204145631067961, 0.0239122683142101, 0.0292734333627538,
0.0319611650485437, 0.0204145631067961, 0.0239122683142101, 0.0273805996472663,
0.0315843915343915, 0.0181168430335097, 0.0231408289241623, 0.0273805996472663,
0.0315843915343915), sd = c(0.00984739879326438, 0.0112094908468026,
0.0117753129471673, 0.0134786092559798, 0.00984739879326438,
0.0112094908468026, 0.0110648522827486, 0.013277545556386, 0.00666584795415998,
0.00959848563099842, 0.0110648522827486, 0.013277545556386),
residue_type = c("CYS", "CYS", "CYS", "CYS", "CYS", "CYS",
"GLY", "GLY", "ASP|GLU", "ASP|GLU", "ASP|GLU", "ASP|GLU")), row.names = c(NA,
-12L), groups = structure(list(dx = c("d1", "d2", "d3", "d4"),
.rows = structure(list(c(1L, 5L), c(2L, 6L), 3L, 4L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -4L), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
My code to generate the plot is like this
# Boxplot for interesting residues
ggplot(data = combined_iresidues, aes(x = factor(residue_type, levels = c("CYS", "ASP|GLU", "MET", "GLY")), y = value, fill = factor(xstl))) + geom_hline(aes(yintercept = mean + 3 * sd, colour = factor(xstl)), linetype = "dashed", linewidth = 1.0) +
geom_boxplot(outlier.alpha=0.5) +
scale_y_continuous(breaks = c(0, 0.05, 0.10, 0.15)) +
facet_wrap(~facet_title, labeller = label_parsed) +
theme_bw(base_size = 16) +
labs(
x = "Residue type",
y = "y_label",
fill = "Crystal", # Change legend title for color
colour = "Crystal"
)
If I add to the code above + geom_signif(comparisons = list(c(1,2)))
I get the lines between two different residue_types, not within the same residue_types.
From this answer (geom_signif between boxplots of different x and same group) It seems that I need to get a new x (x2) ...
combined_iresidues %>%
mutate(x2 = interaction(xstl, residue_type)) %>%
ggplot(aes(x = factor(x2), y = value, fill = factor(xstl))) +
geom_hline(aes(yintercept = mean + 3 * sd, colour = factor(xstl)), linetype = "dashed", linewidth = 1.0) +
geom_boxplot(outlier.alpha=0.5) +
scale_y_continuous(breaks = c(0, 0.05, 0.10, 0.15))+
facet_wrap(~facet_title, labeller = label_parsed) +
theme_bw(base_size = 16) +
labs(
x = "Residue type",
y = "y_label",
fill = "Crystal", # Change legend title for color
colour = "Crystal"
) +
geom_signif(comparisons = list(c(1, 2), c(3, 4), c(5, 6), c(7, 8)), map_signif_level = TRUE, textsize = 5, y_position = 0.13)
This generates the significance lines but it would be good to have the x labels as in the first plot.
I tried changing the labels without success with scale_x_discrete(breaks = c(1.5, 3.5, 5.5, 7.5), labels = c("CYS", "ASP|GLU", "MET", "GLY"))
but I get no labels.
Is there an easier way to get the ggsignif without having a new x2
? Maybe ggsignif is not the best option here? Rotating the x-axis text helps a little bit but it is an ugly solution.
As we can pass a function to the labels=
argument of scale_x_discrete
we can use e.g. gsub()
to get keep only the residue_type
of the interaction variable. To this end I use an underscore as the sep=
arator in interaction()
. Additionally I converted residue_type
to a factor
first to get the desired order.
library(ggplot2)
library(ggsignif)
library(dplyr, warn = FALSE)
combined_iresidues |>
ungroup() |>
mutate(
residue_type = factor(residue_type,
levels = c("CYS", "ASP|GLU", "MET", "GLY")
),
x2 = interaction(xstl, residue_type, sep = "_")
) |>
ggplot(aes(x = x2, y = value, fill = xstl)) +
geom_hline(aes(
yintercept = mean + 3 * sd,
colour = factor(xstl)
), linetype = "dashed", linewidth = 1.0) +
geom_boxplot(outlier.alpha = 0.5) +
scale_y_continuous(breaks = c(0, 0.05, 0.10, 0.15)) +
scale_x_discrete(
labels = \(x) gsub("^.*_", "", x)
) +
facet_wrap(~facet_title, labeller = label_parsed) +
theme_bw(base_size = 16) +
labs(
x = "Residue type",
y = "y_label",
fill = "Crystal", # Change legend title for color
colour = "Crystal"
) +
ggsignif::geom_signif(
comparisons = list(c(1, 2), c(3, 4), c(5, 6), c(7, 8)),
map_signif_level = TRUE, textsize = 5, y_position = 0.13
)
#> Warning: Computation failed in `stat_signif()`.
#> Caused by error in `wilcox.test.default()`:
#> ! not enough 'y' observations
#> Warning: Computation failed in `stat_signif()`.
#> Caused by error in `wilcox.test.default()`:
#> ! not enough 'y' observations