I'm working on a dataframe (called df) looking something like this (shortened here for practical reasons):
Observed | Shannon | InvSimpson | Evenness | Month |
---|---|---|---|---|
688 | 4.553810 | 23.365814 | 0.6969632 | February |
749 | 4.381557 | 15.162467 | 0.6619927 | February |
610 | 3.829187 | 11.178981 | 0.5970548 | February |
665 | 4.201113 | 16.284009 | 0.6463463 | March |
839 | 5.185554 | 43.198709 | 0.7702601 | March |
757 | 4.782258 | 31.011366 | 0.7213751 | March |
516 | 3.239304 | 4.765211 | 0.5186118 | April |
... | ... | ... | ... | ... |
I am running a post-hoc test using Dunn's test, then adding the xy positions, and plotting everything as boxplots. It works but my code is very repetitive...
library(rstatix)
obs_dunn <- dunn_test(Observed ~ Month, data=df, p.adjust.method="BH")
obs_dunn <- obs_dunn %>% arrange(p.adj)
obs_dunn <- obs_dunn %>% add_xy_position(x = "Month")
obs_bxp <- ggboxplot(df, x = "Month", y = "Observed" ) +
stat_pvalue_manual(obs_dunn, label = "p.adj.signif", hide.ns = TRUE)
obs_bxp
sh_dunn <- dunn_test(Shannon ~ Month, data=df, p.adjust.method="BH")
sh_dunn <- sh_dunn %>% arrange(p.adj)
sh_dunn <- sh_dunn %>% add_xy_position(x = "Month")
sh_bxp <- ggboxplot(df, x = "Month", y = "Shannon" ) +
stat_pvalue_manual(sh_dunn, label = "p.adj.signif", hide.ns = TRUE)
sh_bxp
inv_dunn <- dunn_test(InvSimpson ~ Month, data=df, p.adjust.method="BH")
inv_dunn <- inv_dunn %>% arrange(p.adj)
inv_dunn <- inv_dunn %>% add_xy_position(x = "Month")
inv_bxp <- ggboxplot(df, x = "Month", y = "InvSimpson" ) +
stat_pvalue_manual(inv_dunn, label = "p.adj.signif", hide.ns = TRUE)
inv_bxp
ev_dunn <- dunn_test(Evenness ~ Month, data=df, p.adjust.method="BH")
ev_dunn <- ev_dunn %>% arrange(p.adj)
ev_dunn <- ev_dunn %>% add_xy_position(x = "Month")
ev_bxp <- ggboxplot(df, x = "Month", y = "Evenness" ) +
stat_pvalue_manual(ev_dunn, label = "p.adj.signif", hide.ns = TRUE)
ev_bxp
I get basic boxplots with the significance stars added in, here's the result for the "Observed" one for example:
It's the same lines of code for each indices (Observed, Shannon, InvSimpson, Evenness), so I would like to make a for loop, but I'm quite new at that and I'm really struggling.
Do you have any idea how I could run a loop for the dunn_test()
, add_xy_position()
and ggboxplot()
on my 4 indices? Preferably with a separate dataframe as output for each indices.
Even just a loop for the first step dunn_test()
would already be so much help, because I don't know where to start...
Thanks in advance for any advice you would have. :)
Just use fo
rmula and data
as arguments in a function as well as ...
for additional arguments such as bracket.nudge.y=
to adjust for the silly n.s
whitespace.
dunn_bxp <- \(fo, data, plot=TRUE, ...) {
vars <- all.vars(fo)
dnn <- rstatix::dunn_test(fo, data=data, p.adjust.method="BH")
dnn_p <- rstatix::add_xy_position(dnn, x=vars[2])
if (plot) {
p <- ggpubr::ggboxplot(data, x=vars[2], y=vars[1]) +
ggpubr::stat_pvalue_manual(dnn_p, label="p.adj.signif", hide.ns=TRUE, ...)
print(p)
return(invisible(as.data.frame(dnn[1:8])))
} else {
return(as.data.frame(dnn[1:8]))
}
}
If plot=TRUE
(the default), it plots and invisibly returns the statistics.
r <- dunn_bxp(fo=observed ~ month, data=df, bracket.nudge.y=-1000)
head(r)
# .y. group1 group2 n1 n2 statistic p p.adj
# 1 observed Apr Aug 3 3 -0.6199874 0.535266078 0.71803318
# 2 observed Apr Dec 3 3 -2.7124449 0.006678888 0.08816133
# 3 observed Apr Feb 3 3 -1.1237272 0.261128784 0.50954093
# 4 observed Apr Jan 3 3 -1.7049654 0.088200884 0.32174752
# 5 observed Apr Jul 3 3 -1.7049654 0.088200884 0.32174752
# 6 observed Apr Jun 3 3 0.7749843 0.438348961 0.64291181
If plot=FALSE
it just returns the statistics.
head(dunn_bxp(fo=observed ~ month, data=df, plot=FALSE))
# .y. group1 group2 n1 n2 statistic p p.adj
# 1 observed Apr Aug 3 3 -0.6199874 0.535266078 0.71803318
# 2 observed Apr Dec 3 3 -2.7124449 0.006678888 0.08816133
# 3 observed Apr Feb 3 3 -1.1237272 0.261128784 0.50954093
# 4 observed Apr Jan 3 3 -1.7049654 0.088200884 0.32174752
# 5 observed Apr Jul 3 3 -1.7049654 0.088200884 0.32174752
# 6 observed Apr Jun 3 3 0.7749843 0.438348961 0.64291181
Data:
set.seed(557)
df <- data.frame(observed=round(rep(runif(12, 500, 800), each=3) + rep.int(rnorm(12, 0, 64), 3)),
month=rep(month.abb, each=3))