How can I display letters on a ggstatsplot
package plot for Kruskal Walllis test? This is a reproducible example based on this question.
set.seed(123)
# Create vector for number of cases per month
cases_per_month <- c(10, 25, 20, 20, 25, 20, 19, 5)
# Create vector for months (April to November)
months <- c("April", "May", "June", "July", "August", "September", "October", "November")
# Create empty vectors for final dataset
dataset <- data.frame(mean_severity = numeric(), month = character())
# Generate dataset
dat <- list()
for (i in 1:length(months)) {
month <- rep(months[i], cases_per_month[i])
severity <- sample.int(n = 10, size = cases_per_month[i], replace = TRUE)
# generate some differences in the sample
if (i %in% c(1, 4, 7)){
severity <- severity^2
}
temp_data <- data.frame(mean_severity = severity, month = month)
dat[[i]] <- rbind(dataset, temp_data)
}
# Using rbind to combine rows
dat <- do. Call(rbind, dat)
Currently, I have bars showing p-values. I want letters instead of bars showing p values. The question has been answered here. Apparently, AddLetters
function should show letters instead of p values as shown below in his example, but it runs indefinitely without displaying any letters in my case. Is there any other way of displaying letters?
Example plot where letters are shown instead of bars here
We have to change the input to match multcompLetters
.
Here is how to do it.
library(Matrix)
library(PMCMRplus)
library(ggstatsplot)
#this does the plot in your post
p <- ggbetweenstats(data = dat, y = mean_severity, x = month, type = "nonparametric")
#this does the Dunn pairwise tests
#kwAllPairsDunnTest(mean_severity ~ month, data=dat, p.adjust.method = "holm")
#now we have to format the p-value matrix into a symmetric matrix for multcompLetters
pval.matrix <- kwAllPairsDunnTest(x = dat$mean_severity,
g = as.factor(dat$month), p.adjust.method = "holm")
> pval.matrix$p.value
April August July June May November October
August 0.004092534 NA NA NA NA NA NA
July 1.000000000 0.001097334 NA NA NA NA NA
June 0.004907027 1.000000000 0.001911186 NA NA NA NA
May 0.029411147 1.000000000 0.015500796 1.0000000000 NA NA NA
November 0.136459815 1.000000000 0.167977744 1.0000000000 1.000000000 NA NA
October 1.000000000 0.000118815 1.000000000 0.0002455998 0.002648157 0.07101786 NA
September 0.009390306 1.000000000 0.004164969 1.0000000000 1.000000000 1.00000000 0.0006461642
We use forceSymmetric
from the Matrix
package after manipulating the pairwise p-value matrix:
#square and diagonalize the p-value matrix
new.pval.matrix <- rbind(1,pval.matrix$p.value)
new.pval.matrix <- cbind(new.pval.matrix, 1)
diag(new.pval.matrix) <- 1
new.pval.matrix <- as.matrix(forceSymmetric(new.pval.matrix, "L"))
#Add September to the row and column names
rownames(new.pval.matrix)[dim(pval.matrix$p.value)+1] <-
rownames(pval.matrix$p.value)[dim(pval.matrix$p.value)[1]]
colnames(new.pval.matrix)[dim(pval.matrix$p.value)+1] <-
rownames(pval.matrix$p.value)[dim(pval.matrix$p.value)[1]]
> new.pval.matrix
April August July June May November October September
April 1.000000000 0.004092534 1.000000000 0.0049070268 0.029411147 0.13645981 1.0000000000 0.0093903064
August 0.004092534 1.000000000 0.001097334 1.0000000000 1.000000000 1.00000000 0.0001188150 1.0000000000
July 1.000000000 0.001097334 1.000000000 0.0019111864 0.015500796 0.16797774 1.0000000000 0.0041649687
June 0.004907027 1.000000000 0.001911186 1.0000000000 1.000000000 1.00000000 0.0002455998 1.0000000000
May 0.029411147 1.000000000 0.015500796 1.0000000000 1.000000000 1.00000000 0.0026481566 1.0000000000
November 0.136459815 1.000000000 0.167977744 1.0000000000 1.000000000 1.00000000 0.0710178590 1.0000000000
October 1.000000000 0.000118815 1.000000000 0.0002455998 0.002648157 0.07101786 1.0000000000 0.0006461642
September 0.009390306 1.000000000 0.004164969 1.0000000000 1.000000000 1.00000000 0.0006461642 1.0000000000
Now multcompLetters
works:
> multcompLetters(new.pval.matrix)
April August July June May November October September
"a" "b" "a" "b" "b" "ab" "a" "b"
We can follow your link on how to prepare the CLD:
data.summary <- group_by(dat, month) %>%
summarise(mean=mean(mean_severity), sd=sd(mean_severity)) %>%
arrange(desc(mean))
#match ordering of the factors [month]
data.summary <- data.summary[order(data.summary$month),]
CLD <- multcompLetters(new.pval.matrix)
data.summary$CLD <- CLD$Letters
#you'll likely need to change these graphics options for your purposes
p + geom_text(data = data.summary, aes(label=CLD,x=month, y=mean),
position=position_dodge2(0.75), hjust = 3)