Search code examples
r

Not getting the significance that is apparent from my barplot (and with Prism) with kruskal-wallis test, Dunn-test?


Someone plotted this data for me in prism using KW and Dunn's and I wanted to do it in R, but I'm getting way different R results. For instance, in prism there are sig differences between the control and treatments in all three facets. Also I'm getting an error of:

Warning messages: 1: Computation failed in stat_signif(). Caused by error in kruskal.test.default(): ! all observations are in the same group 2: Computation failed in stat_signif(). Caused by error in kruskal.test.default(): ! all observations are in the same group

library(tidyverse)
library(ggpubr)
library(FSA)
library(rcompanion)



DTData <- read.csv("C:/Users/Coral HD/R/Amox/LiquidInhibition/LiquidInhib_ZeroAmox.csv")

data$Day <- as.factor(data$Day)
data$Treatment <- as.factor(data$Treatment)

# Create a faceted bar plot
p <- ggplot(data, aes(x=Treatment, y=CFUs, fill=Treatment)) +
  geom_bar(stat="summary", fun=mean) +
  facet_wrap(~ Day, scales="fixed") +
  labs(title="CFUs by Treatment across Different Days", y="Mean CFUs", x="Treatment") +
  theme_bw()
print(p)

# Perform Kruskal-Wallis Test by Day
kw_results <- data %>%
  group_by(Day) %>%
  summarise(Kruskal_Wallis = kruskal.test(CFUs ~ Treatment)$p.value)

# Print Kruskal-Wallis results
print(kw_results)

# Perform Dunn's Test for multiple comparisons
dunn_results <- list()
for (day in unique(data$Day)) {
  subset_data <- data[data$Day == day,]
  dunn_test <- dunnTest(CFUs ~ Treatment, data = subset_data, method = "bonferroni")
  dunn_results[[as.character(day)]] <- dunn_test
}

# Print Dunn's Test results
print(dunn_results)

# Adding significance brackets
# Please note: Adjusting manually as per p-values might be necessary
p + stat_compare_means(label = "p.signif", method = "kruskal.test", 
                       comparisons = list(c("0", "100"), c("0", "400"), c("100", "400")), 
                       label.y = c(300, 400, 500)) 

enter image description here

structure(list(Day = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L), levels = c("1", "14", "28"), class = "factor"), 
    Treatment = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 
    1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 
    3L, 3L, 3L), levels = c("0", "100", "400"), class = "factor"), 
    CFUs = c(138L, 148L, 141L, 0L, 0L, 0L, 0L, 0L, 0L, 348L, 
    290L, 321L, 0L, 0L, 0L, 0L, 0L, 0L, 250L, 215L, 218L, 28L, 
    45L, 34L, 35L, 41L, 28L)), row.names = c(NA, -27L), class = "data.frame")

Updated the code to fix DTData. Change to data. But my question still remains. Expecting to get enter image description here

library(tidyverse)
library(ggpubr)
library(FSA)
library(rcompanion)



data <- read.csv("C:/Users/Coral HD/R/Amox/LiquidInhibition/LiquidInhib_ZeroAmox.csv")

data$Day <- as.factor(data$Day)
data$Treatment <- as.factor(data$Treatment)

# Create a faceted bar plot
p <- ggplot(data, aes(x=Treatment, y=CFUs, fill=Treatment)) +
  geom_bar(stat="summary", fun=mean) +
  facet_wrap(~ Day, scales="fixed") +
  labs(title="CFUs by Treatment across Different Days", y="Mean CFUs", x="Treatment") +
  theme_bw()
print(p)

# Perform Kruskal-Wallis Test by Day
kw_results <- data %>%
  group_by(Day) %>%
  summarise(Kruskal_Wallis = kruskal.test(CFUs ~ Treatment)$p.value)

# Print Kruskal-Wallis results
print(kw_results)

# Perform Dunn's Test for multiple comparisons
dunn_results <- list()
for (day in unique(data$Day)) {
  subset_data <- data[data$Day == day,]
  dunn_test <- dunnTest(CFUs ~ Treatment, data = subset_data, method = "bonferroni")
  dunn_results[[as.character(day)]] <- dunn_test
}

# Print Dunn's Test results
print(dunn_results)

# Adding significance brackets
# Please note: Adjusting manually as per p-values might be necessary
p + stat_compare_means(label = "p.signif", method = "kruskal.test", 
                       comparisons = list(c("0", "100"), c("0", "400"), c("100", "400")), 
                       label.y = c(300, 400, 500))  # Adjust these y values as needed based on your plot scale

enter image description here

Getting Error Here is the updated code using the custom Kruskal-Wallis function: I get this Error: Error in summarise(): ℹ In argument: Kruskal_Wallis = kruskal.test(CFUs ~ Treatment)$p.value. ℹ In group 1: Day = 1. Caused by error in kruskal.test(): ! argument "y" is missing, with no default Run rlang::last_trace() to see where the error occurred.

library(tidyverse)
library(ggpubr)
library(FSA)
library(rcompanion)



data <- read.csv("C:/Users/Coral HD/R/Amox/LiquidInhibition/LiquidInhib_FortyAmox.csv")

View(data)

data$Day <- as.factor(data$Day)
data$Treatment <- as.factor(data$Treatment)

# Create a faceted bar plot
p <- ggplot(data, aes(x=Treatment, y=CFUs, fill=Treatment)) +
  geom_bar(stat="summary", fun=mean, show.legend = FALSE, color = "black", width = 0.65) +
  facet_wrap(~ Day, scales="fixed") +
  ylim(0,550) +
  labs(title="", y="Mean CFUs", x="Treatment") +
  theme_bw() + scale_fill_brewer(palette = "Greys") +
  theme(axis.text = element_text(size = 11), axis.title = element_text(size = 14), 
        strip.text = element_text(size = 15))
print(p)

#Custom KW
kruskal.test <- function(x, y, paired = FALSE) {
  stats:::kruskal.test.default(x = c(x,y), g = rep(1:2, c(length(x), length(y))))
}


# Perform Kruskal-Wallis Test by Day
kw_results <- data %>%
  group_by(Day) %>%
  summarise(Kruskal_Wallis = kruskal.test(CFUs ~ Treatment)$p.value)

# Print Kruskal-Wallis results
print(kw_results)

# Perform Dunn's Test for multiple comparisons
dunn_results <- list()
for (day in unique(data$Day)) {
  subset_data <- data[data$Day == day,]
  dunn_test <- dunnTest(CFUs ~ Treatment, data = subset_data, method = "bonferroni")
  dunn_results[[as.character(day)]] <- dunn_test
}

# Print Dunn's Test results
print(dunn_results)

# Adding significance brackets
# Please note: Adjusting manually as per p-values might be necessary
p + stat_compare_means(label = "p.signif", method = "kruskal.test", 
                       comparisons = list(c("0", "100"), c("0", "400"), c("100", "400")), 
                       label.y = c(350, 400, 475))

Solution

  • This doesn't answer any questions about multiple comparisons corrections, but writing a kruskal.test() that takes x and y instead of x and g and passing it to the base-R kruskal.test appears to do what you want ...

    kruskal.test <- function(x, y, paired = FALSE) {
        stats:::kruskal.test.default(x = c(x,y), g = rep(1:2, c(length(x), length(y))))
    }
    p + stat_compare_means(label = "p.signif", method = "kruskal.test", 
                           comparisons = list(c("0", "100"), c("0", "400"), c("100", "400"))) 
    

    enter image description here

    (from the comments: "non-significant" is rendered as NS. for p-values exactly equal to 1, ns otherwise)