Search code examples
r dictionary group

How to do a test to all sequential pairs of columns by groups in R


I am trying to apply the Wilcoxon test to all sequential pairs of columns of `df`. I have no problem doing it for the whole data frame using `col <- seq(2, ncol(df), by = 2)`.

But I don't know how to do it by group, in this case by skul.

library(tidyverse)

# Toy data: a grouping column `skul` (15 rows of "a", 16 rows of "b") followed
# by two initial/final column pairs. Values are random, so results vary by run.
df <- tibble(
  skul = rep(c("a", "b"), times = c(15, 16)),
  x1i = sample(1:10, 31, replace = TRUE),
  x1f = sample(1:10, 31, replace = TRUE),
  x2i = sample(1:10, 31, replace = TRUE),
  x2f = sample(1:10, 31, replace = TRUE)
)

# Positions of the first column of every pair (2, 4, ...).
col <- seq(2, ncol(df), by = 2)

# The second column of each pair is whatever remains once "skul" (column 1)
# and the `col` columns are left out.
map2(
  df[col],
  df[setdiff(seq_along(df), c(1, col))],
  wilcox.test
)
#> Warning in wilcox.test.default(.x[[i]], .y[[i]], ...): cannot compute exact p-
#> value with ties

#> Warning in wilcox.test.default(.x[[i]], .y[[i]], ...): cannot compute exact p-
#> value with ties
#> $x1i
#> 
#>  Wilcoxon rank sum test with continuity correction
#> 
#> data:  .x[[i]] and .y[[i]]
#> W = 486, p-value = 0.9435
#> alternative hypothesis: true location shift is not equal to 0
#> 
#> 
#> $x2i
#> 
#>  Wilcoxon rank sum test with continuity correction
#> 
#> data:  .x[[i]] and .y[[i]]
#> W = 401, p-value = 0.2613
#> alternative hypothesis: true location shift is not equal to 0

Created on 2022-04-13 by the reprex package (v2.0.1)


Solution

  • Is this what you're looking for?

    # Example data: two groups ("a" and "b") in `skul` and two initial/final
    # column pairs (x1i/x1f and x2i/x2f). Values are random, so the test
    # statistics differ from run to run.
    df <- dplyr::tibble(skul = c(rep('a',15), rep('b', 16)),
                        x1i = sample(1:10, 31, replace = TRUE),
                        x1f = sample(1:10, 31, replace = TRUE),
                        x2i = sample(1:10, 31, replace = TRUE),
                        x2f = sample(1:10, 31, replace = TRUE))

    # Pair stems ("x1", "x2"): strip only the trailing "i"/"f" suffix, so an
    # "i" or "f" elsewhere in a column name is left alone.
    stems <- unique(sub("[if]$", "", names(df)[-1]))

    # For every level of `skul`, run one Wilcoxon test per stem. The result is
    # a `by` object holding, per group, a list of "htest" objects in `stems`
    # order.
    out <- by(df, list(df$skul), function(data) {
      lapply(stems, function(stem) {
        # Exact names of the two columns of the pair: initial first, then
        # final. Exact names avoid regex matches on similar stems
        # (e.g. "x1" also matching "x10i").
        pair <- paste0(stem, c("i", "f"))
        absent <- setdiff(pair, names(data))
        if (length(absent) > 0) {
          stop("Missing paired column(s): ", paste(absent, collapse = ", "),
               call. = FALSE)
        }
        # Build the call wilcox.test(data$<initial>, data$<final>) from symbols
        # rather than parsing pasted text. The printed "data:" label and the
        # warning calls still show the column names, and non-syntactic column
        # names are handled correctly.
        test_call <- call(
          "wilcox.test",
          call("$", quote(data), as.name(pair[1])),
          call("$", quote(data), as.name(pair[2]))
        )
        eval(test_call)
      })
    })
    #> Warning in wilcox.test.default(data$x1i, data$x1f): cannot compute exact p-value
    #> with ties
    #> Warning in wilcox.test.default(data$x2i, data$x2f): cannot compute exact p-value
    #> with ties
    #> Warning in wilcox.test.default(data$x1i, data$x1f): cannot compute exact p-value
    #> with ties
    #> Warning in wilcox.test.default(data$x2i, data$x2f): cannot compute exact p-value
    #> with ties
    out
    #> : a
    #> [[1]]
    #> 
    #>  Wilcoxon rank sum test with continuity correction
    #> 
    #> data:  data$x1i and data$x1f
    #> W = 110, p-value = 0.9334
    #> alternative hypothesis: true location shift is not equal to 0
    #> 
    #> 
    #> [[2]]
    #> 
    #>  Wilcoxon rank sum test with continuity correction
    #> 
    #> data:  data$x2i and data$x2f
    #> W = 75.5, p-value = 0.1266
    #> alternative hypothesis: true location shift is not equal to 0
    #> 
    #> 
    #> ------------------------------------------------------------ 
    #> : b
    #> [[1]]
    #> 
    #>  Wilcoxon rank sum test with continuity correction
    #> 
    #> data:  data$x1i and data$x1f
    #> W = 113.5, p-value = 0.5952
    #> alternative hypothesis: true location shift is not equal to 0
    #> 
    #> 
    #> [[2]]
    #> 
    #>  Wilcoxon rank sum test with continuity correction
    #> 
    #> data:  data$x2i and data$x2f
    #> W = 152, p-value = 0.3719
    #> alternative hypothesis: true location shift is not equal to 0
    

    Created on 2022-04-13 by the reprex package (v2.0.1)