r pivot tidyverse pairwise.wilcox.test

How to melt pairwise.wilcox.test output using dplyr?


I want to apply pairwise.wilcox.test to multiple variables at once and then get the output in long format. For a single wavelength, I can do it using the following code:

# Pairwise Wilcoxon tests between classes for a single wavelength column,
# keeping only the matrix of adjusted p-values from the result.
try <- pairwise.wilcox.test(
  df[["WV_350"]],
  as.factor(df[["Class"]]),
  p.adjust.method = "bonf"
)[["p.value"]]

and the final output I want is

# Melt the p-value matrix into long format: one row per (Var1, Var2) cell,
# with the p-value (or NA for an empty cell) in `value`.
reshape2::melt(try)
#>  Var1 Var2      value
#> 1     2    1 1.00000000
#> 2     3    1 0.07936508
#> 3     4    1 0.07936508
#> 4     5    1 0.07936508
#> 5     2    2         NA
#> 6     3    2 0.07936508
#> 7     4    2 0.07936508
#> 8     5    2 0.07936508
#> 9     2    3         NA
#> 10    3    3         NA
#> 11    4    3 1.00000000
#> 12    5    3 0.74912899
#> 13    2    4         NA
#> 14    3    4         NA
#> 15    4    4         NA
#> 16    5    4 0.55555556

Now, to apply it to all the wavelengths at once, I used the dplyr package (newest version, 1.0.0) like this:

library(tidyverse)
# Reshape to one row per (Class, wavelength) observation, then run the
# pairwise Wilcoxon tests separately within each wavelength.
# as_tibble() replaces tbl_df(), which is deprecated as of dplyr 1.0.0.
as_tibble(df) %>%
  pivot_longer(
    cols = -Class,
    names_to = "Wavelengths",
    values_to = "value"
  ) %>%
  group_by(Wavelengths) %>%
  # Each group's summary is a p-value matrix, so every wavelength yields
  # several rows and `pval` is a matrix column (wide, not long, format).
  # The column is named `pval` to match the printed output shown below.
  summarize(
    pval = pairwise.wilcox.test(
      value, as.factor(Class),
      p.adjust.method = "bonf"
    )$p.value
  )

which returns

#> `summarise()` regrouping output by 'Wavelengths' (override with `.groups` argument)
#> # A tibble: 16 x 2
#> # Groups:   Wavelengths [4]
#>    Wavelengths pval[,1]    [,2]   [,3]   [,4]
#>    <chr>          <dbl>   <dbl>  <dbl>  <dbl>
#>  1 WV_350        1      NA      NA     NA    
#>  2 WV_350        0.0794  0.0794 NA     NA    
#>  3 WV_350        0.0794  0.0794  1     NA    
#>  4 WV_350        0.0794  0.0794  0.749  0.556
#>  5 WV_351        1      NA      NA     NA    
#>  6 WV_351        0.0794  0.0794 NA     NA    
#>  7 WV_351        0.0794  0.0794  1     NA    
#>  8 WV_351        0.0794  0.0794  0.556  0.556
#>  9 WV_352        1      NA      NA     NA    
#> 10 WV_352        0.0794  0.0794 NA     NA    
#> 11 WV_352        0.0794  0.0794  1     NA    
#> 12 WV_352        0.0794  0.0794  0.556  0.749
#> 13 WV_353        1      NA      NA     NA    
#> 14 WV_353        0.0794  0.0794 NA     NA    
#> 15 WV_353        0.0794  0.0794  1     NA    
#> 16 WV_353        0.0794  0.0794  0.556  0.317

How can I get this output in long format, with columns like the following?

Wavelength Var1 Var2      value 

Data

# Example data: 25 observations, 5 per class (classes 1-5), with one
# measured value per wavelength column. Each row of values below holds the
# five observations of one class, in class order.
df <- structure(
  list(
    Class = rep(c(1, 2, 3, 4, 5), each = 5),
    WV_350 = c(
      0.0196, 0.0206, 0.023, 0.0264, 0.029,
      0.0201, 0.0181, 0.0216, 0.0225, 0.019,
      0.0165, 0.0121, 0.0129, 0.0123, 0.0149,
      0.0137, 0.0116, 0.0151, 0.0138, 0.0167,
      0.0149, 0.0112, 0.0107, 0.01, 0.0099
    ),
    WV_351 = c(
      0.0197, 0.0206, 0.0229, 0.0265, 0.029,
      0.0199, 0.0183, 0.0216, 0.0225, 0.0187,
      0.0165, 0.0118, 0.0127, 0.0122, 0.0148,
      0.0138, 0.0114, 0.0145, 0.0132, 0.0164,
      0.0144, 0.0108, 0.01, 0.0093, 0.0095
    ),
    WV_352 = c(
      0.0199, 0.0207, 0.0233, 0.027, 0.0299,
      0.0203, 0.0186, 0.0219, 0.0232, 0.019,
      0.0169, 0.0124, 0.0133, 0.0126, 0.0152,
      0.0145, 0.0118, 0.0148, 0.0132, 0.0168,
      0.0148, 0.0111, 0.0102, 0.0096, 0.0098
    ),
    WV_353 = c(
      0.0204, 0.0213, 0.0238, 0.0277, 0.0307,
      0.0208, 0.0194, 0.0229, 0.0241, 0.0199,
      0.0173, 0.013, 0.0142, 0.0134, 0.0161,
      0.0152, 0.0126, 0.0153, 0.0137, 0.0175,
      0.0151, 0.0116, 0.0105, 0.01, 0.0098
    )
  ),
  row.names = c(NA, 25L),
  class = "data.frame"
)

Solution

  • I was able to solve the problem using the rstatix package, which "provides a simple and intuitive pipe friendly framework, coherent with the 'tidyverse' design philosophy for performing basic statistical tests".

    library(tidyverse)
    library(rstatix)
    
    # Reshape to long format, make Class a factor, then run the pairwise
    # Wilcoxon tests within each wavelength; the result has one row per
    # pair of classes per wavelength.
    df %>%
      as_tibble() %>%
      pivot_longer(
        cols = -Class,
        names_to = "Wavelengths",
        values_to = "value"
      ) %>%
      mutate(Class = as.factor(Class)) %>%
      group_by(Wavelengths) %>%
      pairwise_wilcox_test(value ~ Class, p.adjust.method = "bonf")
    

    which returns the following output

    #> # A tibble: 40 x 10
    #>   Wavelengths .y.   group1 group2    n1    n2 statistic     p p.adj
    #> * <chr>       <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl>
    #> 1 WV_350      value 1      2          5     5      20   0.151 1    
    #> 2 WV_350      value 1      3          5     5      25   0.008 0.079
    #> 3 WV_350      value 1      4          5     5      25   0.008 0.079
    #> 4 WV_350      value 1      5          5     5      25   0.008 0.079
    #> 5 WV_350      value 2      3          5     5      25   0.008 0.079
    #> 6 WV_350      value 2      4          5     5      25   0.008 0.079
    #> 7 WV_350      value 2      5          5     5      25   0.008 0.079
    #> 8 WV_350      value 3      4          5     5      10   0.69  1    
    #> 9 WV_350      value 3      5          5     5      21.5 0.075 0.749
    #> 10 WV_350      value 4      5          5     5      22   0.056 0.556
    #> # ... with 30 more rows, and 1 more variable: p.adj.signif <chr>