Search code examples
r dataframe pairwise.wilcox.test

Output in the desired format from the wilcox.test function


My data.frame:

# Example data: ten integer measurement columns (col1..col10) and an integer
# GROUP label; groups 1, 2 and 3 each own two of the six rows.
data <- data.frame(
  col1  = c(125L, 654L, 896L, 154L, 865L, 148L),
  col2  = c(489L, 657L, 198L, 269L, 789L, 456L),
  col3  = c(741L, 852L, 963L, 987L, 951L, 632L),
  col4  = c(124L, 785L, 874L, 965L, 563L, 145L),
  col5  = c(963L, 146L, 259L, 367L, 365L, 189L),
  col6  = c(741L, 777L, 100L, 200L, 956L, 452L),
  col7  = c(456L, 666L, 300L, 778L, 888L, 999L),
  col8  = c(254L, 732L, 400L, 500L, 600L, 700L),
  col9  = c(555L, 638L, 127L, 489L, 545L, 54L),
  col10 = c(921L, 549L, 111L, 222L, 354L, 355L),
  GROUP = c(1L, 2L, 3L, 1L, 2L, 3L)
)

My function:

# Column-index pairs to compare: every 2-element combination within each of
# the column triples 1-3, 4-6 and 7-9 (indices kept as doubles).
combination <- unlist(
  lapply(
    list(c(1, 2, 3), c(4, 5, 6), c(7, 8, 9)),
    function(triple) combn(triple, 2, simplify = FALSE)
  ),
  recursive = FALSE
)

# Pairwise Wilcoxon comparisons between columns, computed separately for each
# selected group.
#
# Args:
#   df:       data frame holding a `GROUP` column plus the measurement columns.
#   id_group: GROUP values to keep; rows belonging to other groups are dropped
#             before any test is run.
#
# The pairs to compare are read from the `combination` list defined outside
# this function; each element is a length-2 vector of column positions in `df`.
#
# Returns:
#   A data frame with one row per (group, pair) and the character columns
#   GROUP, NAME, stats (the two column medians) and diff (the negated
#   wilcox.test location estimate, rounded to 2 decimals). An empty selection
#   yields a zero-row data frame with the same columns.
wilcox.fun <- function(df, id_group) {
  df <- df[df$GROUP %in% id_group, , drop = FALSE]

  # Builds the comparison rows for the rows of a single group.
  compare_pairs <- function(dat) {
    rows <- lapply(combination, function(pair) {
      first <- dat[[pair[1]]]
      second <- dat[[pair[2]]]
      test <- wilcox.test(first, second, conf.int = TRUE)
      # wilcox.test estimates the location of first - second; the sign is
      # flipped so the reported value reads as second - first.
      diff.1 <- -round(test$estimate, 2)
      data.frame(
        NAME = sprintf('Group %s by Group %s', pair[1], pair[2]),
        stats = paste(pair[1], ":", median(first), ":", pair[2], median(second)),
        diff = paste(pair[1], "-", pair[2], diff.1, collapse = "\n"),
        stringsAsFactors = FALSE
      )
    })
    do.call(rbind, rows)
  }

  # Split the *filtered* rows (not the caller's global data) so that
  # `id_group` actually restricts the output.
  by_group <- split(df, df$GROUP, drop = TRUE)
  if (length(by_group) == 0L) {
    return(data.frame(
      GROUP = character(),
      NAME = character(),
      stats = character(),
      diff = character(),
      stringsAsFactors = FALSE
    ))
  }

  per_group <- lapply(names(by_group), function(g) {
    data.frame(GROUP = g, compare_pairs(by_group[[g]]), stringsAsFactors = FALSE)
  })
  out <- do.call(rbind, per_group)
  rownames(out) <- NULL
  out
}
  
    
# Run the comparison for groups 1 and 2, then label the third and fourth
# columns of the returned table.
result <- wilcox.fun(df = data, id_group = c("1", "2"))
names(result)[3:4] <- c("stats", "diff")

I would like the function to output data in this format:

|GROUP| 1    | 2  | 3   |  4  | 5   |  6  | 7 | 8 | 9   |       dif      |
|-----|------|----|-----|-----|-----|-----|---|---|-----|----------------|
|1   | 139.5| 379|864  |544.5|665  |470.5|617|377|522  | 1 - 2 239.5    |
|    |                                                 | 1 - 3 724.5    |    
|    |                                                 | 2 - 3 485      |
|    |                                                 | 4 - 5 120.5    |
|    |                                                 | etc            |
|--- |------|----|-----|-----|-----|-----|---|---|-----|----------------|
|2   |759.5 |723 |901.5|674  |255.5|866.5|777|666|591.5|1 - 2 -36.5     |   |
|    |                                                 |1 - 3 142       |   |
|    |                                                 |2 - 3 178.5     |   |
|    |                                                 |4 - 5 -418.5    |   |
|    |                                                 | etc            |   |

That is, I want to build a data frame in which each median is recorded once, in its own column, and all the pairwise comparisons of medians are collected in a single column. I don't quite understand how to form such a data frame.


Solution

  • Update

    # Per-group column medians plus pairwise Wilcoxon location differences.
    #
    # Args:
    #   data:        data frame with a `GROUP` column and the measurement columns.
    #   id_groups:   GROUP values to report; one section of rows per value.
    #   combination: list of length-2 vectors of column positions in `data`,
    #                naming the pairs to compare.
    #
    # Returns:
    #   A data frame with columns GROUP, one column per distinct position used
    #   in `combination` (named by that position, holding the column median) and
    #   `dif`. Each group contributes one row per pair: the first row carries
    #   the group id and the medians, the remaining rows are NA except for
    #   `dif`. Because the group id, medians and text are combined with c(),
    #   every column is character.
    wilcox.fun <- function(data, id_groups, combination){
      cols <- unique(unlist(combination))
      dif_col <- length(cols) + 2

      result_list <- list()
      for (g in id_groups){
        # Keep the full matrix so the positions stored in `combination` stay
        # valid; drop = FALSE keeps matrix shape even for a one-row group.
        df <- as.matrix(data[data$GROUP %in% g, , drop = FALSE])
        med <- apply(df[, cols, drop = FALSE], 2, median)

        result <- data.frame(matrix(NA, ncol = dif_col, nrow = 1))
        result[1,] <- c(g, med, NA)

        # seq_along() is safe when `combination` is empty (1:0 would iterate).
        for (k in seq_along(combination)) {
          i <- combination[[k]][1]
          j <- combination[[k]][2]
          test <- wilcox.test(df[, i], df[, j], conf.int = TRUE)

          # wilcox.test estimates the location of column i minus column j; the
          # sign is flipped so the value reads as column j minus column i.
          diff.1 <- -round(test$estimate, 2)
          # Assigning to row k > 1 appends a new row filled with NA elsewhere.
          result[k, dif_col] <- paste(i, "-", j, diff.1)
        }
        result_list[[g]] <- result
      }
      result_new <- do.call(rbind, result_list)
      names(result_new) <- c("GROUP", as.character(cols), "dif")

      result_new
    }
    
    # Compare groups 1 and 2 over every column pair and display the table.
    result <- wilcox.fun(data, id_groups = c("1", "2"), combination = combination)
    print(result)
       GROUP     1    2     3     4     5     6    7    8     9          dif
    1.1     1 139.5  379   864 544.5   665 470.5  617  377   522  1 - 2 239.5
    1.2  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>  1 - 3 724.5
    1.3  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>    2 - 3 485
    1.4  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>  4 - 5 120.5
    1.5  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>    4 - 6 -74
    1.6  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA> 5 - 6 -194.5
    1.7  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>   7 - 8 -240
    1.8  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>    7 - 9 -95
    1.9  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>    8 - 9 145
    2.1     2 759.5  723 901.5   674 255.5 866.5  777  666 591.5  1 - 2 -36.5
    2.2  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>    1 - 3 142
    2.3  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>  2 - 3 178.5
    2.4  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA> 4 - 5 -418.5
    2.5  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>  4 - 6 192.5
    2.6  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>    5 - 6 611
    2.7  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>   7 - 8 -111
    2.8  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA> 7 - 9 -185.5
    2.9  <NA>  <NA> <NA>  <NA>  <NA>  <NA>  <NA> <NA> <NA>  <NA>  8 - 9 -74.5