Search code examples
r, loops, dataframe, t-test

R: t-test between rows within each factor level


This is the data frame I'm trying to work on:

# Simulated measurements: 108 draws from N(mean = 5000, sd = 1000), laid out
# as 36 rows by 3 columns named V1..V3.
m <- matrix(rnorm(108, mean = 5000, sd = 1000), nrow = 36)
colnames(m) <- paste0("V", 1:3)

# Nine types (T1..T9) with four consecutive rows each; within every type the
# rows carry the treatments C, P, N, S in that order.
df <- data.frame(
  type = factor(rep(paste0("T", 1:9), each = 4)),
  treatment = factor(rep(c("C", "P", "N", "S"), times = 9)),
  as.data.frame(m)
)

I want to know how I can perform a t-test between the rows within each "type". Here's an example of the t-tests I want for type T1:

# Rows 1-4 of df are type T1 with treatments C, P, N, S (in that order).
# Compare the first row (C) against each of the other three rows, using the
# measurement columns 3:5 (V1..V3). unlist() turns each one-row data frame
# into a plain numeric vector, which is the input t.test() documents.
t.test(unlist(df[1, 3:5]), unlist(df[2, 3:5]))  # C vs P
t.test(unlist(df[1, 3:5]), unlist(df[3, 3:5]))  # C vs N
t.test(unlist(df[1, 3:5]), unlist(df[4, 3:5]))  # C vs S

I'm trying to figure out how I can loop through all rows and collect the p-values from these t-tests (along with the type and treatment for identification), instead of running each comparison manually. Any help or suggestions would be greatly appreciated.


Solution

  • Something like this:

    library(dplyr)

    # Welch t-test p-values for every pair of treatments in one group.
    #   group:      rows belonging to a single type (one row per treatment)
    #   value_cols: indices of the columns holding the measurements
    # Returns a data frame with one row per treatment pair and the columns
    # `treatment` (e.g. "C, P") and `p_value`.
    pairwise_p_values <- function(group, value_cols) {
      # Transpose so each treatment becomes a column of its measurements.
      by_treatment <- t(group[value_cols]) %>%
        data.frame() %>%
        setNames(as.character(group$treatment))
      # combn() on a data frame yields every 2-column sub-frame.
      combn(by_treatment, 2, simplify = FALSE) %>%
        lapply(function(pair) {
          data.frame(treatment = paste0(names(pair), collapse = ", "),
                     p_value = t.test(pair[, 1], pair[, 2])$p.value)
        }) %>%
        do.call(rbind, .)
    }

    # drop = TRUE skips unused factor levels, which would otherwise produce
    # empty groups that combn() cannot handle.
    groups <- split(df, df$type, drop = TRUE)

    # Label each result with the name of its group directly, rather than
    # parsing the label back out of rbind()'s row names (which would truncate
    # any type label that contains a ".").
    t_tests <- lapply(names(groups), function(type_name) {
      mutate(pairwise_p_values(groups[[type_name]], 3:5), type = type_name)
    }) %>%
      do.call(rbind, .)
    

    Result:

    > head(t_tests, 10)
       treatment   p_value type
    1       C, P 0.6112274   T1
    2       C, N 0.6630060   T1
    3       C, S 0.5945135   T1
    4       P, N 0.9388568   T1
    5       P, S 0.8349370   T1
    6       N, S 0.9049995   T1
    7       C, P 0.3274583   T2
    8       C, N 0.9755364   T2
    9       C, S 0.7391661   T2
    10      P, N 0.3177871   T2
    

    Edits (Added an extra level "file" to the dataset):

    library(dplyr)

    # Welch t-test p-values for every pair of treatments in one group.
    #   group:      rows belonging to a single type (one row per treatment)
    #   value_cols: indices of the columns holding the measurements
    # Returns a data frame with one row per treatment pair and the columns
    # `treatment` (e.g. "C, P") and `p_value`.
    pairwise_p_values <- function(group, value_cols) {
      # Transpose so each treatment becomes a column of its measurements.
      by_treatment <- t(group[value_cols]) %>%
        data.frame() %>%
        setNames(as.character(group$treatment))
      # combn() on a data frame yields every 2-column sub-frame.
      combn(by_treatment, 2, simplify = FALSE) %>%
        lapply(function(pair) {
          data.frame(treatment = paste0(names(pair), collapse = ", "),
                     p_value = t.test(pair[, 1], pair[, 2])$p.value)
        }) %>%
        do.call(rbind, .)
    }

    # Outer level: one group per file. drop = TRUE skips unused factor levels.
    by_file <- split(df, df$file, drop = TRUE)

    # The type and file labels are taken from the split names directly, not
    # parsed out of rbind()'s row names: that regex would truncate any label
    # containing a "." (e.g. a file called "run1.csv").
    t_tests <- lapply(names(by_file), function(file_name) {
      # Inner level: one group per type within this file. The measurement
      # columns are 4:6 here because `file` is the third column.
      by_type <- split(by_file[[file_name]], by_file[[file_name]]$type,
                       drop = TRUE)
      lapply(names(by_type), function(type_name) {
        mutate(pairwise_p_values(by_type[[type_name]], 4:6),
               type = type_name,
               file = file_name)
      }) %>%
        do.call(rbind, .)
    }) %>%
      do.call(rbind, .)
    

    Result:

       treatment   p_value type  file
    1       C, P 0.3903450   T1 file1
    2       C, N 0.3288727   T1 file1
    3       C, S 0.0638599   T1 file1
    4       P, N 0.6927599   T1 file1
    5       P, S 0.1159615   T1 file1
    6       N, S 0.2184015   T1 file1
    7       C, P 0.1147805   T2 file1
    8       C, N 0.4961888   T2 file1
    9       C, S 0.9048607   T2 file1
    10      P, N 0.4203666   T2 file1
    11      P, S 0.3425908   T2 file1
    12      N, S 0.7262478   T2 file1
    13      C, P 0.6300293   T3 file1
    14      C, N 0.8255837   T3 file1
    15      C, S 0.7140522   T3 file1
    16      P, N 0.4768694   T3 file1
    17      P, S 0.3992130   T3 file1
    18      N, S 0.8740219   T3 file1
    19      C, P 0.2434270   T4 file1
    20      C, N 0.2713622   T4 file1
    

    Note about edit:

    The OP wanted an extra top-level grouping variable, "file", added to the data. To support it, wrap the per-type logic in one more split + lapply over "file" and add a matching do.call(rbind, ...) at the end.

    New Data:

    # Simulated measurements: 324 draws from N(mean = 5000, sd = 1000), laid
    # out as 108 rows by 3 columns named V1..V3.
    m <- matrix(rnorm(324, mean = 5000, sd = 1000), nrow = 108)
    colnames(m) <- paste0("V", 1:3)

    # Three files of 36 rows each; within every file the nine types T1..T9
    # take four consecutive rows, one per treatment (C, P, N, S). The type and
    # treatment patterns are repeated explicitly to fill all 108 rows.
    df <- data.frame(
      type = factor(rep(rep(paste0("T", 1:9), each = 4), times = 3)),
      treatment = factor(rep(c("C", "P", "N", "S"), times = 27)),
      file = factor(rep(paste0("file", 1:3), each = 36)),
      as.data.frame(m)
    )