This is the data frame I'm trying to work on:
# Simulated measurements: 36 samples (rows) by 3 replicate columns V1..V3.
m <- matrix(rnorm(36 * 3, mean = 5000, sd = 1000), ncol = 3)
colnames(m) <- paste0("V", 1:3)

# Nine types (T1..T9), each observed once under the treatments C, P, N and S,
# so rows 1-4 belong to T1, rows 5-8 to T2, and so on.
df <- data.frame(
  type = factor(rep(paste0("T", 1:9), each = 4)),
  treatment = factor(rep(c("C", "P", "N", "S"), times = 9)),
  as.data.frame(m)
)
I want to know how I can perform a t-test between the rows within each "type". Here's an example of the t-tests I want for type T1:
# All pairwise t-tests (Welch, t.test()'s default) between the four treatment
# rows of type T1 (rows 1-4 of df); each row contributes its three replicate
# values from columns 3:5 (V1..V3) as one sample.
t.test(df[1, 3:5], df[2, 3:5])  # C vs P
t.test(df[1, 3:5], df[3, 3:5])  # C vs N
t.test(df[1, 3:5], df[4, 3:5])  # C vs S
t.test(df[2, 3:5], df[3, 3:5])  # P vs N
t.test(df[2, 3:5], df[4, 3:5])  # P vs S
t.test(df[3, 3:5], df[4, 3:5])  # N vs S
I'm trying to figure out how I can loop through all rows and get all the p-values from the t-tests (along with the type and treatment for identification), instead of running each comparison manually. Any help or suggestion would be greatly appreciated.
Something like this:
library(dplyr)

# For every `type`, run a t-test (Welch, t.test()'s default) between each pair
# of treatments, using the measurement columns 3:5 (V1..V3) as the samples.
# Result: one row per (type, treatment pair) with the columns
# treatment, p_value and type.
t_tests <- df %>%
  split(.$type) %>%
  lapply(function(group) {
    # Transpose so that each treatment becomes a column holding its replicates.
    by_treatment <- t(group[3:5]) %>%
      data.frame() %>%
      setNames(as.character(group$treatment))

    # combn() on a data frame yields every two-column (two-treatment) subset.
    combn(by_treatment, 2, simplify = FALSE) %>%
      lapply(function(pair) {
        data.frame(
          treatment = paste0(names(pair), collapse = ", "),
          p_value = t.test(pair[, 1], pair[, 2])$p.value,
          # Store the group label directly rather than parsing it back out of
          # the row names rbind() generates: that parsing cuts the label at
          # its first "." (e.g. "T1.a" would become "T1").
          type = as.character(group$type[1])
        )
      }) %>%
      do.call(rbind, .)
  }) %>%
  unname() %>%  # unnamed list => plain 1..n row names after rbind()
  do.call(rbind, .)
Result:
> head(t_tests, 10)
treatment p_value type
1 C, P 0.6112274 T1
2 C, N 0.6630060 T1
3 C, S 0.5945135 T1
4 P, N 0.9388568 T1
5 P, S 0.8349370 T1
6 N, S 0.9049995 T1
7 C, P 0.3274583 T2
8 C, N 0.9755364 T2
9 C, S 0.7391661 T2
10 P, N 0.3177871 T2
library(dplyr)

# Pairwise treatment t-tests (Welch, t.test()'s default) nested two levels
# deep: within each `file`, and within each `type` of that file, every pair of
# treatments is compared using the measurement columns 4:6 (V1..V3).
# Result: one row per (file, type, treatment pair) with the columns
# treatment, p_value, type and file.
t_tests <- df %>%
  split(.$file) %>%
  lapply(function(file_rows) {
    split(file_rows, file_rows$type) %>%
      lapply(function(group) {
        # Transpose so that each treatment becomes a column of replicates.
        by_treatment <- t(group[4:6]) %>%
          data.frame() %>%
          setNames(as.character(group$treatment))

        # combn() on a data frame yields every two-column subset.
        combn(by_treatment, 2, simplify = FALSE) %>%
          lapply(function(pair) {
            data.frame(
              treatment = paste0(names(pair), collapse = ", "),
              p_value = t.test(pair[, 1], pair[, 2])$p.value,
              # Store both group labels directly rather than parsing them back
              # out of the row names rbind() generates: that parsing cuts a
              # label at its first "." (e.g. "run1.csv" would become "run1").
              type = as.character(group$type[1]),
              file = as.character(group$file[1])
            )
          }) %>%
          do.call(rbind, .)
      }) %>%
      unname() %>%
      do.call(rbind, .)
  }) %>%
  unname() %>%  # unnamed lists => plain 1..n row names after rbind()
  do.call(rbind, .)
Result:
treatment p_value type file
1 C, P 0.3903450 T1 file1
2 C, N 0.3288727 T1 file1
3 C, S 0.0638599 T1 file1
4 P, N 0.6927599 T1 file1
5 P, S 0.1159615 T1 file1
6 N, S 0.2184015 T1 file1
7 C, P 0.1147805 T2 file1
8 C, N 0.4961888 T2 file1
9 C, S 0.9048607 T2 file1
10 P, N 0.4203666 T2 file1
11 P, S 0.3425908 T2 file1
12 N, S 0.7262478 T2 file1
13 C, P 0.6300293 T3 file1
14 C, N 0.8255837 T3 file1
15 C, S 0.7140522 T3 file1
16 P, N 0.4768694 T3 file1
17 P, S 0.3992130 T3 file1
18 N, S 0.8740219 T3 file1
19 C, P 0.2434270 T4 file1
20 C, N 0.2713622 T4 file1
Note about edit:
The OP wanted an extra top-level grouping variable, `file`, added to the data. To handle it, one can simply wrap the existing pipeline in another `split` + `lapply` and add a matching `do.call` at the end.
New Data:
# Simulated measurements for 3 files x 9 types x 4 treatments = 108 rows,
# with three replicate columns V1..V3.
m <- matrix(rnorm(108 * 3, mean = 5000, sd = 1000), ncol = 3)
colnames(m) <- paste0("V", 1:3)

df <- data.frame(
  # type and treatment are 36 values long (one file's worth); data.frame()
  # recycles them to fill the 108 rows.
  type = factor(rep(paste0("T", 1:9), each = 4)),
  treatment = factor(rep(c("C", "P", "N", "S"), times = 9)),
  file = factor(rep(paste0("file", 1:3), each = 36)),
  as.data.frame(m)
)