Suppose I had the following data:
Class,a,b,c,d,e,f,g,h,i,j,k,l,m A,0.1,0.4,0.4,0.7,0.1,0.1,0.9,0.5,0.5,0.9,0.6,0.1,0.9 A,0.0,0.3,0.7,0.1,0.2,0.1,0.3,0.2,1.0,0.9,0.6,0.6,0.1 B,0.6,0.3,0.4,0.6,0.3,0.8,0.3,0.0,0.8,0.9,0.6,0.7,0.5 B,0.8,0.3,0.8,0.8,0.3,0.7,0.3,0.7,0.7,0.4,0.9,0.3,0.8 C,0.8,0.1,0.0,0.0,0.2,0.0,0.7,0.3,0.1,0.6,0.8,0.0,0.3 C,0.3,0.0,0.7,1.0,0.8,0.3,0.6,0.4,0.9,0.9,0.1,0.9,1.0 D,0.9,0.7,1.0,0.1,0.2,0.5,0.9,0.9,0.6,0.8,0.6,0.4,0.2 D,0.6,0.2,0.8,0.8,0.1,0.3,0.0,0.0,0.6,0.3,0.6,0.9,0.3 E,1.0,0.5,0.5,0.0,0.7,0.9,1.0,0.1,0.5,0.5,0.1,0.9,0.6 E,0.5,0.8,0.5,0.1,0.7,0.6,0.1,0.8,0.2,0.1,0.7,0.6,0.4 F,0.6,0.4,0.6,0.2,0.5,0.4,0.2,0.2,0.3,0.4,0.9,0.5,0.7 F,0.9,0.6,0.8,0.6,0.6,0.3,0.8,0.5,0.4,0.4,0.5,0.7,0.4 G,0.2,0.1,0.6,0.4,0.3,0.8,0.4,0.1,0.9,0.7,0.0,0.5,0.8 G,0.2,0.3,0.2,0.8,0.3,0.6,0.2,0.6,0.9,0.1,0.2,0.1,0.3 H,0.7,0.4,0.7,0.2,0.5,0.5,0.5,0.9,0.8,0.4,0.9,0.7,0.5 H,0.1,0.6,0.5,0.7,0.5,0.4,0.5,0.4,0.1,0.6,0.1,0.3,0.5 I,0.6,0.8,0.7,0.6,0.3,0.5,0.2,0.6,0.5,0.3,0.9,0.8,0.5 I,0.1,0.0,0.5,0.2,0.3,0.2,0.4,0.9,0.9,0.8,0.5,0.3,0.2 J,0.6,0.1,0.2,0.3,0.3,0.6,0.5,0.9,0.0,0.2,0.4,0.8,0.9 J,0.3,0.7,0.4,0.1,0.4,0.1,0.7,0.5,0.1,0.0,0.1,0.2,0.6 K,0.1,0.9,0.5,0.0,0.0,0.3,0.0,0.7,0.2,0.0,0.0,0.6,0.6 K,0.9,0.6,0.1,0.9,0.3,0.2,0.1,0.2,0.2,0.1,0.7,0.8,0.0 L,1.0,0.6,0.0,0.0,0.8,0.5,0.7,0.9,0.3,0.5,0.4,0.9,0.4 L,0.6,0.2,0.5,0.2,0.7,0.6,0.4,0.5,0.8,0.6,0.8,0.3,0.9 M,0.7,0.1,0.1,0.8,0.4,1.0,0.9,0.1,0.1,0.5,0.5,0.6,0.3 M,0.8,0.4,0.2,0.4,0.2,0.2,0.3,0.5,0.5,0.3,0.3,1.0,0.3 N,0.0,0.4,0.5,0.1,0.6,0.5,0.3,0.4,0.8,0.7,0.1,0.2,0.8 N,0.9,0.4,1.0,0.9,0.7,0.5,0.7,0.4,0.8,0.8,0.1,0.7,0.2 O,0.2,0.1,1.0,0.2,0.0,0.8,0.4,0.9,0.1,1.0,0.8,0.3,0.5 O,0.6,1.0,1.0,0.8,1.0,0.6,0.4,0.3,0.2,0.8,0.5,0.0,0.1 P,0.2,0.1,0.7,0.8,0.3,0.2,0.4,0.4,1.0,0.2,0.7,0.1,0.0 P,0.3,0.6,0.6,0.5,0.9,0.1,0.9,0.4,0.5,0.2,0.7,0.8,0.2 Q,0.9,0.7,0.6,0.7,0.4,0.3,0.3,0.8,0.6,0.6,1.0,0.1,0.7 Q,0.6,0.9,0.9,0.1,0.8,0.7,0.0,0.1,0.7,0.0,0.9,0.7,0.8 R,0.1,0.7,0.5,0.9,0.9,0.1,1.0,0.1,0.6,0.2,0.5,0.5,0.3 R,0.1,0.6,0.9,0.6,0.6,0.8,1.0,0.3,0.7,0.9,0.1,0.8,0.2
structure(list(Class = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L,
12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L, 17L, 17L, 18L,
18L), .Label = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
"J", "K", "L", "M", "N", "O", "P", "Q", "R"), class = "factor"),
a = c(0.7, 0.4, 0.9, 0.5, 0.4, 0.5, 0, 0.3, 0.7, 0.7, 0.6,
0.5, 0.4, 0.1, 0.3, 0.1, 0.3, 0.4, 0.5, 0.8, 0.8, 0.6, 0.2,
0.3, 0.2, 0.6, 0.6, 0.4, 0.9, 0.1, 0.6, 0.6, 0.7, 0.7, 1,
0.2), b = c(0.3, 0.1, 0.3, 1, 0.2, 0.8, 0.9, 0.5, 0.6, 0.3,
0.2, 0.9, 0.9, 0.8, 0, 0.2, 0.8, 0.7, 0.8, 0.8, 0.5, 0, 0.3,
0.9, 0.4, 0.9, 0.1, 0.4, 0.5, 0.7, 0.8, 0.9, 0.1, 0.8, 1,
0.7), c = c(0.2, 0.6, 0.7, 0.5, 0.4, 0.7, 0.9, 0.1, 0.2,
0.8, 0.8, 0.9, 0.2, 0.6, 0.4, 0.3, 0.1, 0.9, 0, 0.8, 0.5,
0.9, 0.4, 0.9, 0.8, 0.4, 0.9, 0.7, 0.9, 1, 0.2, 0.1, 1, 0.9,
0.8, 0.8), d = c(0.8, 0.1, 0.5, 0.8, 0.3, 0.2, 0.9, 0.9,
0, 0.3, 0.7, 0, 0.1, 0, 0, 0.8, 1, 0.8, 0.6, 1, 0.6, 0.6,
0.2, 0.1, 1, 0.1, 0.7, 0.3, 0.7, 0.3, 0.9, 0.8, 0.1, 0.2,
0.1, 0.4), e = c(0.6, 0.6, 0.3, 0.6, 0.7, 0.6, 0.9, 0.8,
0.6, 0.4, 0.6, 0.7, 0.7, 0.2, 1, 1, 1, 0.5, 0.4, 0.5, 0.6,
0.7, 0.1, 0.1, 0.6, 0.2, 0.4, 1, 0.1, 0.3, 0.7, 0, 0.4, 0.2,
0.7, 0.1), f = c(0.9, 0.2, 0.4, 0.8, 0, 0.2, 1, 0.2, 0.1,
0.1, 0.4, 1, 0.1, 0.9, 0.2, 0.2, 0.3, 0.6, 0.9, 0.2, 0.5,
0.4, 0.4, 0.2, 0.6, 0.1, 0.6, 0.1, 0.5, 0.1, 0.6, 0.9, 0.5,
0.4, 0.5, 0.8), g = c(0.6, 0.3, 0.9, 0.1, 0.9, 0.3, 0.1,
0.2, 0, 0.4, 1, 0.3, 0.7, 0.8, 0.5, 0.5, 0.6, 0.2, 0.2, 0,
1, 0.6, 0.2, 0.7, 0.6, 0.6, 0.1, 0.4, 0.6, 0.2, 0.6, 0.4,
0.6, 0.7, 0.3, 1), h = c(0.1, 0.8, 0.3, 0.3, 0.3, 0.8, 0.3,
0.7, 0.4, 0.8, 0.3, 0.1, 0.5, 0.7, 0.1, 0.3, 0.7, 0.4, 0.7,
0.5, 0.4, 0.1, 0.9, 0.8, 0.9, 0.7, 0.3, 0.1, 0.3, 0.1, 0.2,
0.1, 0.9, 0.3, 0.1, 0.6), i = c(0.8, 0.8, 0.4, 0.9, 0.5,
0.8, 0.6, 0.6, 0.2, 0.7, 0.1, 0.8, 0.1, 0.5, 0.7, 0.6, 0.8,
0.4, 0.2, 0.9, 0.7, 0.8, 0.5, 0.8, 0.3, 0.3, 1, 0.6, 0.9,
0.9, 0.7, 0.5, 0.1, 1, 0.5, 0.1), j = c(0.5, 0, 0.4, 0.7,
0.5, 0.6, 0.3, 0.9, 0.1, 0.4, 1, 0.5, 0.7, 0.4, 0.3, 0.9,
0, 0.5, 0.7, 0.4, 0, 0.1, 0.4, 0.3, 0.6, 0.9, 0.5, 0.5, 0.9,
0.5, 0.5, 0.3, 0.1, 0.1, 0.9, 0.7), k = c(0.8, 0.1, 0, 0.5,
0.7, 0.9, 0.4, 0.9, 0.3, 0.8, 0.7, 0.8, 0.2, 0.2, 0.8, 0.4,
0.8, 0.7, 0.2, 0.6, 0.5, 0.4, 1, 0.2, 0.7, 0.3, 0.1, 0, 0.4,
0.7, 0.8, 0.3, 0.3, 0.8, 0.5, 0.9), l = c(0.8, 0.6, 0.3,
0.6, 0.6, 0.1, 0.2, 0.3, 0.7, 0.6, 0.1, 0.8, 1, 1, 0.5, 0.3,
0.3, 0, 0.4, 0.1, 0.5, 0.6, 0.9, 0.4, 0.6, 0.4, 0.3, 0.4,
0.5, 0.5, 0.5, 0.5, 0.9, 0.2, 0.5, 0.9), m = c(0.4, 0, 0.7,
0.1, 0.9, 0.8, 0.6, 0.1, 0.2, 0.2, 0.9, 0.9, 0.4, 0.4, 0.9,
0.6, 0.2, 0.1, 0.7, 0.5, 0.2, 0.8, 0.4, 0.6, 0.9, 0.7, 0,
0.8, 0.7, 0.2, 0.1, 0.8, 0.2, 0.2, 0.3, 0.2)), row.names = c(NA,
-36L), class = "data.frame")
I want to run unpaired t-test between the rows (i.e. A-B, A-C, B-C, etc) within a column (i.e. A-B, A-C, B-C, etc. within "a", then A-B, A-C, B-C, etc. within "b", and so on). Is there a simple way to do this with R rather than running each gene individually? Ideally, I'd like an output list of p-values for each sample pairing in each gene.
Thanks so much, L.
We can use combn
to get the pairwise combination of 'Class' column values and use in the t.test
lstOut <- combn(unique(df1$Class), 2, FUN = function(x) {
dat1 <- subset(df1, Class == x[1])
dat2 <- subset(df1, Class == x[2])
Map(function(x, y) tryCatch(t.test(x, y),
error = function(e) NA), dat1[-1], dat2[-1])
}, simplify = FALSE)
names(lstOut) <- combn(as.character(unique(df1$Class)), 2, FUN = paste, collapse="_")
The list
output can be tidy
ied and converted to a single data.frame
library(broom)
library(purrr)
out <- map_depth(lstOut, .depth = 2, tidy)
out2 <- map_dfr(out, ~bind_rows(.x, .id = 'colname'), .id = 'classCompare')