I have a matrix and I want to identify the number of times that each character appears in the same position between all pairwise.
A example of the way I'm doing is below, but my matrix has 10,000 rows and it's taking too long.
# This code will generate a dataframe with one row for each pair and columns that
# count the number of position match each letter have
my_letters <- c("A", "B", "C", "D")
size_vector <- 175
n_vectors <- 10
indexes_vectors <- seq_len(n_vectors)
mtx <- sapply(indexes_vectors,
function(i) sample(my_letters, n_vectors, replace = TRUE))
rownames(mtx) <- indexes_vectors
df <- as.data.frame(t(combn(indexes_vectors, m = 2)))
colnames(df) <- c("index_1", "index_2")
for(l in my_letters){
cat(l, "\n")
df[,l] <- apply(df[,1:2], 1,
function(ids) {
sum(mtx[ids[1],] == mtx[ids[2],] &
mtx[ids[1],] == l, na.rm = TRUE)
})
}
m1 <- t(sapply(1:nrow(df), function(i)
table(factor(mtx[df[i,1],][mtx[df[i,1],] == mtx[df[i,2],]],
levels = my_letters))))
cbind(df, m1)
> V1 V2 A B C D 1 1 2 0 0 1 1 2 1 3 1 0 1 1 3 1 4 1 0 2 1 4 1 5 0 0 1 0 5 1 6 2 0 2 0 6 1 7 0 0 1 0 7 1 8 1 0 1 1 8 1 9 0 0 1 0 9 1 10 1 0 1 1 10 2 3 0 0 1 1 11 2 4 1 1 1 2 12 2 5 0 0 0 1 13 2 6 1 0 2 1 14 2 7 1 0 0 1 15 2 8 1 0 0 0 16 2 9 2 0 0 0 17 2 10 1 0 1 0 18 3 4 0 0 0 0 19 3 5 0 2 1 0 20 3 6 1 1 2 1 21 3 7 0 1 0 0 22 3 8 1 1 0 0 23 3 9 0 1 2 0 24 3 10 0 0 1 0 25 4 5 1 1 0 1 26 4 6 2 1 1 0 27 4 7 1 0 1 1 28 4 8 0 1 0 0 29 4 9 1 0 0 0 30 4 10 2 0 0 0 31 5 6 0 2 0 0 32 5 7 0 1 3 1 33 5 8 0 1 2 0 34 5 9 1 0 2 0 35 5 10 0 0 2 0 36 6 7 0 0 0 0 37 6 8 1 1 0 0 38 6 9 0 0 1 0 39 6 10 3 0 1 0 40 7 8 0 1 1 0 41 7 9 1 0 1 0 42 7 10 0 0 1 0 43 8 9 1 1 1 1 44 8 10 0 0 1 0 45 9 10 0 0 0 0