Search code examples
rmatrixdplyrtidyverse

R, dplyr: Get number of repeated cells in list of matrices


EDIT

Adding an example with 4x4 matrices to test

Matrices:

> mat1
[,1] [,2] [,3] [,4]
[1,]    0    0    1    0
[2,]    0    1    0    1
[3,]    1    1    1    1
[4,]    0    1    0    1
> mat2
[,1] [,2] [,3] [,4]
[1,]    1    1    1    0
[2,]    1    1    1    1
[3,]    1    1    1    1
[4,]    0    1    0    0
> mat3
[,1] [,2] [,3] [,4]
[1,]    1    1    0    1
[2,]    1    0    0    1
[3,]    0    0    0    1
[4,]    1    0    1    0
> mat4
[,1] [,2] [,3] [,4]
[1,]    0    0    1    0
[2,]    0    0    0    0
[3,]    0    0    1    1
[4,]    0    0    1    1


sample_list <- list(mat1, mat2, mat3, mat4)
m12     <- as.matrix(sample_list[[1]] + sample_list[[2]]) 
sum_row <- rowSums(m12*(m12 > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "sum_row" = ".")
num_rep <- m12 %>% 
           apply(.,1,function(x) sum(x > 1)) %>% as.data.frame()%>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "num_rep" = ".")
rep2    <- full_join(sum_row, num_rep) %>%
           mutate(sum_rep2 = sum_row - num_rep) %>%
           select(., id, sum_rep2)


m123    <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]]) 
sum_row <- rowSums(m123*(m123 > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "sum_row" = ".")
num_rep <- m123 %>% 
           apply(.,1,function(x) sum(x > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "num_rep" = ".")
rep3    <- full_join(sum_row, num_rep) %>%
           mutate(sum_rep3 = sum_row - num_rep) %>%
           select(., id, sum_rep3)


m1234   <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]] + sample_list[[4]]) 
sum_row <- rowSums(m1234*(m1234 > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "sum_row" = ".")
num_rep <- m1234 %>% 
           apply(.,1,function(x) sum(x > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "num_rep" = ".")
rep4    <- full_join(sum_row, num_rep) %>%
           mutate(sum_rep4 = sum_row - num_rep) %>%
           select(., id, sum_rep4)


all_reps <- full_join(rep2, rep3) %>%
            full_join(., rep4)

The final output:

id sum_rep2 sum_rep3 sum_rep4
1        1        3        4
2        2        4        4
3        4        5        7
4        1        1        3

ORIGINAL POST

I have a list of adjacency matrices. I'm trying to count up the number of times a filled cell is repeated for each observation. My goal is to do this with the matrices sequentially. So, the number of repeats with M1 and M2, then M1, M2, and M3, etc.

I'm trying to create a function to do this with as many matrices as provided in the list. The code below is what I've been using to do this one step at a time.

Example matrices:

set.seed(0)

mat1 <- matrix(sample(0:1, 10*10, replace=TRUE),10,10) %>% 
        replace(., col(.) == row(.), 0)
mat2 <- matrix(sample(0:1, 10*10, replace=TRUE),10,10) %>% 
        replace(., col(.) == row(.), 0)
mat3 <- matrix(sample(0:1, 10*10, replace=TRUE),10,10) %>% 
        replace(., col(.) == row(.), 0)
mat4 <- matrix(sample(0:1, 10*10, replace=TRUE),10,10) %>% 
        replace(., col(.) == row(.), 0)

sample_list <- list(mat1, mat2, mat3, mat4)

Code I've used:

Here I calculate the sum of cells by row and the number of filled cells greater than 1. I subtract the number of cells from the total row sums to get the total number of repeats.

m12     <- as.matrix(sample_list[[1]] + sample_list[[2]]) 
sum_row <- rowSums(m12*(m12 > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "sum_row" = ".")
num_rep <- m12 %>% 
           apply(.,1,function(x) sum(x > 1)) %>% as.data.frame()%>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "num_rep" = ".")
rep2    <- full_join(sum_row, num_rep) %>%
           mutate(sum_rep2 = sum_row - num_rep) %>%
           select(., id, sum_rep2)


m123    <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]]) 
sum_row <- rowSums(m123*(m123 > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "sum_row" = ".")
num_rep <- m123 %>% 
           apply(.,1,function(x) sum(x > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "num_rep" = ".")
rep3    <- full_join(sum_row, num_rep) %>%
           mutate(sum_rep3 = sum_row - num_rep) %>%
           select(., id, sum_rep3)



m1234   <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]] + sample_list[[4]]) 
sum_row <- rowSums(m1234*(m1234 > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "sum_row" = ".")
num_rep <- m1234 %>% 
           apply(.,1,function(x) sum(x > 1)) %>% as.data.frame() %>% 
           rowid_to_column(., var = "id") %>% 
           rename(., "num_rep" = ".")
rep4    <- full_join(sum_row, num_rep) %>%
           mutate(sum_rep4 = sum_row - num_rep) %>%
           select(., id, sum_rep4)


all_reps <- full_join(rep2, rep3) %>%
            full_join(., rep4)

The final output, all_reps gives me this dataset (with the random matrices I created in the first chunk of code):

  id sum_rep2 sum_rep3 sum_rep4
1   1        1        7       10
2   2        3        8       10
3   3        1        5       10
4   4        0        1        4
5   5        2        4        9
6   6        0        2        4
7   7        3        6       10
8   8        5        8       12
9   9        3        7       11
10 10        3        9       12

Is there a way to use a loop or an apply function to do this in a more automated way, that also will take a list of more matrices?

Matrices:

sample_list[[1]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    1    0    1    1    0    0    1    1     1
 [2,]    0    0    0    1    1    1    0    1    1     0
 [3,]    1    0    0    1    1    0    1    1    1     0
 [4,]    0    0    0    0    0    1    1    0    0     1
 [5,]    0    0    0    1    0    1    1    0    0     1
 [6,]    1    0    0    0    1    0    1    0    0     0
 [7,]    0    1    0    0    1    0    0    1    1     0
 [8,]    0    1    1    1    1    1    1    0    0     0
 [9,]    0    1    0    0    1    1    0    0    0     1
[10,]    1    1    0    1    0    1    1    0    0     0

sample_list[[2]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    0    1    0    0    0    1    1    0     0
 [2,]    1    0    1    0    1    1    0    0    1     0
 [3,]    1    0    0    0    0    0    0    0    0     1
 [4,]    1    0    0    0    0    0    0    1    0     0
 [5,]    0    1    0    0    0    0    1    0    0     1
 [6,]    0    1    1    1    0    0    0    0    0     1
 [7,]    0    1    0    0    1    1    0    1    0     1
 [8,]    0    1    0    1    1    1    1    0    1     0
 [9,]    1    0    1    1    1    1    0    1    0     1
[10,]    0    1    1    1    0    1    0    1    1     0

sample_list[[3]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    0    0    1    1    1    1    1    1     1
 [2,]    1    0    1    0    1    0    1    1    1     0
 [3,]    1    0    0    0    1    1    0    1    1     0
 [4,]    0    0    1    0    0    0    0    0    0     1
 [5,]    1    1    1    0    0    0    0    1    1     1
 [6,]    1    0    1    0    0    0    0    0    1     0
 [7,]    1    0    0    1    1    0    0    0    1     1
 [8,]    0    0    0    1    0    1    0    0    1     1
 [9,]    0    1    0    1    1    0    0    1    0     0
[10,]    1    1    0    1    0    0    1    1    1     0

sample_list[[4]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    1    1    0    0    0    0    0    0     1
 [2,]    0    0    0    0    1    0    0    0    1     0
 [3,]    0    0    0    1    1    1    1    0    1     0
 [4,]    0    0    1    0    1    0    0    1    0     1
 [5,]    0    0    0    1    0    1    1    1    0     1
 [6,]    0    0    0    0    1    0    1    0    0     0
 [7,]    0    0    1    1    0    1    0    1    1     0
 [8,]    1    0    1    1    0    0    1    0    1     0
 [9,]    1    0    1    0    1    1    1    0    0     0
[10,]    1    0    1    1    1    0    0    0    0     0

Solution

  • I think this gives you what you're after:

    library(tidyverse)
    
    accumulate(sample_list, `+`) %>%
      tail(-1) %>%
      map(~ rowSums(pmax(.x - 1, 0))) %>%
      bind_cols(.name_repair = ~ paste0("sum_rep", seq_along(.x)+1)) %>%
      rowid_to_column()
    
    # A tibble: 4 x 4
      rowid sum_rep2 sum_rep3 sum_rep4
      <int>    <dbl>    <dbl>    <dbl>
    1     1        0        0        0
    2     2        0        1        2
    3     3        1        3        4
    4     4        0        0        2
    

    Sample data:

    set.seed(0)
    n <- 4
    sample_list <- replicate(4, `diag<-`(matrix(sample(0:1, n*n, replace=TRUE),n), 0), simplify = FALSE)