Search code examples
arrays, r, sapply, mapply

intersect multi-dimensional arrays in R


I am interested in intersecting multiple lists of identifiers, making a table with the number of overlaps between pairs of lists ('rk' vs 't'). I have a vague idea that sapply is the way to go but I am still stuck after searching and reading tutorials.

# The two "rk" identifier lists; every identifier is stored as its own list element.
rk1 <- as.list(c(
  "YH_sensitive_933", "CS_sensitive_1294", "YH_sensitive_944",
  "JB_persistent_1224", "CS_sensitive_1299", "YY_sensitive_922",
  "YH_sensitive_952", "YA_sensitive_949"
))
rk2 <- as.list(c(
  "YH_sensitive_944", "JB_persistent_1224", "CS_sensitive_1299",
  "YY_sensitive_922", "YH_sensitive_952", "YA_sensitive_949"
))

# The four "t" identifier lists that are compared against each "rk" list.
t1 <- as.list(c("YH_sensitive_933", "CS_sensitive_1294", "YH_sensitive_944"))
t2 <- as.list(c("YH_sensitive_944", "JB_persistent_1224"))
t3 <- as.list(c("CS_sensitive_1299", "YY_sensitive_922", "YH_sensitive_944"))
t4 <- as.list(c("YH_sensitive_952", "YA_sensitive_949"))

Edit: I thought maybe it'd be best to group the two lists of lists and try sapply/mapply as suggested

# Group the four t lists into F and the two rk lists into G.
# NOTE: assigning to `F` rebinds the name that is otherwise shorthand for FALSE.
F <- list(t1,t2,t3,t4)
G <- list(rk1,rk2)
# mapply() steps through F and G in parallel and recycles the shorter G, so the
# pairs intersected here are (t1, rk1), (t2, rk2), (t3, rk1), (t4, rk2) -- four
# pairs, not all eight rk/t combinations.
> sapply(mapply(intersect,F,G), length)
[1] 3 2 3 2

However, I'm an R beginner and would really appreciate some guidance on looping and using the apply functionals. The result above only seems to show the intersections for rk1, not those for rk2 (which should be 1 2 3 2).


Solution

  • Using lapply/sapply

     # Group the t lists (F) and the rk lists (G).
     # NOTE: this rebinds `F`, so the shorthand `F` no longer means FALSE afterwards.
     F <- list(t1, t2, t3, t4)
     G <- list(rk1, rk2)

     # Intersect every element of F with every element of G and flatten the
     # result into one list named "rk<i>.t<j>".
     # The inner step uses lapply() rather than sapply(): sapply() would collapse
     # its result to a matrix whenever all intersections for one rk list happen to
     # have the same length, which would break the naming and flattening.
     # Labels are derived from the lengths of F and G instead of hard-coded ranges.
     res <- do.call(c, setNames(
       lapply(G, function(.y) {
         setNames(lapply(F, intersect, .y), paste0("t", seq_along(F)))
       }),
       paste0("rk", seq_along(G))
     ))

     # Number of shared identifiers for each rk/t pair.
     vapply(res, length, integer(1))
     #rk1.t1 rk1.t2 rk1.t3 rk1.t4 rk2.t1 rk2.t2 rk2.t3 rk2.t4 
     #    3      2      3      2      1      2      3      2 
    
    # Spot check 1: the rk1/t1 entry of the combined result ...
    res[["rk1.t1"]]
    #[[1]]
    #[1] "YH_sensitive_933"
    
    #[[2]]
    #[1] "CS_sensitive_1294"
    
    #[[3]]
    #[1] "YH_sensitive_944"
    
    
    # ... matches the intersection computed directly for that pair.
    intersect(rk1, t1)
    #[[1]]
    #[1] "YH_sensitive_933"
    
    #[[2]]
    #[1] "CS_sensitive_1294"
    
    #[[3]]
    #[1] "YH_sensitive_944"
    
    # Spot check 2: the rk2/t1 entry of the combined result ...
    res[["rk2.t1"]]
    # [[1]]
    #[1] "YH_sensitive_944"
    
    # ... again matches the direct intersection.
    intersect(rk2, t1)
    #[[1]]
    #[1] "YH_sensitive_944"
    
    

    Or you could use mapply (basic idea from @Richard Scriven's comment)

     # Enumerate every (rk, t) pair by variable name. The anchored patterns keep
     # unrelated objects whose names merely start with "rk" or "t" out of the grid.
     # stringsAsFactors is spelled out as FALSE: `F` was rebound to a list earlier
     # in this answer, so the shorthand `F` can no longer stand in for FALSE.
     dat1 <- expand.grid(ls(pattern = "^rk[0-9]+$"), ls(pattern = "^t[0-9]+$"),
                         stringsAsFactors = FALSE)
     # Look the variables up by name and intersect them pairwise.
     # SIMPLIFY = FALSE guarantees a list result even when all intersections
     # happen to have the same length (the default would return a matrix then).
     res1 <- mapply(intersect, mget(dat1[, 1]), mget(dat1[, 2]), SIMPLIFY = FALSE)

     # First pair in the grid is rk1 vs t1 (the first column varies fastest).
     res1[[1]]
     #[[1]]
     #[1] "YH_sensitive_933"
    
     #[[2]]
     #[1] "CS_sensitive_1294"
    
     #[[3]]
     #[1] "YH_sensitive_944"
    
    

    Update

    To convert `res` to a matrix (one column per rk/t pair, padded with NA):

      # Flatten each intersection to a character vector, pad it with NA up to the
      # size of the largest intersection, and bind the vectors as columns.
      mat1 <- local({
        longest <- max(sapply(res, length))
        padded <- lapply(res, function(ids) {
          flat <- unlist(ids)
          length(flat) <- longest
          flat
        })
        do.call(cbind, padded)
      })
    
    
      mat1
      #       rk1.t1              rk1.t2               rk1.t3             
      #[1,] "YH_sensitive_933"  "YH_sensitive_944"   "CS_sensitive_1299"
      #[2,] "CS_sensitive_1294" "JB_persistent_1224" "YY_sensitive_922" 
      #[3,] "YH_sensitive_944"  NA                   "YH_sensitive_944" 
      #     rk1.t4             rk2.t1             rk2.t2              
      #[1,] "YH_sensitive_952" "YH_sensitive_944" "YH_sensitive_944"  
      #[2,] "YA_sensitive_949" NA                 "JB_persistent_1224"
      #[3,] NA                 NA                 NA                  
      #      rk2.t3              rk2.t4            
      #[1,] "CS_sensitive_1299" "YH_sensitive_952"
      #[2,] "YY_sensitive_922"  "YA_sensitive_949"
      #[3,] "YH_sensitive_944"  NA 
    

    Update2

    If you need the intersection counts as a matrix (rk lists in rows, t lists in columns):

      # Count the shared identifiers per rk/t pair, then lay the counts out with
      # one row per rk list and one column per t list (res is ordered rk-major,
      # hence the row-wise fill).
      resL <- sapply(res, length)
      m1 <- matrix(resL, nrow = 2, byrow = TRUE)
      dimnames(m1) <- list(paste0("rk", 1:2), paste0("t", 1:4))
    
      m1
      #   t1 t2 t3 t4
      #rk1  3  2  3  2
      #rk2  1  2  3  2