Search code examples
rapplylapply

R: Obtain median absolute deviation (MAD) in each column of nested lists of matrices


I am trying to obtain the column-wise MAD of my nested list and return a nested list with the same structure. My example list, adapted from this page, is shown below:

# Simulate k datasets, each a list of two multivariate-normal matrices
# (x1 and x2) with three columns, and collect them in a named list.
library(mvtnorm)

set.seed(123) # fixed seed so the simulated values are reproducible

k <- 5        # number of datasets to simulate
g1 <- 5       # number of rows drawn for x1
g2 <- 5       # number of rows drawn for x2
# NOTE(review): the sample printout further down shows 10 rows per matrix,
# so it appears to have been produced with g1 = g2 = 10 -- confirm.
mn <- 0       # mean of the first two coordinates (the third is fixed at 0)
variance <- 1 # diagonal entries of the covariance matrix
a <- 0        # off-diagonal entries (covariances) of the covariance matrix
covar <- matrix(
  c(variance, a, a,
    a, variance, a,
    a, a, variance),
  ncol = 3
)

# Preallocate instead of growing the list inside the loop.
listofdfs <- vector("list", k)
for (i in seq_len(k)) {
  # Draw x1 before x2 so the random number stream is consumed in the same
  # order on every run.
  x1 <- rmvnorm(n = g1, mean = c(mn, mn, 0), sigma = covar)
  x2 <- rmvnorm(n = g2, mean = c(mn, mn, 0), sigma = covar)
  listofdfs[[i]] <- list(x1 = x1, x2 = x2) # sub-elements named x1 and x2
}
listofdfs

# Rename the datasets df1, df2, ...; the names are derived from k so they
# always match the number of datasets.
listofdfs <- setNames(listofdfs, paste0("df", seq_len(k)))

head(listofdfs)

$df1
$df1$x1
             [,1]       [,2]       [,3]
 [1,] -0.56047565 -0.2301775  1.5587083
 [2,]  0.07050839  0.1292877  1.7150650
 [3,]  0.46091621 -1.2650612 -0.6868529
 [4,] -0.44566197  1.2240818  0.3598138
 [5,]  0.40077145  0.1106827 -0.5558411
 [6,]  1.78691314  0.4978505 -1.9666172
 [7,]  0.70135590 -0.4727914 -1.0678237
 [8,] -0.21797491 -1.0260044 -0.7288912
 [9,] -0.62503927 -1.6866933  0.8377870
[10,]  0.15337312 -1.1381369  1.2538149

$df1$x2
             [,1]        [,2]       [,3]
 [1,]  0.42646422 -0.29507148  0.8951257
 [2,]  0.87813349  0.82158108  0.6886403
 [3,]  0.55391765 -0.06191171 -0.3059627
 [4,] -0.38047100 -0.69470698 -0.2079173
 [5,] -1.26539635  2.16895597  1.2079620
 [6,] -1.12310858 -0.40288484 -0.4666554
 [7,]  0.77996512 -0.08336907  0.2533185
 [8,] -0.02854676 -0.04287046  1.3686023
 [9,] -0.22577099  1.51647060 -1.5487528
[10,]  0.58461375  0.12385424  0.2159416

I want to find the MAD of each column of x1 and x2 in all k = 5 datasets. So far I am using the following call, which gives the MAD of column 1 of x1 for $df1 to $df5:

# For every dataset in listofdfs, compute the MAD of the first column of
# its x1 matrix; the result is a list with one value per dataset.
lapply(listofdfs, function(dataset) {
  first_col <- dataset$x1[, 1]
  mad(first_col)
})

$df1
[1] 0.6720464

$df2
[1] 0.6111537

$df3
[1] 0.5019855

$df4
[1] 0.7117195

$df5
[1] 0.6085593

With this approach I would have to repeat the call for every remaining column of both x1 and x2. How can I obtain the MAD of all 3 columns of both x1 and x2, for each of the k = 5 datasets, with one single function?


Solution

  • It sounds like you're looking for something like this:

    library(purrr)
    # Walk the nesting from the outside in: dataset -> matrix -> column.
    # The result mirrors that nesting, with one MAD value per column.
    listofdfs |>
      map(\(dataset) {
        dataset |>
          map(\(mat) {
            mat |>
              ncol() |>
              seq_len() |>
              map(\(j) stats::mad(mat[, j])) # MAD of column j
          })
      })
    

    Output:

    [[1]]
    [[1]]$x1
    [[1]]$x1[[1]]
    [1] 0.5788186
    
    [[1]]$x1[[2]]
    [1] 0.5053593
    
    [[1]]$x1[[3]]
    [1] 1.551788
    
    
    [[1]]$x2
    [[1]]$x2[[1]]
    [1] 0.8124393
    
    [[1]]$x2[[2]]
    [1] 0.8201937
    
    [[1]]$x2[[3]]
    [1] 1.835052
    
    
    
    [[2]]
    [[2]]$x1
    [[2]]$x1[[1]]
    [1] 0.6696449
    
    [[2]]$x1[[2]]
    [1] 0.9381823
    
    [[2]]$x1[[3]]
    [1] 0.7699464
    
    
    [[2]]$x2
    [[2]]$x2[[1]]
    [1] 0.9090718
    
    [[2]]$x2[[2]]
    [1] 0.247186
    
    [[2]]$x2[[3]]
    [1] 1.012018
    
    
    
    [[3]]
    [[3]]$x1
    [[3]]$x1[[1]]
    [1] 0.8265921
    
    [[3]]$x1[[2]]
    [1] 0.3067163
    
    [[3]]$x1[[3]]
    [1] 0.9440248
    
    
    [[3]]$x2
    [[3]]$x2[[1]]
    [1] 0.3024162
    
    [[3]]$x2[[2]]
    [1] 0.1273361
    
    [[3]]$x2[[3]]
    [1] 0.7478695
    
    
    
    [[4]]
    [[4]]$x1
    [[4]]$x1[[1]]
    [1] 1.156026
    
    [[4]]$x1[[2]]
    [1] 1.32832
    
    [[4]]$x1[[3]]
    [1] 0.703393
    
    
    [[4]]$x2
    [[4]]$x2[[1]]
    [1] 0.8368312
    
    [[4]]$x2[[2]]
    [1] 1.234974
    
    [[4]]$x2[[3]]
    [1] 0.7706332
    
    
    
    [[5]]
    [[5]]$x1
    [[5]]$x1[[1]]
    [1] 0.1668533
    
    [[5]]$x1[[2]]
    [1] 1.520311
    
    [[5]]$x1[[3]]
    [1] 0.4594677
    
    
    [[5]]$x2
    [[5]]$x2[[1]]
    [1] 1.408639
    
    [[5]]$x2[[2]]
    [1] 0.1901449
    
    [[5]]$x2[[3]]
    [1] 0.3374949