Search code examples
Tags: r, loops, apply, replication, glmnet

Complex Replicate function in R


I have code in R that, in the end, produces (among other things) a vector named sigma and three subsets of it, namely sub1.sigma, sub2.sigma and sub3.sigma. I want to replicate this procedure n times, let's say 10, and observe the values of the above-mentioned vectors in each run. I'm using the replicate function as follows:

# Seed the RNG once so the script as a whole is reproducible.
set.seed(2021)

# Evaluate the braced expression 10 times. simplify = FALSE keeps the results
# as a list with one element per replicate.
code <- replicate(10,{
# 100 x 5 matrix of independent N(0, 1) draws, labelled X1..X5.
data<-matrix(rnorm(100*5,mean=0,sd=1), 100, 5) 
colnames(data) <- c("X1", "X2", "X3", "X4", "X5")
data <- as.data.frame(data)
# a: multiplier applied to X1; b: scale of the added noise;
# c: number of noise draws (one per row of data).
a <- 5 
b <- 0.8
c <- 100

# Overwrite X2..X4 with a * X1 plus/minus scaled Gaussian noise.
data[,2] <- a*data[,1] - b*rnorm(c)
data[,3] <- a*data[,1] + b*rnorm(c)
data[,4] <- a*data[,1] - b*rnorm(c)

# These calls sit inside the replicated expression, so they run on every
# replicate. No coefplot function is called in this snippet.
library(glmnet)
library(coefplot)

A <- as.matrix(data)
# NOTE(review): this re-seeds the RNG inside the replicated expression, so
# every replicate restarts the random stream from the same point before the
# cross-validation and the sigma draw below. That appears to be why all ten
# printed results are identical -- confirm whether this is intended.
set.seed(1)
# For each column i, run a 10-fold cross-validated glmnet fit (alpha = 1,
# MSE criterion) of column i on the remaining columns.
results <- lapply(seq_len(ncol(A)), function(i) {
  list(
    cvfit = cv.glmnet(A[, -i] , A[, i] , standardize = TRUE , type.measure = "mse" , nfolds = 10 , alpha = 1)
  )
})

# One-row data frame with columns X1..X5 holding each fit's lambda.min.
lam <- as.data.frame(`names<-`(
  lapply(results, function(x) (x$cvfit$lambda.min)), 
  paste0("X", seq_along(results))
))

# 1 x 5 matrix of fresh N(0, 1) draws with the same column labels.
sigma<- matrix(rnorm(1*5,mean=0,sd=1), 1, 5) 
colnames(sigma) <- c("X1", "X2", "X3", "X4", "X5")
# The result of this call is not assigned, so sigma itself is left unchanged.
as.vector(sigma)
# Keep the columns of sigma whose value is <= 1, 2 and 3 times the sum of the
# lambda.min values.
sub1.sigma <- subset(sigma, select = sigma <= sum(lam))
sub2.sigma <- subset(sigma, select = sigma <= 2*sum(lam))
# This assignment is the last expression evaluated, so its value (sub3.sigma)
# is the only thing replicate() stores for each run.
sub3.sigma <- subset(sigma, select = sigma <= 3*sum(lam))
}, simplify = FALSE)

# Print the ten stored results.
code[1:10]

The above produces the following results, and I can't figure out what they correspond to: is it sigma or one of the sub.sigma subsets? I want to create 4 data frames containing the sigma, sub1.sigma, sub2.sigma and sub3.sigma values, with one row per replicate run. How can I achieve that in R? Should I use another loop function?

> code[1:10]
[[1]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[2]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[3]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[4]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[5]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[6]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[7]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[8]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[9]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

[[10]]
            X1        X4        X5
[1,] 0.8032832 0.6772685 0.3799627

Solution

  • The expression passed to replicate() does not explicitly return anything, so replicate() stores the value of its last line, which is sub3.sigma. To get all four objects, return them together as a list.

    library(glmnet)
    library(coefplot)

    # Seed once, outside replicate(): every replicate then continues the same
    # reproducible random stream instead of restarting it.
    set.seed(2021)

    # Run the simulation 10 times. Each replicate returns a named list holding
    # `sigma` and its three thresholded versions, each a 1 x 5 matrix with
    # columns X1..X5. Entries above a threshold are set to NA rather than
    # dropped, so every replicate has the same columns and the results can be
    # stacked row-wise afterwards.
    code <- replicate(10, {
      n_obs <- 100     # rows in the simulated data
      slope <- 5       # multiplier linking X2..X4 to X1
      noise_sd <- 0.8  # scale of the Gaussian noise added to X2..X4
      var_names <- paste0("X", seq_len(5))

      # Five N(0, 1) columns; X2..X4 are then replaced by noisy multiples of X1.
      A <- matrix(
        rnorm(n_obs * 5, mean = 0, sd = 1), n_obs, 5,
        dimnames = list(NULL, var_names)
      )
      A[, 2] <- slope * A[, 1] - noise_sd * rnorm(n_obs)
      A[, 3] <- slope * A[, 1] + noise_sd * rnorm(n_obs)
      A[, 4] <- slope * A[, 1] - noise_sd * rnorm(n_obs)

      # Deliberately no set.seed() here: re-seeding inside the replicated
      # expression restarts the RNG on every pass, which makes each replicate
      # draw the same sigma and defeats the purpose of replicating.
      # For each column i, cross-validate a glmnet fit of column i on the rest.
      fits <- lapply(seq_len(ncol(A)), function(i) {
        cv.glmnet(
          A[, -i], A[, i],
          standardize = TRUE, type.measure = "mse", nfolds = 10, alpha = 1
        )
      })

      # Sum of the CV-selected lambda.min values over the five fits.
      lam_total <- sum(vapply(fits, function(fit) fit$lambda.min, numeric(1)))

      sigma <- matrix(
        rnorm(5, mean = 0, sd = 1), 1, 5,
        dimnames = list(NULL, var_names)
      )

      # Copy of sigma with every entry greater than `limit` replaced by NA.
      keep_up_to <- function(limit) {
        kept <- sigma
        kept[sigma > limit] <- NA_real_
        kept
      }
      sub1.sigma <- keep_up_to(lam_total)
      sub2.sigma <- keep_up_to(2 * lam_total)
      sub3.sigma <- keep_up_to(3 * lam_total)

      # Return all four objects; lst() names the elements after the variables.
      dplyr::lst(sigma, sub1.sigma, sub2.sigma, sub3.sigma)
    }, simplify = FALSE)
    

    To combine each output across replicates into its own list element, you can use:

    # Regroup the replicate results by output name (sigma, sub1.sigma, ...),
    # then row-bind the pieces of each group so each replicate becomes one row.
    result <- lapply(
      purrr::transpose(code),
      function(pieces) do.call(rbind, pieces)
    )