Search code examples
rfor-loopregressionformulalapply

How to extract the same term from a list of lists using lapply?


In the following dataset, I want to computer the function rda every 72 rows. Do this 10 times (seed).

set.seed(111)
library(vegan)
library(truncnorm)
df <- data.frame(sp1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.50, sd = 0.2), times = 10),
                 sp2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.70, sd = 0.1), times = 10),
                 env1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.45, sd = 0.6), times = 10),
                 env2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.65, sd = 0.6), times = 10),
                 seed = rep(c(1:10), each = 10)
)

Verifying that when dataset is not parsed through df$seed, the commands work fine

spe.rda  <- rda(df[,c(1:2)] ~ ., data = df[,c(3:4)])
ordiR2step(rda(df[,c(1:2)]~1, data=df[,c(3:4)]), 
           scope = formula(spe.rda), direction= "forward",
           R2scope=TRUE, pstep=1000)

When dataset IS parsed through df$seed. The spe.rda works fine and is a list of lists. Then, I tried applying lapply to the ordiR2step. The function requires scope = formula which is derived from spe.rda. I can't seem to get the formula out from there. Ideally, I want the formula to be extracted for every seed. How can I do this?

library(vegan)
spe.rda  <- lapply(split(df[1:4], df$seed), function(x) rda(x[,1:2] ~ ., data= x[,3:4])) #Generates a list of lists

enter image description here

spe.formula <- formula(spe.rda) 
> Error in formula.default(spe.rda) : invalid formula


output <- lapply(split(df[1:4], df$seed), ordiR2step(rda(df[,c(1:2)]~1, 
data=df[,c(3:4)]), scope= spe.formula, direction= "forward", R2scope=TRUE, pstep=1000)) 
> Error in ordiR2step(rda(df[, c(1:2)] ~ 1, data = df[, c(3:4)]), scope = spe.formula,  : object 'spe.formula' not found

EDIT: Finally, For each seed, I want to extract R2 from test to store the following in a dataframe with two columns as such. How can I do this? I can't spot the statistic when I open each of the lists but it does appear on the console.

seed variables         R2.adjusted
  1  <none>            0.00000000
  1  + env1           -0.01746233
  1  + env2           -0.10424990
  1  <All variables>  -0.15618726
  2   ...              ..
  2   ...              ..

One option is to use capture and then (painfully) parsing them. Any other options?

output <- lapply(seq_along(spe.rda), function(i){
  ordiR2step(rda(spl_dat[[i]][,c(1:2)]~1, data=spl_dat[[i]][,c(3:4)]),  scope = formula(spe.rda[[i]]), direction= "forward",
             R2scope=TRUE, pstep=1000)})

r2_text = capture.output(output)

Solution

  • I would probably do something like this:

    set.seed(111)
    library(vegan)
    #> Loading required package: permute
    #> Loading required package: lattice
    #> This is vegan 2.6-4
    library(truncnorm)
    df <- data.frame(sp1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.50, sd = 0.2), times = 10),
                     sp2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.70, sd = 0.1), times = 10),
                     env1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.45, sd = 0.6), times = 10),
                     env2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.65, sd = 0.6), times = 10),
                     seed = rep(c(1:10), each = 10)
    )
    

    First, split the data (don't do this inside the first lapply():

    spl_dat <- split(df[1:4], df$seed)
    

    Next, call rda() on the split data using lapply() as you did initially. Splitting the data outside the first lapply() makes the split data available to the next lapply().

    spe.rda  <- lapply(spl_dat, function(x) rda(x[,1:2] ~ ., data= x[,3:4])) #Generates a list of lists
    

    Next, lapply() along the different elements of spe.rda where you can grab the corresponding data spl_dat[[i]] and the formula from the appropriate element of spe.rda: spe.rda[[i]]

    lapply(seq_along(spe.rda), function(i){
      ordiR2step(rda(spl_dat[[i]][,c(1:2)]~1, data=spl_dat[[i]][,c(3:4)]), 
                 scope = formula(spe.rda[[i]]), direction= "forward",
                 R2scope=TRUE, pstep=1000)})
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> 
    #> Step: R2.adj= 0 
    #> Call: spl_dat[[i]][, c(1:2)] ~ 1 
    #>  
    #>                 R2.adjusted
    #> <none>           0.00000000
    #> + env1          -0.01746233
    #> + env2          -0.10424990
    #> <All variables> -0.15618726
    #> [[1]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[2]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[3]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[4]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[5]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[6]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[7]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[8]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[9]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373 
    #> 
    #> 
    #> [[10]]
    #> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
    #> c(3:4)])
    #> 
    #>               Inertia Rank
    #> Total         0.03411     
    #> Unconstrained 0.03411    2
    #> Inertia is variance 
    #> 
    #> Eigenvalues for unconstrained axes:
    #>      PC1      PC2 
    #> 0.024732 0.009373
    

    Created on 2023-03-08 with reprex v2.0.2


    Edit: parse the printed output

    Not sure this will work every time, but assuming the output is pretty regular, this may do the trick. I wrote a little function called parse_r2() which takes the printed output and finds the r-squared values and parses the relevant rows:

    parse_r2s <- function(x){
      start <- grep("R2.adjusted", x)
      ends <- which(x == "")
      end <- min(ends[which(ends > start)])
      x <- x[(start + 1):(end-1)]  
      vbl = trimws(gsub("(.*)( [0-9\\.\\-]*?)$", "\\1", x))
      r2 = trimws(gsub(".*( [0-9\\.\\-]*?)$", "\\1", x))
      data.frame(variable = vbl, R2.adjusted = as.numeric(r2))
    }
    
    out <- lapply(seq_along(spe.rda), function(i){
      parse_r2s(capture.output(ordiR2step(rda(spl_dat[[i]][,c(1:2)]~1, data=spl_dat[[i]][,c(3:4)]), 
                 scope = formula(spe.rda[[i]]), direction= "forward",
                 R2scope=TRUE, pstep=1000)))})
    
    out
    #> [[1]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[2]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[3]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[4]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[5]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[6]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[7]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[8]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[9]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    #> 
    #> [[10]]
    #>          variable R2.adjusted
    #> 1          <none>  0.00000000
    #> 2          + env1 -0.01746233
    #> 3          + env2 -0.10424990
    #> 4 <All variables> -0.15618726
    

    Created on 2023-03-08 with reprex v2.0.2