How to extract the same term from a list of lists using lapply?

In the following dataset, I want to computer the function rda every 72 rows. Do this 10 times (seed).

set.seed(111)
library(vegan)
library(truncnorm)
df <- data.frame(sp1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.50, sd = 0.2), times = 10),
                 sp2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.70, sd = 0.1), times = 10),
                 env1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.45, sd = 0.6), times = 10),
                 env2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.65, sd = 0.6), times = 10),
                 seed = rep(c(1:10), each = 10)
)

Verifying that when dataset is not parsed through df$seed, the commands work fine

spe.rda  <- rda(df[,c(1:2)] ~ ., data = df[,c(3:4)])
ordiR2step(rda(df[,c(1:2)]~1, data=df[,c(3:4)]), 
           scope = formula(spe.rda), direction= "forward",
           R2scope=TRUE, pstep=1000)

When dataset IS parsed through df$seed. The spe.rda works fine and is a list of lists. Then, I tried applying lapply to the ordiR2step. The function requires scope = formula which is derived from spe.rda. I can't seem to get the formula out from there. Ideally, I want the formula to be extracted for every seed. How can I do this?

library(vegan)
spe.rda  <- lapply(split(df[1:4], df$seed), function(x) rda(x[,1:2] ~ ., data= x[,3:4])) #Generates a list of lists

spe.formula <- formula(spe.rda) 
> Error in formula.default(spe.rda) : invalid formula


output <- lapply(split(df[1:4], df$seed), ordiR2step(rda(df[,c(1:2)]~1, 
data=df[,c(3:4)]), scope= spe.formula, direction= "forward", R2scope=TRUE, pstep=1000)) 
> Error in ordiR2step(rda(df[, c(1:2)] ~ 1, data = df[, c(3:4)]), scope = spe.formula,  : object 'spe.formula' not found

EDIT: Finally, For each seed, I want to extract R2 from test to store the following in a dataframe with two columns as such. How can I do this? I can't spot the statistic when I open each of the lists but it does appear on the console.

seed variables         R2.adjusted
  1  <none>            0.00000000
  1  + env1           -0.01746233
  1  + env2           -0.10424990
  1  <All variables>  -0.15618726
  2   ...              ..
  2   ...              ..

One option is to use capture and then (painfully) parsing them. Any other options?

output <- lapply(seq_along(spe.rda), function(i){
  ordiR2step(rda(spl_dat[[i]][,c(1:2)]~1, data=spl_dat[[i]][,c(3:4)]),  scope = formula(spe.rda[[i]]), direction= "forward",
             R2scope=TRUE, pstep=1000)})

r2_text = capture.output(output)

Solution

I would probably do something like this:

set.seed(111)
library(vegan)
#> Loading required package: permute
#> Loading required package: lattice
#> This is vegan 2.6-4
library(truncnorm)
df <- data.frame(sp1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.50, sd = 0.2), times = 10),
                 sp2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.70, sd = 0.1), times = 10),
                 env1 = rep(rtruncnorm(10, a=0, b=1, mean = 0.45, sd = 0.6), times = 10),
                 env2 = rep(rtruncnorm(10, a=0, b=1, mean = 0.65, sd = 0.6), times = 10),
                 seed = rep(c(1:10), each = 10)
)

First, split the data (don't do this inside the first lapply():

spl_dat <- split(df[1:4], df$seed)

Next, call rda() on the split data using lapply() as you did initially. Splitting the data outside the first lapply() makes the split data available to the next lapply().

spe.rda  <- lapply(spl_dat, function(x) rda(x[,1:2] ~ ., data= x[,3:4])) #Generates a list of lists

Next, lapply() along the different elements of spe.rda where you can grab the corresponding data spl_dat[[i]] and the formula from the appropriate element of spe.rda: spe.rda[[i]]

lapply(seq_along(spe.rda), function(i){
  ordiR2step(rda(spl_dat[[i]][,c(1:2)]~1, data=spl_dat[[i]][,c(3:4)]), 
             scope = formula(spe.rda[[i]]), direction= "forward",
             R2scope=TRUE, pstep=1000)})
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> 
#> Step: R2.adj= 0 
#> Call: spl_dat[[i]][, c(1:2)] ~ 1 
#>  
#>                 R2.adjusted
#> <none>           0.00000000
#> + env1          -0.01746233
#> + env2          -0.10424990
#> <All variables> -0.15618726
#> [[1]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[2]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[3]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[4]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[5]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[6]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[7]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[8]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[9]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373 
#> 
#> 
#> [[10]]
#> Call: rda(formula = spl_dat[[i]][, c(1:2)] ~ 1, data = spl_dat[[i]][,
#> c(3:4)])
#> 
#>               Inertia Rank
#> Total         0.03411     
#> Unconstrained 0.03411    2
#> Inertia is variance 
#> 
#> Eigenvalues for unconstrained axes:
#>      PC1      PC2 
#> 0.024732 0.009373

^{Created on 2023-03-08 with reprex v2.0.2}

Edit: parse the printed output

Not sure this will work every time, but assuming the output is pretty regular, this may do the trick. I wrote a little function called parse_r2() which takes the printed output and finds the r-squared values and parses the relevant rows:

parse_r2s <- function(x){
  start <- grep("R2.adjusted", x)
  ends <- which(x == "")
  end <- min(ends[which(ends > start)])
  x <- x[(start + 1):(end-1)]  
  vbl = trimws(gsub("(.*)( [0-9\\.\\-]*?)$", "\\1", x))
  r2 = trimws(gsub(".*( [0-9\\.\\-]*?)$", "\\1", x))
  data.frame(variable = vbl, R2.adjusted = as.numeric(r2))
}

out <- lapply(seq_along(spe.rda), function(i){
  parse_r2s(capture.output(ordiR2step(rda(spl_dat[[i]][,c(1:2)]~1, data=spl_dat[[i]][,c(3:4)]), 
             scope = formula(spe.rda[[i]]), direction= "forward",
             R2scope=TRUE, pstep=1000)))})

out
#> [[1]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[2]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[3]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[4]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[5]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[6]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[7]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[8]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[9]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726
#> 
#> [[10]]
#>          variable R2.adjusted
#> 1          <none>  0.00000000
#> 2          + env1 -0.01746233
#> 3          + env2 -0.10424990
#> 4 <All variables> -0.15618726

^{Created on 2023-03-08 with reprex v2.0.2}