I am trying to loop over regressions using purrr and caret, but I am having trouble passing arguments.
# Toy data set: three response columns (y1-y3) and three predictor
# columns (x1-x3), each holding 10 uniform random draws.
foo <- data.frame(
  y1 = runif(10), y2 = runif(10), y3 = runif(10),
  x1 = runif(10), x2 = runif(10), x3 = runif(10)
)

# Column names of the responses and predictors, kept as character vectors
# so they can be iterated over in parallel.
Yvars <- paste0("y", 1:3)
Xvars <- paste0("x", 1:3)
# library(caret)
# custom caret function to loop over vars
#
# First attempt: call train() with method = "lm" and a "cv" trainControl for
# the predictor/response given by `xvars` and `yvars`, with `data` passed along.
#
# xvars, yvars: the predictor and response to model (column names are passed
#               by the caller below).
# data:         the data frame holding the columns.
#
# The fitted object is only assigned to `lmFitTest` as the last expression, so
# the function returns it invisibly.
caretlm <- function(xvars, yvars, data) {
# Reset the RNG before each fit (presumably so the resampling is reproducible).
set.seed(1123)
# NOTE: eval(substitute(xvars)) re-evaluates the caller's *unevaluated*
# argument expression (e.g. `.x` from a purrr lambda) rather than its value;
# this is what triggers the "object '.x' not found" error reported below.
lmFitTest <- train(x = eval(substitute(xvars)), y = eval(substitute(yvars)), data = data,
method = "lm",
trControl = trainControl(method = "cv")
)
}
# library(purrr)
# Fit one model per (predictor, response) pair. The name vectors defined
# earlier are `Xvars`/`Yvars` (capitalised); lower-case `xvars`/`yvars` exist
# only as the function's parameter names.
modellist_lm <- map2(Xvars, Yvars, ~caretlm(.x, .y, foo))
# Error in eval(substitute(xvars)) : object '.x' not found
When I do not use `eval` and `substitute`, I get a different error:
# Second attempt: same as the first version but without eval(substitute()).
# `xvars` and `yvars` are handed to train() exactly as received, so when the
# caller passes column *names* (character strings), `x` and `y` are strings
# rather than data. The call then fails with the "Please use column names for
# `x`" error reported below.
#
# xvars, yvars: the predictor and response to model (column names are passed
#               by the caller below).
# data:         the data frame holding the columns.
caretlm2 <- function(xvars, yvars, data) {
# Reset the RNG before each fit (presumably so the resampling is reproducible).
set.seed(1123)
lmFitTest <- train(x = xvars, y = yvars, data = data,
method = "lm",
trControl = trainControl(method = "cv")
)
}
# Same pairwise loop using the second attempt. Iterate over the defined name
# vectors `Xvars`/`Yvars`, not the undefined lower-case `xvars`/`yvars`.
modellist_lm <- map2(Xvars, Yvars, ~caretlm2(.x, .y, foo))
# Error: Please use column names for `x`
Please suggest whether there are better methods or frameworks for this.
I'm not sure about the `x`/`y` interface, but `train()` also has a formula method, which seems easier to work with (note that I changed `Data` to `data`):
# Fit a cross-validated linear model for one response and its predictor(s).
#
# xvars: character vector of predictor column names (right-hand side terms).
# yvars: name of the response column (left-hand side).
# data:  data frame containing those columns.
#
# Returns the object produced by train().
caretlm <- function(xvars, yvars, data) {
  # Reset the RNG before each fit so every model starts from the same state.
  set.seed(1123)
  # reformulate() builds the formula `yvars ~ xvars` from plain strings, so
  # the names can be passed straight through from map2().
  # Use the `data` argument rather than reaching for the global `foo`.
  train(
    reformulate(xvars, response = yvars),
    data = data,
    method = "lm",
    trControl = trainControl(method = "cv")
  )
}
# Walk the predictor and response names in parallel, fitting one model per
# pair; `foo` is supplied as the data frame for every fit.
modellist_lm <- map2(Xvars, Yvars, caretlm, data = foo)