Having the following
> library(gbm)
> mdl<-gbm::gbm(data=iris,formula=Species ~ .,distribution="gaussian")
Then:
> mdl
gbm::gbm(formula = Species ~ ., distribution = "gaussian", data = iris)
A gradient boosted model with gaussian loss function.
100 iterations were performed.
There were 4 predictors of which 4 had non-zero influence.
What is what I want.
However, If I use do.call:
> mdl<-do.call(gbm::gbm,list(data=iris,formula=Species ~ .,distribution="gaussian"))
Then
> mdl
(function (formula = formula(data), distribution = "bernoulli",
data = list(), weights, var.monotone = NULL, n.trees = 100,
interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.1,
bag.fraction = 0.5, train.fraction = 1, cv.folds = 0, keep.data = TRUE,
...
...
...
A gradient boosted model with gaussian loss function.
100 iterations were performed.
There were 4 predictors of which 4 had non-zero influence.
It prints the definition of gbm
function at the beginning, then the gbm
call (including the whole dataset iris) and finally the text that I am looking for.
I need to use do.call
because I want to parametrize all the arguments for any classification algorithm and then put the output inside a verbatimTextOutput
in shiny
.
Is there any way to prevent do.call
to return the definition of gbm
as well as the entire dataset?. Or maybe other way to execute gbm
passing arguments inside a list?
Thanks.
We have to do two things to prevent this behavior:
First, we can use substitute()
on iris
. This will prevent the call
from listing all columns of the iris
data set.
Second, we should avoid using gbm::gbm
in do.call
and rather load the library and use the string "gbm"
instead to call the function:
library(gbm)
mdl <- do.call("gbm",
list(data = substitute(iris),
formula= Species ~ .,
distribution = "gaussian")
)
mdl
#> gbm(formula = Species ~ ., distribution = "gaussian", data = iris)
#> A gradient boosted model with gaussian loss function.
#> 100 iterations were performed.
#> There were 4 predictors of which 4 had non-zero influence.
If we use gbm::gbm
inside do call it will include the whole function definition in the captured call:
mdl <- do.call(gbm::gbm,
list(data = substitute(iris),
formula= Species ~ .,
distribution = "gaussian")
)
mdl
#> (function (formula = formula(data), distribution = "bernoulli",
#> data = list(), weights, var.monotone = NULL, n.trees = 100,
#> interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.1,
#> bag.fraction = 0.5, train.fraction = 1, cv.folds = 0, keep.data = TRUE,
#> verbose = FALSE, class.stratify.cv = NULL, n.cores = NULL)
#> {
#> mcall <- match.call()
#> lVerbose <- if (!is.logical(verbose)) {
#> FALSE
#> }
#> else {
#> verbose
#> }
#> mf <- match.call(expand.dots = FALSE)
#> m <- match(c("formula", "data", "weights", "offset"), names(mf),
#> 0)
#> mf <- mf[c(1, m)]
#> mf$drop.unused.levels <- TRUE
#> mf$na.action <- na.pass
#> mf[[1]] <- as.name("model.frame")
#> m <- mf
#> mf <- eval(mf, parent.frame())
#> Terms <- attr(mf, "terms")
#> w <- model.weights(mf)
#> offset <- model.offset(mf)
#> y <- model.response(mf)
#> if (missing(distribution)) {
#> distribution <- guessDist(y)
#> }
#> if (is.character(distribution)) {
#> distribution <- list(name = distribution)
#> }
#> if (!is.element(distribution$name, getAvailableDistributions())) {
#> stop("Distribution ", distribution$name, " is not supported.")
#> }
#> if (distribution$name == "multinomial") {
#> warning("Setting `distribution = \"multinomial\"` is ill-advised as it is ",
#> "currently broken. It exists only for backwards compatibility. ",
#> "Use at your own risk.", call. = FALSE)
#> }
#> var.names <- attributes(Terms)$term.labels
#> x <- model.frame(terms(reformulate(var.names)), data = data,
#> na.action = na.pass)
#> response.name <- as.character(formula[[2L]])
#> class.stratify.cv <- getStratify(class.stratify.cv, d = distribution)
#> group <- NULL
#> num.groups <- 0
#> if (distribution$name != "pairwise") {
#> nTrain <- floor(train.fraction * nrow(x))
#> }
#> else {
#> distribution.group <- distribution[["group"]]
#> if (is.null(distribution.group)) {
#> stop(paste("For pairwise regression, `distribution` must be a list of",
#> "the form `list(name = \"pairwise\", group = c(\"date\",",
#> "\"session\", \"category\", \"keywords\"))`."))
#> }
#> i <- match(distribution.group, colnames(data))
#> if (any(is.na(i))) {
#> stop("Group column does not occur in data: ", distribution.group[is.na(i)],
#> ".")
#> }
#> group <- factor(do.call(paste, c(data[, distribution.group,
#> drop = FALSE], sep = ":")))
#> if ((!missing(weights)) && (!is.null(weights))) {
#> w.min <- tapply(w, INDEX = group, FUN = min)
#> w.max <- tapply(w, INDEX = group, FUN = max)
#> if (any(w.min != w.max)) {
#> stop("For `distribution = \"pairwise\"`, all instances for the same ",
#> "group must have the same weight.")
#> }
#> w <- w * length(w.min)/sum(w.min)
#> }
#> perm.levels <- levels(group)[sample(1:nlevels(group))]
#> group <- factor(group, levels = perm.levels)
#> ord.group <- order(group, -y)
#> group <- group[ord.group]
#> y <- y[ord.group]
#> x <- x[ord.group, , drop = FALSE]
#> w <- w[ord.group]
#> num.groups.train <- max(1, round(train.fraction * nlevels(group)))
#> nTrain <- max(which(group == levels(group)[num.groups.train]))
#> Misc <- group
#> }
#> cv.error <- NULL
#> if (cv.folds == 1) {
#> cv.folds <- 0
#> }
#> if (cv.folds > 1) {
#> cv.results <- gbmCrossVal(cv.folds = cv.folds, nTrain = nTrain,
#> n.cores = n.cores, class.stratify.cv = class.stratify.cv,
#> data = data, x = x, y = y, offset = offset, distribution = distribution,
#> w = w, var.monotone = var.monotone, n.trees = n.trees,
#> interaction.depth = interaction.depth, n.minobsinnode = n.minobsinnode,
#> shrinkage = shrinkage, bag.fraction = bag.fraction,
#> var.names = var.names, response.name = response.name,
#> group = group)
#> cv.error <- cv.results$error
#> p <- cv.results$predictions
#> }
#> gbm.obj <- gbm.fit(x = x, y = y, offset = offset, distribution = distribution,
#> w = w, var.monotone = var.monotone, n.trees = n.trees,
#> interaction.depth = interaction.depth, n.minobsinnode = n.minobsinnode,
#> shrinkage = shrinkage, bag.fraction = bag.fraction, nTrain = nTrain,
#> keep.data = keep.data, verbose = lVerbose, var.names = var.names,
#> response.name = response.name, group = group)
#> gbm.obj$train.fraction <- train.fraction
#> gbm.obj$Terms <- Terms
#> gbm.obj$cv.error <- cv.error
#> gbm.obj$cv.folds <- cv.folds
#> gbm.obj$call <- mcall
#> gbm.obj$m <- m
#> if (cv.folds > 1) {
#> gbm.obj$cv.fitted <- p
#> }
#> if (distribution$name == "pairwise") {
#> gbm.obj$ord.group <- ord.group
#> gbm.obj$fit <- gbm.obj$fit[order(ord.group)]
#> }
#> gbm.obj
#> })(formula = Species ~ ., distribution = "gaussian", data = iris)
#> A gradient boosted model with gaussian loss function.
#> 100 iterations were performed.
#> There were 4 predictors of which 4 had non-zero influence.
Created on 2023-02-20 by the reprex package (v2.0.1)