Search code examples
rmetaprogrammingellipsis

capture the environment of ellipsis/dots


{rlang} has this unexported function that can be used to capture the ellipsis arguments (names, expressions, and env). It powers the magic of rlang::enquos().

f <- function(...) rlang:::captureDots()

g <- function(...) f(..., b = z)

g(a = x, y)
#> $a
#> $a$expr
#> x
#> 
#> $a$env
#> <environment: R_GlobalEnv>
#> 
#> 
#> [[2]]
#> [[2]]$expr
#> y
#> 
#> [[2]]$env
#> <environment: R_GlobalEnv>
#> 
#> 
#> $b
#> $b$expr
#> z
#> 
#> $b$env
#> <environment: 0x130f8d8a8>

I would like to isolate, and hopefully understand this functionality but I don't find my way in the C code, there's a lot of it in rlang and it seems touching anything breaks everything.

My request is to have a minimal, efficient, self contained way of recreating this functionality (with or without inspiration from {rlang}).

Capturing the names and expressions is easy enough in base R, but fetching environments is not. I don't believe this can be done without low level languages.


Here are tougher examples :

n <- 1
fun1 <- function(x, ..., y) {
  n <- 2
  fun2(n, ..., j=x, u = y, v = n)
}

fun2 <- function(u, ..., v) {
  n <- 3
  rlang:::captureDots()
}

res <- fun1(1, i=n, x = n, y = n)
res
#> [[1]]
#> [[1]]$expr
#> n
#> 
#> [[1]]$env
#> <environment: 0x11232d778>
#> 
#> 
#> [[2]]
#> [[2]]$expr
#> [1] 1
#> 
#> [[2]]$env
#> <environment: R_GlobalEnv>
#> 
#> 
#> $i
#> $i$expr
#> n
#> 
#> $i$env
#> <environment: R_GlobalEnv>
#> 
#> 
#> $j
#> $j$expr
#> x
#> 
#> $j$env
#> <environment: 0x11232d778>

lapply(res, function(x) eval(x[[1]], x[[2]]))
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 1
#> 
#> $i
#> [1] 1
#> 
#> $j
#> [1] 1
dots <- (function(...) get("..."))(1, i=n, x = n, y = n)
res2 <- with(list(... = dots), fun1(...))
lapply(res2, function(x) eval(x[[1]], x[[2]]))
#> [[1]]
#> [1] 2
#> 
#> [[2]]
#> [1] 1
#> 
#> $i
#> [1] 1
#> 
#> $j
#> [1] 1
foo <- function(...) {
  bar <- function() rlang:::captureDots()
  bar()
}

foo(a=x)
#> $a
#> $a$expr
#> x
#> 
#> $a$env
#> <environment: R_GlobalEnv>

foo2 <- function(...) {
  bar <- function(...) rlang:::captureDots()
  bar()
}

foo2(a=x)
#> NULL

Solution

  • Although it's possible to write a function that can iterate down the call stack in R, collecting calls and environments along the way (see the addendum below), the code is very complex, and there are too many gotchas, such as calls made via eval, to make it a robust solution one could use in production code. Rather than trying to recreate the contents of the dots from the call stack, it is better to extract and evaluate the contents of the ... object directly.

    Unfortunately, this does require a small amount of compiled code. Under the hood, in the C code, a <...> object is stored as a DOTSXP, which is a specialized pairlist of promises. Each promise contains an unevaluated expression and the environment in which the expression should be evaluated. There are no user-facing functions in base R that allow direct extraction of environments and expressions from promises; they need to be obtained using the C functions PRENV and PREXPR, which are accessible from Rinternals.h. We can access the full pairlist of promises inside a DOTSXP using the C functions TAG, CAR and CDR, which are also accessible in Rinternals.h.

    This means that in total we need 5 trivial C functions:

    Rcpp::cppFunction('SEXP cdr(SEXP obj)    { return CDR(obj);   }')
    Rcpp::cppFunction('SEXP car(SEXP obj)    { return CAR(obj);   }')
    Rcpp::cppFunction('SEXP tag(SEXP obj)    { return TAG(obj);   }')
    Rcpp::cppFunction('SEXP prenv(SEXP obj)  { return PRENV(obj); }')
    Rcpp::cppFunction('SEXP prexpr(SEXP obj) { return PREXPR(obj);}')
    

    Although I have used Rcpp for convenience here, these functions could be written in a C file in your package, making this solution dependency free.

    With these functions now defined, we can emulate rlang:::captureDots with the following function that uses only base R and the above C functions:

    capture_dots <- function() {
      
      dots <- tryCatch(
        get("...", parent.frame()),
        error = function(e) list()
      )
      
      if(identical(dots, list())) return(list())
      
      li <- c(car(dots), cdr(dots))
      first_name <- deparse(tag(dots))
      if(first_name != 'NULL') names(li)[1] <- first_name
      
      lapply(li, function(x) {
        x <- list(x)
        while(inherits(x[[1]], 'promise')) {
          env <- prenv(x[[1]]) 
          x   <- list(prexpr(x[[1]]))
        }
        if(is.null(env)) env <- .GlobalEnv
        list(expr = x[[1]], env = env)
      })
    }
    

    (Note: Thanks to the OP for making several very useful suggestions for developing and improving this function via the comments)

    Now if we run the given examples, we have:

    Example 1

    n <- 1
    fun1 <- function(x, ..., y) {
      n <- 2
      fun2(n, ..., j=x, u = y, v = n)
    }
    
    fun2 <- function(u, ..., v) {
      n <- 3
      capture_dots()
    }
    
    res <- fun1(1, i=n, x = n, y = n)
    

    Resulting in

    res
    #> [[1]]
    #> [[1]]$expr
    #> n
    #> 
    #> [[1]]$env
    #> <environment: 0x0000022784b38020>
    #> 
    #> 
    #> [[2]]
    #> [[2]]$expr
    #> [1] 1
    #> 
    #> [[2]]$env
    #> <environment: R_GlobalEnv>
    #> 
    #> 
    #> $i
    #> $i$expr
    #> n
    #> 
    #> $i$env
    #> <environment: R_GlobalEnv>
    #> 
    #> 
    #> $j
    #> $j$expr
    #> x
    #> 
    #> $j$env
    #> <environment: 0x0000022784b38020>
    

    and

    lapply(res, function(x) eval(x[[1]], x[[2]]))
    #> [[1]]
    #> [1] 2
    #> 
    #> [[2]]
    #> [1] 1
    #> 
    #> $i
    #> [1] 1
    #> 
    #> $j
    #> [1] 1
    

    Example 2

    dots <- (function(...) get("..."))(1, i=n, x = n, y = n)
    res2 <- with(list(... = dots), fun1(...))
    lapply(res2, function(x) eval(x[[1]], x[[2]]))
    #> [[1]]
    #> [1] 2
    #> 
    #> [[2]]
    #> [1] 1
    #> 
    #> $i
    #> [1] 1
    #> 
    #> $j
    #> [1] 1
    

    Example 3

    foo <- function(...) {
      bar <- function() capture_dots()
      bar()
    }
    
    foo(a=x)
    #> $a
    #> $a$expr
    #> x
    #> 
    #> $a$env
    #> <environment: R_GlobalEnv>
    
    
    foo2 <- function(...) {
      bar <- function(...) capture_dots()
      bar()
    }
    
    foo2(a=x)
    #> list()
    

    Addendum

    To do the whole thing in R would involve walking the call stack, grabbing arguments and environments as you go. This works well for the majority of use cases but won't work when there are eval calls on the stack such as with, as in the second example. This is included to show how something similar can be done in base R. It could be developed further to handle calls to eval etc, but is already much more complex than the above solution.

    capture_dots2 <- function() {
      
        dots <- tryCatch(
        get("...", parent.frame()),
        error = function(e) list()
      )
      
      if(identical(dots, list())) return(list())
      
      ss <- lapply(sys.status(), function(x) rev(head(x, -2L)))
      ss$sys.frames <- c(ss$sys.frames[-1], parent.env(tail(ss$sys.frames, 1)[[1]]))
      stack <- list(call_stack  = ss$sys.calls, call_frames = ss$sys.frames)
      stack$call_stack <- lapply(stack$call_stack, function(x) as.call(as.list(x)))
      
      get_args <- function(x) as.list(x)[nzchar(names(as.list(x)))]
      funcs <- rev(lapply(seq_along(stack$call_stack), sys.function))
      stack$frml <- lapply(funcs, get_args)
      stack$args <- lapply(stack$call_stack, function(x) as.list(x)[-1])
      dots <- Map(function(args, frmls) {
        if(!'...' %in% names(frmls) || is.null(names(frmls))) return(NULL)
        args <- args[!sapply(args, function(x) identical(x, quote(...)))]
        if(length(frmls) == 1) return(args)
        if(is.null(names(args))) names(args) <- rep("", length(args))
        matched_frmls   <- which(names(frmls) %in% names(args))
        matched_args    <- which(names(args) %in% names(frmls))
        if(length(matched_args))  args  <- args[seq_along(args)[-matched_args]]
        if(length(matched_frmls)) frmls <- frmls[seq_along(frmls)[-matched_frmls]]
        dot_frml <- which(names(frmls) == "...")
        pre_dot <- if(dot_frml == 1) numeric() else seq(dot_frml - 1)
        unnamed_args <- which(!nzchar(names(args)))
        if(length(unnamed_args) > length(pre_dot) && length(pre_dot) > 0) {
          args <- args[-unnamed_args[pre_dot]]
        }
        args
      }, stack$args, stack$frml)
      
      envs <- stack$call_frames[lengths(dots) > 0]
      dots <- dots[lengths(dots) > 0]
      result <- list()
      for(i in seq_along(dots)) {
        for(j in rev(seq_along(dots[[i]]))) {
          li <- list(expr = dots[[i]][[j]], env = envs[[i]])
          if(identical(li$expr, quote(...))) next
          nm <- names(dots[[i]])[j]
          nms <- names(result)
          result <- c(list(li), result)
          names(result) <- c(nm, nms)
        }
      }
      rev(result)[order(names(rev(result)))]
    }