For my package I am looking for a method to identify all function assignments in a user supplied R
script, without executing it.
So let's assume the following example script (ex.R
):
ex.R
## user supplied script
a <- function(x) 1
b1 = b2 <- function() {
y <- 1
2 -> j
j
}
d <<- function(x) {
k <- function(l) 1
k(x)
}
(function(x) 2) -> e
(function() {
y <- 1
2 -> j
j
}) -> f1 -> f2
(function() 1)()
g <- 4
5 -> h
i <- lapply(1:3, FUN = function(x) x + 1)
assign('j', function() 1)
k1 <- (function() {1}) -> k2
The function should return c("a", "b1", "b2", "d", "e", "f1", "f2", "j", "k1", "k2")
I came up with the following solution:
library(dplyr)
code <- parse("ex.R")
get_identifier <- function(nm) {
if (is.symbol(nm)) {
deparse(nm)
} else if (is.character(nm)) {
nm
} else {
stop(paste0("unknown type", typeof(nm), "for `nm`"))
}
}
get_fns <- function(expr) {
assignment_ops <- c("<-", "=", "assign", "<<-")
is_assign <- deparse(expr[[1L]]) %in% assignment_ops
is_call <- is_assign && is.call(expr[[3L]])
if (is_call) {
next_call <- deparse(expr[[3L]][[1L]])
if (next_call == "function") {
get_identifier(expr[[2L]])
} else if (next_call %in% c(assignment_ops, "(")) {
c(get_identifier(expr[[2L]]), Recall(expr[[3L]]))
} else {
NULL
}
} else {
NULL
}
}
unlist(lapply(code, get_fns))
# [1] "a" "b1" "b2" "d" "e" "f2" "f1" "j" "k1" "k2"
Which is correct for at least this use case.
But adding just two other nasty edge cases will break the code:
l1 <- (1 + (l2 <- function(x) 2 * x)(3))
(m <- function(x) x)
should return c("l2", "m")
, but it does not. My recursion is somewhere at fault, but I cannot spot the problem. How would I fix the code?
The comment showed that I should explain a bit what I want to achieve eventually:
script.R
say) and transforms this script into a script which has a Command Line Interface (script_ammended.R
say), that is which can be eventually called via Rscript ammended_script.R [ARGS]
.A contrived example may look like this:
script.R
greet <- function(msg, from = "me") {
#! short: -g
#! params: [., -f]
#! description: greeting <msg> from user <me> is shown
print(paste0("Message from <", from, ">: ", msg))
}
bye <- function() {
#! short: -b
greet("Good Bye", "system")
}
greet("Test")
This would be a typical user script, which can be quite conveniently used interactively. Now, my package should take this script and turn it into the following script:
script_amended.R
library(optigrab)
greet <- function(msg, from = "me") {
print(paste0("Message from <", from, ">: ", msg))
}
bye <- function() {
greet("Good Bye", "system")
}
msg <- opt_get("g", default = NA_character_,
description = "greeting <msg> from user <me> is shown")
from <- opt_get("f", default = "me")
bye_flag <- opt_get("b", default = FALSE)
if (!is.na(msg)) {
greet(msg, from)
quit(status = 0)
} else if (bye_flag) {
bye()
quit(status = 0)
}
After a good sleep, I could reduce the (recursive) problem to 5 simple cases:
expr
) is not a call, we stop and return NULL
.function
, we simply return the vector of potential identifiers collected so far.NULL
.extract_function <- function(expr, identifiers = NULL) {
.OP <- 1L
.LHS <- 2L
.RHS <- 3L
.ASGNM <- c("<-", "<<-", "=", "assign")
if (is.call(expr)) {
op <- deparse(expr[[.OP]])
if (op == "(") {
## bracket case: simply recurse into the call and keep identifiers
res <- Recall(expr[[-.OP]], identifiers)
} else if (op == "function") {
## function case: we can stop and return stored identifiers
res <- identifiers
} else if (op %in% .ASGNM) {
## assignment case: add LHS to potential list of identifiers
res <- Recall(expr[[.RHS]], c(as.character(expr[[.LHS]]), identifiers))
} else {
## else case: drop identifiers and recurse into function
res <- lapply(expr, extract_function, identifiers = NULL) |>
unlist()
}
} else {
res <- NULL
}
res
}
unlist(lapply(parse("ex.r"), extract_function))
# [1] "a" "b2" "b1" "d" "e" "f1" "f2" "j" "k2" "k1" "l2" "m"