Search code examples
r function eval r-package

How to scan all function formals in a package for a particular parameter such as `na.rm`?


I want to review the use of na.rm in the base packages initially, but the general idea is to find any given parameter in any package.

Packages

# Names of the packages currently attached to the search path.
packages <- .packages()

Functions in Packages

I don't know how reliable this is, because formals() later threw errors on some of the names it returns.

# https://stackoverflow.com/questions/8696158/find-all-functions-including-private-in-a-package
# List the names of all objects in a package's namespace, split into
# exported ("public") and internal ("private") names.
#
# pkg: name of an installed package, as a single string.
#
# Returns a list with two character vectors:
#   public  - names visible to users of the package
#   private - names that exist only inside the namespace
#
# The names cover every object in the namespace (constants, data, ...),
# not only functions, so callers must still check is.function() themselves.
functions.listFromPackage <- function(pkg = "stats") {
  ns <- getNamespace(pkg)
  everything <- ls(ns, all.names = TRUE)

  # ls("package:<pkg>") only works while the package is attached to the
  # search path; for a package that is merely installed/loaded, fall back to
  # the namespace's export list instead of failing.
  attached_name <- paste0("package:", pkg)
  exported <- if (attached_name %in% search()) {
    ls(attached_name, all.names = TRUE)
  } else {
    sort(getNamespaceExports(ns))
  }

  list("public" = exported, "private" = setdiff(everything, exported))
}

Formals

Since the list above gives me strings, I guess I have to parse them into function objects so that formals() will accept the input.

# Accumulator for matches; rbind() below appends one row per function whose
# formals include `na.rm`.
has.na = NULL;
## LOOP over fn.str in package
# NOTE(review): the loop itself is not shown here; `fn.str`, `fn` and `package`
# are presumably set by it on each iteration -- confirm against the full script.
# Turn the function name into an object by parsing and evaluating it as R code.
# NOTE(review): a name that is not a complete expression on its own (such as
# "contrasts<-") cannot be parsed this way.
fn.obj = eval(parse(text=fn.str));
x = formals( fn.obj );
# For primitives, take the formals from the function returned by args() instead.
if(is.primitive(fn.obj)) { x = formals(args(fn.obj)); }
if("na.rm" %in% names(x) )
          {
            # Record the package, the function name and the `na.rm` entry of
            # the formals (its default value).
            row = c(package, fn, x$na.rm);
            has.na = rbind(has.na, row);
          }

Preliminary Results

> has.na
    [,1]      [,2]              [,3]   
row "stats"   "approx"          "TRUE" 
row "stats"   "approxfun"       "TRUE" 
row "stats"   "density.default" "FALSE"
row "stats"   "fivenum"         "TRUE" 
row "stats"   "heatmap"         "TRUE" 
row "stats"   "IQR"             "FALSE"
row "stats"   "mad"             "FALSE"
row "stats"   "median"          "FALSE"
row "stats"   "median.default"  "FALSE"
row "stats"   "medpolish"       "FALSE"
row "stats"   "sd"              "FALSE"
row "stats"   "var"             "FALSE"
row "methods" "Summary"         "FALSE"
row "base"    ".colMeans"       "FALSE"
row "base"    ".colSums"        "FALSE"
row "base"    ".rowMeans"       "FALSE"
row "base"    ".rowSums"        "FALSE"
row "base"    "all"             "FALSE"
row "base"    "any"             "FALSE"

It is throwing a lot of errors.

Warning messages:
1: In formals(fun) : argument is not a function

# for example fn.str = "p.adjust.methods"
Error in get(fun, mode = "function", envir = envir) : 
  first argument has length > 1

# for example fn.str = "contrasts<-"
Error in parse(text = fn) : <text>:2:0: unexpected end of input
1: contrasts<-
# obvious why `eval` fails here.  How to get the formals in a variadic way (as a list of strings?)

So I am thinking my collection of functions may be inaccurate?

Question: how to write a function that captures all functions that use a parameter (with the default value) for a given set of package(s)?

==Accepted Solution==

The match.fun() was the key element of the solution. I have my function loop over the GLOBAL ls(all.names=TRUE) for local functions, then I loop over a list of packages specified in the package call.

Here is the head/str of one result:

> head(df)
        pkg  fn.is         fn.name param.value
row   stats public          approx        TRUE
row.1 stats public       approxfun        TRUE
row.2 stats public density.default       FALSE
row.3 stats public         fivenum        TRUE
row.4 stats public         heatmap        TRUE
row.5 stats public             IQR       FALSE
> str(df)
'data.frame':   44 obs. of  4 variables:
 $ pkg        : chr  "stats" "stats" "stats" "stats" ...
 $ fn.is      : chr  "public" "public" "public" "public" ...
 $ fn.name    : chr  "approx" "approxfun" "density.default" "fivenum" ...
 $ param.value: chr  "TRUE" "TRUE" "FALSE" "TRUE" ...
 - attr(*, "param")= chr "na.rm"
 - attr(*, "packages")= chr [1:2] "stats" "base"
> 

I intentionally did NOT cast any column of the data frame. For the na.rm example, the unique values are TRUE, FALSE, and "" (the parameter is listed in the function signature without a default value). Post-processing of the result can create factors and so on.

TODO: allow for the parameter to be a vector, maybe JSON.stringify using jsonlite::FUNCTIONS


Solution

  • functions in a package

    # Every name defined in the stats namespace, hidden (dot-prefixed) ones included.
    x <- ls(getNamespace('stats'), all.names = TRUE)
    # y[i] is TRUE when x[i] cannot be resolved to a function by match.fun().
    y <- vapply(
      x,
      function(candidate) {
        resolved <- try(match.fun(candidate), silent = TRUE)
        inherits(resolved, 'try-error')
      },
      logical(1)
    )
    # Keep only the names that did resolve to a function.
    f <- x[!y]
    head(f)
    # [1] ".checkMFClasses" ".getXlevels"     ".lm.fit"         ".MFclass"        ".nknots.smspl"   ".preformat.ts"  
    

    functions that have na.rm as an arg

    # z[i] is TRUE when the function named f[i] has no `na.rm` entry in its formals.
    z <- vapply(
      f,
      function(fn_name) {
        fmls <- formals(match.fun(fn_name))
        is.null(fmls$na.rm)
      },
      logical(1)
    )
    head(f[!z])
    # [1] "approx"          "approxfun"       "density.default" "fivenum"         "heatmap"         "IQR"            
    

    the default value of na.rm for each

    # Look up the `na.rm` default of each function that has one and show the first few.
    head(
      sapply(f[!z], function(fn_name) {
        formals(match.fun(fn_name))$na.rm
      })
    )
    # approx       approxfun density.default         fivenum         heatmap             IQR 
    #   TRUE            TRUE           FALSE            TRUE            TRUE           FALSE 
    

    You could combine these into a single step

    # Find every function in `package` whose `na.rm` argument has a logical
    # default, and report that default.
    #
    # package: name of a package, as a single string. Names are taken from the
    #   package namespace but resolved with match.fun(), so only functions
    #   reachable from the caller's search path are reported.
    #
    # Returns a data.frame with columns `package`, `function.` and `na.rm`
    # (one row per matching function), or NULL when nothing matches.
    f <- function(package) {
      p <- ls(getNamespace(package), all.names = TRUE)
      y <- lapply(p, function(x) {
        mf <- try(match.fun(x), silent = TRUE)

        # Skip names that are not functions, and primitives for which args()
        # has no argument list to offer.
        if (inherits(mf, 'try-error') || is.null(args(mf)))
          return(NULL)

        # formals() is NULL for primitives (sum, any, max, ...); their
        # argument list is only available through args(). Each kind of
        # function must be checked against its own source of formals,
        # otherwise every primitive is silently dropped.
        fmls <- if (is.primitive(mf)) formals(args(mf)) else formals(mf)

        # Exact-name lookup; an argument without a default is not logical
        # and is therefore skipped as well.
        if (!is.logical(fmls[['na.rm']]))
          return(NULL)

        data.frame(
          package = package,
          'function' = x,
          na.rm = fmls[['na.rm']]
        )
      })

      do.call('rbind', y)
    }
    
    f('stats')
    #    package       function. na.rm
    # 1    stats          approx  TRUE
    # 2    stats       approxfun  TRUE
    # 3    stats density.default FALSE
    # 4    stats         fivenum  TRUE
    # 5    stats         heatmap  TRUE
    # 6    stats             IQR FALSE
    # 7    stats             mad FALSE
    # 8    stats          median FALSE
    # 9    stats  median.default FALSE
    # 10   stats       medpolish FALSE
    # 11   stats              sd FALSE
    # 12   stats             var FALSE
    
    f('methods')
    #   package function. na.rm
    # 1 methods   Summary FALSE
    
    # Primitives such as all(), any(), max() and sum() are included here;
    # the exact rows depend on the R version.
    f('base')
    #    package         function. na.rm
    # 1     base         .colMeans FALSE
    # 2     base          .colSums FALSE
    # 3     base         .rowMeans FALSE
    # 4     base          .rowSums FALSE
    # 5     base               all FALSE
    # 6     base               any FALSE
    # 7     base          colMeans FALSE
    # 8     base           colSums FALSE
    # 9     base       is.unsorted FALSE
    # 10    base               max FALSE
    # 11    base      mean.default FALSE
    # 12    base               min FALSE
    # 13    base              pmax FALSE
    # 14    base          pmax.int FALSE
    # 15    base              pmin FALSE
    # 16    base          pmin.int FALSE
    # 17    base              prod FALSE
    # 18    base             range FALSE
    # 19    base     range.default FALSE
    # 20    base          rowMeans FALSE
    # 21    base rowsum.data.frame FALSE
    # 22    base    rowsum.default FALSE
    # 23    base           rowSums FALSE
    # 24    base               sum FALSE