Nested functions in R - Issues-

i am editing my question with the functions i am working with. I have two functions: the first one does the calculation (difference between 2 datasets but only for one entry in the first dataset), the second does the calculation for the whole dataset 1. I wanted to create a new function that can give me the choice to whether do the calculation for one entry or the whole dataframe.

# Sample Data
data_R <- data.frame(
  IDr= c(seq(1,5)),
  BTR= c("A","B","AB","O","O"),
  A= c(0,1,rep(0,3)),
  B= c(0,rep(0,3),1),
  C= c(0,rep(1,3),0),
  D= c(0,rep(1,4)),
  E= c(1,1,0,rep(1,1),0),stringsAsFactors=FALSE)
data_R

data_D <- data.frame(
  IDd = c(seq(1,8)),
  BTD = c("A","B","AB","O","AB","AB","O","O"),
  A=c(rep(0,5),1,1,1),
  B=c(rep(0,6),1,1),
  C=c(rep(1,7),0),
  D=rep(1,8),
  E=c(rep(0,5),rep(1,2),0),
  fg=c(rep(0.0025, each=2),rep(0.00125, each=2),rep(0.0011, each=2),rep(0.0015, each=2)),
  stringsAsFactors=FALSE)
data_D

And here are the functions

# first function 
# difference for one patient
mismatch.i = function(D, R, i, threshold) {
  D = as.data.frame(D)
  R = as.data.frame(R)
  dif = purrr::map2_df(D[-1], R[i,-1], `-`)
  dif[dif<0] = 0
  dif$mismatch=rowSums(dif)
  dif = cbind(ID = D[1],R[i,1], dif)
  dif = dif[which(dif$mismatch <= threshold),]
  return(list=dif[c(1,2,ncol(dif))])
}

# the second function
# difference for the whole data frame data_R
mismtach.matrice <- function(D,R,threshold){ 
  D = as.matrix(D)
  R = as.matrix(R)
  diff.mat = do.call(rbind, lapply(1:nrow(R), function(x) mismatch.i(D,R,x,threshold)))
  diff.mat = as.data.frame(diff.mat)
  return(diff.mat)
}

And here's an example of running the functions

mis.i = mismatch.i(data_D[,c(1,3:7)], data_R[,c(1,3:7)], 1, 4)
mis.i
  IDd R[i, 1] mismatch
1   1       1        2
2   2       1        2
3   3       1        2
4   4       1        2
5   5       1        2
6   6       1        3
7   7       1        4
8   8       1        3


mis.whole = mismtach.matrice(data_D[,c(1,3:7)], data_R[,c(1,3:7)], 4)
mis.whole
    IDd R[i, 1] mismatch
1    1       1        2
2    2       1        2
3    3       1        2
4    4       1        2
5    5       1        2
6    6       1        3
7    7       1        4
8    8       1        3
9    1       2        0
10   2       2        0
11   3       2        0
12   4       2        0
13   5       2        0
14   6       2        0
15   7       2        1
16   8       2        1
17   1       3        0
18   2       3        0
19   3       3        0
20   4       3        0
21   5       3        0
22   6       3        2
23   7       3        3
24   8       3        2
25   1       4        0
26   2       4        0
27   3       4        0
28   4       4        0
29   5       4        0
30   6       4        1
31   7       4        2
32   8       4        2
33   1       5        1
34   2       5        1
35   3       5        1
36   4       5        1
37   5       5        1
38   6       5        3
39   7       5        3
40   8       5        1

I tried to include the first function in the 2nd one, here is what i did and i get an error because obviously i don't understand how nested functions work.

# in this main function D, R and Threshold should remain as arguments
mis.test = function(D, R, threshold) { 
  D = as.data.frame(D)
  R = as.data.frame(R)
  mismatch.i = function(D, R, i, threshold) {
    dif = purrr::map2_df(D[-1], R[i,-1], `-`)
    dif[dif<0] = 0
    dif$mismatch=rowSums(dif)
    dif = cbind(ID = D[1],R[i,1], dif)
    dif = dif[which(dif$mismatch <= threshold),]
    return(list=dif[c(1,2,ncol(dif))])
  }
  diff.mat = do.call(rbind, lapply(1:nrow(R), mismatch.i(x)))
  diff.mat = as.data.frame(diff.mat)
  return(diff.mat)
}
mis.test(data_D[,c(1,3:7)],data_R[,c(1,3:7)],4)
#  Error in mismatch.i(x) : object 'x' not found

I want to be able to run this function with either 1 entry in data_R or the whole data frame. If i run mis.test(data_D[,c(1,3:7)],data_R[1,c(1,3:7)],4) i would get the result of mis.i and if i run mis.test(data_D[,c(1,3:7)],data_R[,c(1,3:7)],4) i would get the result of mis.whole. I hope it is clear, thank you in advance for your help.

Solution

Your lapply is a bit off. You need to pass in a function. Right now you are attempting to call mismatch.i(x) and x isn't defined anywhere. Plus you defined mismatch.i to have additional parameters that you are not passing. It should look like

diff.mat = do.call(rbind, lapply(1:nrow(R), function(x) mismatch.i(D, R, x, threshold)))

Here we clearly make a function that lapply can call and pass the value of x to the i= parameter and pass along the result of the values.

Since it is a nested function, you could also leave out the redundant parmaters from the inner function (since they will never change) So you could do

mis.test = function(D, R, threshold) { 
  D = as.data.frame(D)
  R = as.data.frame(R)
  mismatch.i = function(i) {
    dif = purrr::map2_df(D[-1], R[i,-1], `-`)
    dif[dif<0] = 0
    dif$mismatch=rowSums(dif)
    dif = cbind(ID = D[1],R[i,1], dif)
    dif = dif[which(dif$mismatch <= threshold),]
    return(list=dif[c(1,2,ncol(dif))])
  }
  diff.mat = do.call(rbind, lapply(1:nrow(R), function(x) mismatch.i(x)))
  diff.mat = as.data.frame(diff.mat)
  return(diff.mat)
}