Search code examples
rdplyrpurrr

Apply a function in R on each row: function takes multiple columns from each row and returns multiple new columns


The general idea of this question has been asked here

However the answer did not work in my specific case since I want to use a third input into the function, which is a large dataframe. I have also tried using sapply as per this post but that still does not work.

My goal is to avoid having to create the new columns by hand/use a for loop to append into the new columns. Is this possible in R? Is there another more 'R' way to structure my data and/or function? I looked at purrr::pmap but I don't know how to have it output multiple columns

Here is my minimal reproducible example:

library(tidyverse)

find_sample_gaps<-function(site, analyte, df){
  
  Sample <- df%>%
    filter(site_code == site)%>%filter(analyte_code == analyte)%>%
    mutate(Year = as.numeric(format(Date, '%Y')))
  
  x<-Sample%>%
    group_by(Year)%>%
    summarize(n_samples = length(Year))
  
  gaps<-which(c(1,diff(x$Year))>1)
  
  a<-sum(x$n_samples) 
  b<-length(unique(Sample$Date))
  c<-'No gaps'
  
  if(length(gaps)>0){ 
    c<-paste('There are', as.character(gaps), 'gaps')
  }
  
  return(cbind(a,b,c))
}

# use function inside cbind to add columns to dataframe

result<-cbind(output1, find_sample_gaps(output1$site_code, output1$analyte_code, output2)) # throws error because output2 dataframe isn't the same size as result?

# another attempt also using cbind with sapply

result<-cbind(output1, t(sapply(c(output1$site_code, output1$analyte_code, output2), find_sample_gaps))) # also throws error, does not recognize the inputs into the function?

Here is my input data:

output1<-structure(list(site_code = c("a", "b", "c", "d", "e", "f", "g", 
"h", "i", "j", "j", "j", "j", "j", "j", "j", "k", "k", "k", "k", 
"k", "k", "k", "l", "l", "l", "l", "l", "l", "m", "n", "o", "p", 
"q", "r", "s", "t", "u", "v", "w", "w", "w", "w", "w", "x", "x", 
"x", "x", "x", "y", "y", "y", "z", "z", "z", "z", "z", "aa", 
"aa", "aa", "aa", "aa", "aa", "aa", "bb", "bb", "bb", "bb", "bb", 
"cc", "cc", "cc", "cc", "cc", "dd", "dd", "dd", "dd", "dd", "ee", 
"ee", "ee", "ee", "ee", "ee", "ee", "ff", "ff", "ff", "ff", "ff", 
"gg", "gg", "gg", "gg", "gg", "hh", "hh", "hh", "hh", "hh", "hh", 
"ii", "ii", "ii", "ii", "ii", "ii", "jj", "jj", "jj", "jj", "jj", 
"jj", "jj"), analyte_code = c("a", "a", "a", "a", "a", "a", "a", 
"a", "a", "b", "c", "d", "e", "a", "f", "g", "b", "c", "d", "e", 
"a", "f", "g", "c", "d", "e", "a", "f", "g", "a", "a", "a", "a", 
"a", "a", "a", "a", "a", "a", "d", "e", "a", "f", "g", "d", "e", 
"a", "f", "g", "a", "f", "g", "d", "e", "a", "f", "g", "b", "c", 
"d", "e", "a", "f", "g", "d", "e", "a", "f", "g", "d", "e", "a", 
"f", "g", "d", "e", "a", "f", "g", "b", "c", "d", "e", "a", "f", 
"g", "d", "e", "a", "f", "g", "d", "e", "a", "f", "g", "c", "d", 
"e", "a", "f", "g", "c", "d", "e", "a", "f", "g", "b", "c", "d", 
"e", "a", "f", "g")), row.names = c(NA, -115L), class = c("tbl_df", 
"tbl", "data.frame"))
output2<-structure(list(site_code = c("dd", "k", "k", "r", "aa", "ii", 
"y", "l", "l", "l", "q", "cc", "w", "bb", "c", "ff", "m", "ii", 
"p", "ff", "ff", "z", "ff", "l", "w", "hh", "ff", "ff", "ff", 
"k", "j", "bb", "x", "hh", "jj", "z", "dd", "q", "aa", "k", "bb", 
"r", "e", "j", "j", "ii", "y", "hh", "p", "p", "u", "gg", "ff", 
"p", "cc", "u", "dd", "n", "bb", "bb", "aa", "ff", "x", "k", 
"w", "x", "j", "bb", "cc", "ii", "hh", "jj", "b", "hh", "y", 
"u", "cc", "hh", "aa", "b", "jj", "hh", "gg", "y", "r", "a", 
"aa", "aa", "z", "ff", "ee", "g", "hh", "hh", "cc", "hh", "hh", 
"h", "l", "k"), analyte_code = c("e", "b", "b", "c", "f", "d", 
"a", "a", "a", "d", "f", "c", "g", "a", "a", "e", "a", "e", "a", 
NA, "c", "a", "d", "c", "d", "b", "a", "f", "a", "g", "b", "c", 
"f", "f", "c", "a", "f", "a", "e", "g", "c", "a", "a", "b", "e", 
"a", "e", "c", "a", "a", "a", "a", "b", "a", "e", "a", "f", "a", 
"a", "a", "c", "e", "a", "e", "a", "c", "e", "c", "a", "e", "c", 
"a", "a", "g", "c", "a", "b", "b", "f", "b", "e", "d", "d", "c", 
"c", "a", "a", "b", "f", "f", "b", "a", "e", "g", "c", "a", "a", 
"a", "e", "d"), Date = structure(c(13326, 14287, 14403, 17669, 
16330, 18603, 17428, 15502, 18708, 13780, 17757, 18582, 18087, 
18582, 17433, 13326, 17674, 13668, 18059, 17966, 16701, 17142, 
14915, 16861, 13999, 15502, 15412, 16856, 14551, 18708, 12128, 
14314, 13326, 12563, 13780, 17224, 17611, 15703, 16239, 13780, 
12970, 16096, 16544, 17134, 18603, 13780, 18388, 15684, 19157, 
18684, 17449, 18857, 15075, 18746, 12683, 15618, 17142, 18634, 
15601, 17065, 15926, 12970, 17611, 16692, 13943, 12871, 16958, 
13263, 13451, 16179, 13094, 15044, 18131, 12212, 15966, 16410, 
14775, 13283, 16239, 16391, 17050, 13283, 16085, 16330, 17362, 
18393, 18087, 13724, 14396, 14396, 17331, 19106, 14215, 13388, 
14088, 18241, 18143, 17187, 13486, 12482), class = "Date")), row.names = c(NA, 
100L), class = "data.frame")

Solution

  • Does this give you what you are after? Note I changed the return of the function to be a list.

    find_sample_gaps<-function(site, analyte, df){
        
        Sample <- df%>%
            filter(site_code == site)%>%filter(analyte_code == analyte)%>%
            mutate(Year = as.numeric(format(Date, '%Y')))
        
        x<-Sample%>%
            group_by(Year)%>%
            summarize(n_samples = length(Year))
        
        gaps<-which(c(1,diff(x$Year))>1)
        
        a<-sum(x$n_samples) 
        b<-length(unique(Sample$Date))
        c<-'No gaps'
        
        if(length(gaps)>0){ 
            c<-paste('There are', as.character(gaps), 'gaps')
        }
        
        comb <- list(a = a, b = b, c = c)
        
        return(comb)
    }
    
    output3 <- output1 %>%
        mutate(a = find_sample_gaps(site, analyte, all_of(output2))$a,
               b = find_sample_gaps(site, analyte, all_of(output2))$b,
               c = find_sample_gaps(site, analyte, all_of(output2))$c) 
    
    output3
       site_code analyte_code     a     b c      
       <chr>     <chr>        <int> <int> <chr>  
     1 a         a                1     1 No gaps
     2 b         a                1     1 No gaps
     3 c         a                1     1 No gaps
     4 d         a                1     1 No gaps
     5 e         a                1     1 No gaps
     6 f         a                1     1 No gaps
     7 g         a                1     1 No gaps
     8 h         a                1     1 No gaps
     9 i         a                1     1 No gaps
    10 j         b                1     1 No gaps