Search code examples
r apply lapply sapply mapply

Add a new column to each df in a list of dfs using apply function


Hello, I have a list of data frames and I want to add a new column to each of them. My current for-loop approach gets the job done; however, I am looking for a more elegant approach, something from the apply family of functions.

Here is a reprex-


# Week-of-year number for a single date, with weeks running Monday to Sunday.
#
# Week 1 is the week that contains 1 January. If the year does not start on a
# Monday, week 1 is the (partial) week before the first Monday and the first
# Monday starts week 2. If the year starts on a Monday, 1-7 January are week 1.
#
# Arguments:
#   x  A single Date (anything as.POSIXlt() accepts). The function is scalar:
#      call it once per date, e.g. through sapply().
#
# Returns:
#   A numeric week number (1-based).
week_no <- function(x) {
  # Scalar by design (the branches below use `if`); fail early with a clear
  # message rather than somewhere inside on vector input.
  stopifnot(length(x) == 1L)

  date_parts <- as.POSIXlt(x)

  # 0-based day of the year: 1 January is 0.
  day_of_year <- date_parts$yday

  # Numeric weekday shifted so Monday = 0, ..., Sunday = 6. Using the numeric
  # weekday instead of format(x, "%A") == "Monday" keeps this independent of
  # the session locale.
  days_since_monday <- (date_parts$wday + 6L) %% 7L

  # Weekday of 1 January (Monday = 0), then the 0-based day of the year of the
  # first Monday (0 when the year itself starts on a Monday).
  jan1_weekday <- (days_since_monday - day_of_year) %% 7L
  first_monday <- (7L - jan1_weekday) %% 7L

  if (first_monday == 0L) {
    # Year starts on a Monday: days 0-6 are week 1, days 7-13 are week 2, ...
    # (dividing the 1-based day number instead would push Sundays one week up).
    week <- day_of_year %/% 7 + 1
  } else if (day_of_year < first_monday) {
    # Partial week before the first Monday.
    week <- 1
  } else {
    # The first Monday opens week 2.
    week <- (day_of_year - first_monday) %/% 7 + 2
  }

  week
}


# Reprex data: a named list (l1, l2, l3) of three data frames with the same
# three columns. Every value is supplied as a string; Timestamp entries look
# like "YYYY-MM-DDTHH". With data.frame()'s default name checking, the
# `Q code` column ends up named "Q.code".
mapp_dfs <- list(
  l1 = data.frame(
    Timestamp = c(
      "1993-08-30T00", "2002-01-16T00", "2010-01-13T00",
      "2016-11-08T00", "2019-05-13T00"
    ),
    Value = c("13.53", "1.55", "5.63", "7.32", "7.89"),
    `Q code` = c("1", "2", "3", "4", "5")
  ),
  l2 = data.frame(
    Timestamp = c(
      "1994-07-10T00", "2003-01-26T00", "2011-01-13T00",
      "2016-11-08T00", "2019-05-23T00"
    ),
    Value = c("13.53", "1.55", "5.63", "9.31", "5.63"),
    `Q code` = c("1", "1", "3", "4", "1")
  ),
  l3 = data.frame(
    Timestamp = c(
      "1995-08-30T00", "2004-01-16T00", "2012-01-13T00",
      "2013-11-08T00", "2019-06-03T00"
    ),
    Value = c("1.36", "5.63", "5.63", "7.32", "5.22"),
    `Q code` = c("2", "2", "5", "4", "4")
  )
)


# Attempt with transform(): the week_nums expression is evaluated once per
# data frame, so week_no() receives all dates derived from that data frame's
# Timestamp column in a single call rather than one date at a time.
lapply(mapp_dfs, transform, week_nums = week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])))

#********************** This method works *******************

# Working approach: visit each data frame by position and attach a week_nums
# column. strsplit() on "T" yields date/hour pieces; after unlist() the
# c(TRUE, FALSE) mask keeps every other element (the date pieces). sapply()
# then calls week_no() once per date.
for (i in seq_along(mapp_dfs)) {
  mapp_dfs[[i]][["week_nums"]] <- sapply(
    as.Date(
      unlist(strsplit(mapp_dfs[[i]][["Timestamp"]], "T"))[c(TRUE, FALSE)]
    ),
    week_no
  )
}

I attempted a few other approaches, but they led to errors:


# Attempt with an anonymous function and cbind(): week_no() is called once per
# data frame with the whole vector of dates taken from x$Timestamp, not once
# per date.
lapply(mapp_dfs, function(x) 
  cbind(x, week_nums = week_no(as.Date(unlist(strsplit(x$Timestamp, "T"))[ c(TRUE,FALSE) ]))))

#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year,  : 
#  'from' must be of length 1
#In addition: Warning message:
#In if (Jan1_day == "Monday") { :
#  the condition has length > 1 and only the first element will be used

#Error in seq.Date(as.Date(paste(year, "-01-01", sep = "")), as.Date(paste(year,  : 
# 'from' must be of length 1 
# Attempt with mapply(): the "week_nums" argument is an ordinary expression
# evaluated in the calling environment, so the bare name Timestamp is looked
# up outside of any data frame.
mapply(cbind, mapp_dfs, "week_nums"=week_no(as.Date(unlist(strsplit(Timestamp, "T"))[ c(TRUE,FALSE) ])), SIMPLIFY=F)

# Error in strsplit(Timestamp, "T") : object 'Timestamp' not found 

Solution

  • The function week_no is not vectorised, so you need some kind of loop that calls it once for each date obtained after the strsplit step. Your for loop already uses sapply for this, so we can do the same inside lapply.

    # For each data frame: take the date part of every Timestamp, compute the
    # week number one date at a time with sapply(), and bind the result on as
    # a new week_nums column.
    lapply(mapp_dfs, function(x) {
      date_strings <- unlist(strsplit(x$Timestamp, "T"))[c(TRUE, FALSE)]
      cbind(x, week_nums = sapply(as.Date(date_strings), week_no))
    })
    
    #$l1
    #      Timestamp Value Q.code week_nums
    #1 1993-08-30T00 13.53      1        36
    #2 2002-01-16T00  1.55      2         3
    #3 2010-01-13T00  5.63      3         3
    #4 2016-11-08T00  7.32      4        46
    #5 2019-05-13T00  7.89      5        20
    
    #$l2
    #      Timestamp Value Q.code week_nums
    #1 1994-07-10T00 13.53      1        28
    #2 2003-01-26T00  1.55      1         4
    #3 2011-01-13T00  5.63      3         3
    #4 2016-11-08T00  9.31      4        46
    #5 2019-05-23T00  5.63      1        21
    
    #$l3
    #      Timestamp Value Q.code week_nums
    #1 1995-08-30T00  1.36      2        36
    #2 2004-01-16T00  5.63      2         3
    #3 2012-01-13T00  5.63      5         3
    #4 2013-11-08T00  7.32      4        45
    #5 2019-06-03T00  5.22      4        23