Search code examples
r loops split sapply

loop and apply family functions for climate and yield data to calculate correlation


I have a file with around 350 columns: year, temperature for each day, and yield for different sites. I need to group or split the data by year, then calculate the correlation between yield and each temperature column, one by one. I wrote the script below; however, it produces results for only one year (it does not go through each year). Any suggestions as to where the problem is?

# Attempt to compute, for each year, the Pearson correlation between YIELD and
# every column whose name contains "TMAX", using only that year's rows.
for (Y in unique(data_final$YEAR)) {
  # cat ("\n\n YEAR =", Y, "\n =========") # Write year Number
  subData <- data_final [data_final$YEAR == Y,] # Subset the data
  Tmax <- subData[, grepl ("TMAX", colnames (subData))] # keep only the TMAX columns
  Yield <- subData$YIELD # get YIELD column
  cortest <- list () # re-created for every year, so nothing is accumulated across years
  
  # One correlation per TMAX column; incomplete pairs are dropped by `use`.
  for (i in 1:length (Tmax)) {
  cortest[[i]] <- cor(Tmax[[i]], Yield, use="pairwise.complete.obs", method = "pearson")
  
  }
  # NOTE(review): return() sits inside the year loop. Inside a function it exits
  # during the first iteration; outside a function it is an error. Either way
  # the loop never reaches the second year, hence results for one year only.
  return(do.call ("rbind", cortest))
 }

Solution

  • Here is the answer

    # Pearson correlation between YIELD and every TMAX column, computed per year.
    # Output: one row per year; column "YEAR" followed by one column per TMAX
    # column, named 1..n in the order those columns appear in data_final.
    years <- unique(data_final$YEAR)

    # The TMAX column positions are identical for every year, so find them once.
    # Working with positions (not `df[, mask]`) also avoids the data frame
    # collapsing to a plain vector when exactly one column matches.
    tmax_cols <- grep("TMAX", colnames(data_final))

    # Pre-allocate the result as numeric NA so nothing is grown inside the loop.
    cor_placeholder <- as.data.frame(
      matrix(NA_real_, nrow = length(years), ncol = length(tmax_cols))
    )
    corrresults <- cbind(data.frame(YEAR = years), cor_placeholder)
    colnames(corrresults) <- c("YEAR", seq_along(tmax_cols))

    for (y in seq_along(years)) {
      # Rows belonging to the current year only.
      sub_data <- data_final[data_final$YEAR == years[y], , drop = FALSE]
      yield <- sub_data$YIELD

      for (i in seq_along(tmax_cols)) {
        # Column i + 1 because column 1 of the result holds YEAR.
        corrresults[y, i + 1] <- cor(
          sub_data[[tmax_cols[i]]],
          yield,
          use = "pairwise.complete.obs",
          method = "pearson"
        )
      }
    }

    write.csv(corrresults, file = "corrresults.csv")