I have a file with around 350 columns: year, temperature for each day, and yield for different sites. I need to group (split) the data by year and then calculate the correlation between yield and each temperature column, one column at a time. I wrote the script below; however, it produces results for only one year. Any suggestions as to where the problem is? (It does not go through each year.)
# Correlate YIELD with every TMAX column, separately for each year.
#
# Calling `return()` inside a `for` loop ends the loop on its first iteration
# (and is an error outside a function), so only one year was ever processed.
# The per-year results are instead collected in `cor_by_year`, a list with one
# element per year, named by the year value. Each element is a one-column
# matrix with one row per TMAX column.
cor_by_year <- list()
for (Y in unique(data_final$YEAR)) {
  # cat("\n\n YEAR =", Y, "\n =========") # Write year Number
  subData <- data_final[data_final$YEAR == Y, ] # Subset the data
  # drop = FALSE keeps a data frame even when only one column matches "TMAX";
  # otherwise the result collapses to a vector and the loop below would
  # iterate over rows instead of columns.
  Tmax <- subData[, grepl("TMAX", colnames(subData)), drop = FALSE]
  Yield <- subData$YIELD # get YIELD column
  # Preallocate one slot per TMAX column and keep the column names so the
  # rows of the result are labelled.
  cortest <- vector("list", length(Tmax))
  names(cortest) <- colnames(Tmax)
  # seq_along() yields an empty sequence when there are no TMAX columns,
  # whereas 1:length(Tmax) would iterate over c(1, 0).
  for (i in seq_along(Tmax)) {
    cortest[[i]] <- cor(
      Tmax[[i]], Yield,
      use = "pairwise.complete.obs", method = "pearson"
    )
  }
  cor_by_year[[as.character(Y)]] <- do.call("rbind", cortest)
}
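Here is the answer: preallocate a data frame with one row per year and one column per TMAX variable, then fill it in with nested loops over years and TMAX columns.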
# Build a table of Pearson correlations between YIELD and every TMAX column,
# with one row per year and one column per TMAX variable, then write it to
# "corrresults.csv".

# Distinct years, in order of first appearance. This vector was previously
# used without ever being defined.
YEAR <- unique(data_final$YEAR)

# The set of TMAX columns does not change between years, so compute the
# column mask once instead of on every iteration.
is_tmax <- grepl("TMAX", colnames(data_final))
n_tmax <- sum(is_tmax)

# Preallocate the result: first column is the year, the remaining columns
# (named "1", "2", ...) hold the correlation for the i-th TMAX column.
datasetup <- as.data.frame(
  matrix(data = NA_real_, nrow = length(YEAR), ncol = n_tmax)
)
corrresults <- cbind(data.frame(YEAR = YEAR), datasetup)
colnames(corrresults) <- c("YEAR", seq_len(n_tmax))

for (Y in seq_along(YEAR)) {
  subData <- data_final[data_final$YEAR == YEAR[Y], ] # Subset the data
  # drop = FALSE keeps a data frame even when only one column matches "TMAX"
  Tmax <- subData[, is_tmax, drop = FALSE]
  Yield <- subData$YIELD # get YIELD column
  for (i in seq_along(Tmax)) { # Iterate over the TMAX columns
    cortest <- cor(
      Tmax[[i]], Yield,
      use = "pairwise.complete.obs", method = "pearson"
    )
    corrresults[[Y, i + 1]] <- cortest # column 1 is YEAR, hence the offset
  } # end of loop over TMAX columns
} # end of loop for YEAR

write.csv(corrresults, file = "corrresults.csv")