I am having issues with for loops that calculate and run lags for the coefficient of variation of precipitation. I'm not quite sure how to generalize the question so I've added all the steps I've taken so far.
My main dataset "d" looks like this:
row.names timestamp station year month ndvi landcover altitude precipitation
1 1 1 A 2000 jan 0.4138 Mixed forest 2143 16.0
2 1769 2 A 2000 feb 0.4396 Mixed forest 2143 4.0
I would like to find the effects of lag 0:10 of the coefficient of variation of precipitation on the max ndvi of a year per station. Basically my code looks like this:
r <- aggr(d,c("station","landcover","year"), c("altitude=mean(altitude)","max.ndvi=NA","max.month=NA","max.timestamp=NA","max.precipitation=NA", "cv=NA"))
head(r)
station landcover year altitude max.ndvi max.month max.timestamp max.precipitation cv
1 A Mixed forest 2000 2143 NA NA NA NA NA
2 A Mixed forest 2001 2143 NA NA NA NA NA
for(i in 1:nrow(r)) {
tmp <- d[d$station==r$station[i] & d$year==r$year[i],]
idx <- which.max(tmp$ndvi);
r$max.month[i] <- as.character(tmp$month[idx]);
r$max.ndvi[i] <- tmp$ndvi[idx];
r$max.timestamp[i] <- tmp$timestamp[idx];
r$max.precipitation[i] <- tmp$precipitation[idx];
r$cv[i] <- sd(tmp$precipitation, na.rm = TRUE)/mean(tmp$precipitation, na.rm = TRUE)
}
for(lag in 0:10) {
cat("\n\n***** lag =",lag,"*****\n\n");
for(i in 1:nrow(r)) {
timestamp <- r$max.timestamp[i]-lag;
if(timestamp>0){
r$cv[i] <- r$cv[d$station==r$station[i] & d$timestamp==timestamp];
}
}
r <- na.omit(r)
print(summary(aov(max.ndvi~cv, data=r)));
for(lu in sort(unique(as.character(r$landcover)))) {
cat("\n----------------- Analysis for LU =",lu,"\n\n");
print(summary(aov(max.ndvi~cv,data=r[r$landcover==lu,])));
}
}
The problem I am getting is with the last part which assigns/loops the lags for every max.ndvi value. I would like a summary for each lag over all rows as well as a summary per land cover type.
I have tried various different combinations, but I keep getting errors. For the above code I get this error:
Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
0 (non-NA) cases
Can anyone offer any advice?
Thanks a lot.
I think some of your cv lags have all missing values (at the landcover level of detail). This code requires that the cv.lags (within the landcover class) have at least 3 observations.
#Create fake dataset in your same mold
set.seed(1)
d <- data.frame(row.names=1:40,timestamp=rep(1:10,4),station=c(rep("A",10),rep("B",10),rep("C",10),rep("D",10)),
year=rep(c(2000,2000,2000,2001,2001,2001,2001,2002,2002,2002),4),
month=rep(c("jan","feb","mar","april","may","jun","jul","aug","sep","oct"),4),ndvi=runif(40,min=0.3,max=0.6),
landcover=c(rep("Mixed Forest",10),rep("Sand",10),rep("other1",10),rep("other2",10)),altitude=runif(40,min=1500,max=5000),
precipitation=runif(40,min=2,max=20))
#Convert to data.table
require("data.table")
d <- data.table(d)
##Create your desired variables
r <- d[,list(altitude=mean(altitude,na.rm=T),
max.ndvi=max(ndvi,na.rm=T),
max.month=month[ndvi==max(ndvi,na.rm=T)],
max.timestamp=timestamp[ndvi==max(ndvi,na.rm=T)],
max.precipitation=precipitation[ndvi==max(ndvi,na.rm=T)],
cv=sd(precipitation,na.rm=T)/mean(precipitation,na.rm=T)
),by=c("station","landcover","year")]
##Create lagged variables
r[, c(paste("cv.lag", 1:10, sep="")) := lapply(1:10, function(i) c(rep(NA, i), head(cv, -i))), by=list(station,landcover)]
#Create list of the cv variable positions plus station,landcover, and year positions
pos <- grep("cv", names(r))
pos <- c(1:3,pos)
#Melt lagged variables
require("reshape2")
r.long <- melt(r[,pos,with=F],id.vars=c("station","landcover","year"),variable.name="cv",value.name="cv.val")
#Merge back on max ndvi
r.long <- merge(r.long,r[,list(station,landcover,year,max.ndvi)],by=c("station","landcover","year"),all.x=T)
#Only use landcovers and lags with at least 3 non-missing observations
r.ref <- r.long[,list(Obs=sum(is.na(cv.val)==0)),by=c("landcover","cv")][Obs>2]
r.long <- merge(r.ref,r.long,by=c("landcover","cv"))
#Print out anovas
r.long[,print(summary(aov(max.ndvi~cv.val))),by=c("landcover","cv")]
#If you just care about pvalues, use this code
lmp <- function (modelobject) {
if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
f <- summary(modelobject)$fstatistic
p <- pf(f[1],f[2],f[3],lower.tail=F)
attributes(p) <- NULL
return(p)
}
#Print out results
r.long[,list(PVAL=lmp(lm(max.ndvi~cv.val))),by=c("landcover","cv")]