Search code examples
rfunctionregressionglm

Fit a generalized linear model (glm) with a categorical variable of month using the function monthglm() in R


Problem

I have a data frame (see below) and I want to fit a general linear model (glm) with a categorical variable of the month using the function monthglm() based on the covariates of season and year.

After I run the following function, which was written by Barnett, A.G., Dobson, A.J. (2010) Analysing Seasonal Health Data. Springer. (see below), I keep on getting this error message.

If anyone can help, I would be deeply appreciative.

Load packages

library(season)
library(MASS) # for mvrnorm
library(survival) # for coxph
library(ggplot2)

Function:

monthglm<-function(formula,data,family=gaussian(),refmonth=1,
                   monthvar='month',offsetmonth=FALSE,offsetpop=NULL){
  ## checks
  if (refmonth<1|refmonth>12){stop("Reference month must be between 1 and 12")}
  ## original call with defaults (see amer package)
  ans <- as.list(match.call())
  frmls <- formals(deparse(ans[[1]]))
  add <- which(!(names(frmls) %in% names(ans)))
  call<-as.call(c(ans, frmls[add]))
  
  monthvar=with(data,get(monthvar))
  cmonthvar=class(monthvar)
  ## If month is a character, create the numbers
  if(cmonthvar%in%c('factor','character')){
    if(cmonthvar=='character'){
      if(max(nchar(monthvar))==3){mlevels=substr(month.name,1,3)}else{mlevels=month.name}
      monthvar=factor(monthvar,levels=mlevels)
    }
    months=as.numeric(monthvar)
    data$month=months # add to data for flagleap
    months=as.factor(months)
    levels(months)[months]<-month.abb[months]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## Transform month numbers to names
  if(cmonthvar%in%c('integer','numeric')){
    months.u<-as.factor(monthvar)  
    nums<-as.numeric(nochars(levels(months.u))) # Month numbers
    levels(months.u)[nums]<-month.abb[nums]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## prepare data/formula
  parts<-paste(formula)
  f<-as.formula(paste(parts[2],parts[1],parts[3:length(formula)],'+months'))
  dep<-parts[2] # dependent variable
  days<-flagleap(data=data,report=FALSE,matchin=T) # get the number of days in each month
  l<-nrow(data)
  if(is.null(offsetpop)==FALSE){poff=with(data,eval(offsetpop))} else{poff=rep(1,l)} # 
  if(offsetmonth==TRUE){moff=days$ndaysmonth/(365.25/12)} else{moff=rep(1,l)} # days per month divided by average month length
  ###  data$off<-log(poff*moff)
  off<-log(poff*moff)  # 
  fit<-glm(formula=f,data=data,family=family,offset=off)
  ## return
  toret<-list()
  toret$call<-call
  toret$glm<-fit
  toret$fitted.values<-fitted(fit)
  toret$residuals<-residuals(fit)
  class(toret)<-'monthglm'
  return(toret)
}

#The levels of a factor must match the observed values. 
#If you want to change how those values print out, you need to change the labels. 

Error message

model<-monthglm(formula=Frequency_Blue~Year+Monsoon_Season, family=gaussian,
+                       offsetmonth=TRUE, refmonth=1, data=Final_New_Blue)
Error in nochars(levels(months.u)) : could not find function "nochars"

Dataframe

   structure(list(Year = c(2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L), Month = structure(c(5L, 5L, 5L, 4L, 4L, 
4L, 8L, 8L, 8L, 1L, 1L, 1L, 9L, 9L, 9L, 7L, 7L, 7L, 6L, 6L, 6L, 
2L, 2L, 2L, 12L, 12L, 12L, 11L, 11L, 11L, 10L, 10L, 10L, 3L, 
3L, 3L), .Label = c("April", "August", "December", "Feb", "Jan", 
"July", "June", "Mar", "May", "November", "October", "September"
), class = "factor"), Frequency_Blue_Whales_Year_Month = c(76L, 
78L, 66L, 28L, 54L, 37L, 39L, 31L, 88L, 46L, 23L, 54L, 5L, 8L, 
0L, 0L, 0L, 0L, 0L, 4L, 7L, 22L, 6L, 44L, 10L, 30L, 35L, 88L, 
41L, 35L, 4L, 30L, 43L, 65L, 43L, 90L), Season = structure(c(4L, 
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
5L, 5L, 5L), .Label = c("Autumn", "Spring", "Summer", "winter", 
"Winter"), class = "factor")), class = "data.frame", row.names = c(NA, 
-36L))

Arguments

enter image description here


Solution

  • I was able to get the model to run with the following simple change to the code and your function call. I named it monthglm2 to distinguish it from the package function. With calling your data df:

    library(season)
    library(MASS) # for mvrnorm
    library(survival) # for coxph
    library(ggplot2)
    
    
    monthglm2<-function(formula,data,family=gaussian(),refmonth=1,
                       monthvar='month',offsetmonth=FALSE,offsetpop=NULL){
      ## checks
      if (refmonth<1|refmonth>12){stop("Reference month must be between 1 and 12")}
      ## original call with defaults (see amer package)
      ans <- as.list(match.call())
      frmls <- formals(deparse(ans[[1]]))
      add <- which(!(names(frmls) %in% names(ans)))
      call<-as.call(c(ans, frmls[add]))
    
      monthvar=with(data,get(monthvar))
      cmonthvar=class(monthvar)
      ## If month is a character, create the numbers
      if(cmonthvar%in%c('factor','character')){
        if(cmonthvar=='character'){
          if(max(nchar(monthvar))==3){mlevels=substr(month.name,1,3)}else{mlevels=month.name}
          monthvar=factor(monthvar,levels=mlevels)
        }
        months=as.numeric(monthvar)
        data$month=months # add to data for flagleap
        months=as.factor(months)
        levels(months)[months]<-month.abb[months]
        months<-relevel(months,ref=month.abb[refmonth]) # set reference month ### TYPO HERE, changed from months.u
      }
      ## Transform month numbers to names
      if(cmonthvar%in%c('integer','numeric')){
        months.u<-as.factor(monthvar)
        nums<-as.numeric(nochars(levels(months.u))) # Month numbers
        levels(months.u)[nums]<-month.abb[nums]
        months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
      }
      ## prepare data/formula
      parts<-paste(formula)
      f<-as.formula(paste(parts[2],parts[1],parts[3:length(formula)],'+months'))
      dep<-parts[2] # dependent variable
      days<-flagleap(data=data,report=FALSE,matchin=T) # get the number of days in each month
      l<-nrow(data)
      if(is.null(offsetpop)==FALSE){poff=with(data,eval(offsetpop))} else{poff=rep(1,l)} #
      if(offsetmonth==TRUE){moff=days$ndaysmonth/(365.25/12)} else{moff=rep(1,l)} # days per month divided by average month length
      ###  data$off<-log(poff*moff)
      off<-log(poff*moff)  #
      fit<-glm(formula=f,data=data,family=family,offset=off)
      ## return
      toret<-list()
      toret$call<-call
      toret$glm<-fit
      toret$fitted.values<-fitted(fit)
      toret$residuals<-residuals(fit)
      class(toret)<-'monthglm'
      return(toret)
    }
    
    
    df$year <- df$Year
    monthglm2(formula=Frequency_Blue_Whales_Year_Month~Year+Season, family=gaussian(),  offsetmonth=TRUE, monthvar='Month', refmonth=1, data=df)
    

    There was the additional issue in the function where I had to rename a column to year. If you look at the github for this package there is only one contributor and no issues raised. There are pros and cons to using packages such as this: they might have novel approaches that are useful, but bugs are not quickly identified and addressed. If you continue forward with seasonal analysis I'd recommend trying to learn the how to include seasonal modeling in glm directly