Search code examples
rcurve-fittingdata-fittinglog-likelihood

Maximum likelihood estimation using a step function


I would like to fit a step function (two parameters) to some data. The code below is not doing the job. I wonder if the round() argument is the problem. However, I also tried to divide the parameters to make small (e.g. 0.001) changes in the parameters to cause significant changes. But that did not change the fit. Any idea how to properly fit this function to the data?

dat <- c(rbinom(100, 100, 0.95), rbinom(50, 100, 0.01), rbinom(100, 100, 0.95))

plot(dat/100)

stepFnc <- function(parms, t) {
  par <- as.list(parms)
  (c(rep(1-(1e-5), par$t1), rep(1e-5, par$t2), rep(1-(1e-5), t)))[1:t]
}

lines(stepFnc(c(t1 = 50, t2 = 50), length(dat)))

loglik <- function(t1 = 50, t2 = 50) {
    fit <- snowStepCurve(parms = list(t1=round(t1,0), t2=round(t2,0)), t = length(dat))
    lines(fit)
    -sum(dbinom(x = dat, size = 100, prob = fit, log = T), na.rm = T)
}

mle <- bbmle::mle2(loglik)
mle@coef

lines(snowStepCurve(mle@coef, length(dat)), lwd = 2, lty = 2, col = "orange")

Solution

  • With discrete x data I'd do a brute-force approach:

    x <- seq_along(dat)
    
    foo <- function(x, lwr, upr) {
      y <- x
      y[x <= lwr | x > upr] <- mean(dat[x <= lwr | x > upr])
      y[x > lwr & x <= upr] <- mean(dat[x > lwr & x <= upr])
      y
    }
    
    SSE <- function(lwr, upr) {
      sum((dat - foo(x, lwr, upr))^ 2) 
    }
    
    limits <- expand.grid(lwr = x, upr = x)
    limits <- limits[limits$lwr <= limits$upr,]
    nrow(limits)
    
    SSEvals <- mapply(SSE, limits$lwr, limits$upr)
    
    id <- which(SSEvals == min(SSEvals))
    optlims <- limits[id,]
    meanouter <-  mean(dat[x <= optlims$lwr | x > optlims$upr])
    meaninner <- mean(dat[x > optlims$lwr & x <= optlims$upr])
    
    bar <- function(x) {
      y <- x
      y[x <= optlims$lwr | x > optlims$upr] <- meanouter
      y[x > optlims$lwr & x <= optlims$upr] <- meaninner
      y
    }
    
    plot(dat/100)
    curve(bar(x) / 100, add = TRUE)
    

    resulting plot showing the fit