I would like to fit a step function (two parameters) to some data. The code below is not doing the job. I wonder if the round()
argument is the problem. However, I also tried to divide the parameters to make small (e.g. 0.001) changes in the parameters to cause significant changes. But that did not change the fit. Any idea how to properly fit this function to the data?
dat <- c(rbinom(100, 100, 0.95), rbinom(50, 100, 0.01), rbinom(100, 100, 0.95))
plot(dat/100)
stepFnc <- function(parms, t) {
par <- as.list(parms)
(c(rep(1-(1e-5), par$t1), rep(1e-5, par$t2), rep(1-(1e-5), t)))[1:t]
}
lines(stepFnc(c(t1 = 50, t2 = 50), length(dat)))
loglik <- function(t1 = 50, t2 = 50) {
fit <- snowStepCurve(parms = list(t1=round(t1,0), t2=round(t2,0)), t = length(dat))
lines(fit)
-sum(dbinom(x = dat, size = 100, prob = fit, log = T), na.rm = T)
}
mle <- bbmle::mle2(loglik)
mle@coef
lines(snowStepCurve(mle@coef, length(dat)), lwd = 2, lty = 2, col = "orange")
With discrete x data I'd do a brute-force approach:
x <- seq_along(dat)
foo <- function(x, lwr, upr) {
y <- x
y[x <= lwr | x > upr] <- mean(dat[x <= lwr | x > upr])
y[x > lwr & x <= upr] <- mean(dat[x > lwr & x <= upr])
y
}
SSE <- function(lwr, upr) {
sum((dat - foo(x, lwr, upr))^ 2)
}
limits <- expand.grid(lwr = x, upr = x)
limits <- limits[limits$lwr <= limits$upr,]
nrow(limits)
SSEvals <- mapply(SSE, limits$lwr, limits$upr)
id <- which(SSEvals == min(SSEvals))
optlims <- limits[id,]
meanouter <- mean(dat[x <= optlims$lwr | x > optlims$upr])
meaninner <- mean(dat[x > optlims$lwr & x <= optlims$upr])
bar <- function(x) {
y <- x
y[x <= optlims$lwr | x > optlims$upr] <- meanouter
y[x > optlims$lwr & x <= optlims$upr] <- meaninner
y
}
plot(dat/100)
curve(bar(x) / 100, add = TRUE)