r least-squares data-fitting pde model-fitting

Fitting cell growth model (Monod) with 3 PDEs in R

Updated The problem is solved with the answer from @tpetzoldt, the original code has been modified to run the fit successfully.

Hi everybody, I'm trying to fit the experimental data on a set of 3 PDEs to find 4 coefficients including mumax, Ks, Y_(x/s), and Y_(p/s). The code I used worked with the set of 2 PDEs but is not working with this set of 3. The following is the code:

The set of PDEs needs to be fitted:

dX/dt = mumax . S . X / (Ks + S)

dS/dt = -1/Y_(x/s) . mumax . S . X / (Ks + S)

dP/dt = alpha . dX/dt + beta . X

library(deSolve)
library(ggplot2) 
library(minpack.lm) 
library(reshape2)

time <- c(0, 3, 5, 8, 9.5, 11.5, 14, 16, 18, 20, 25, 27)
X <- c(0.0904, 0.1503, 0.2407, 0.3864, 0.5201, 0.6667, 0.8159, 0.9979, 1.0673, 1.1224, 1.1512, 1.2093)
S <- c(9.0115, 8.8088, 7.9229, 7.2668, 5.3347, 4.911, 3.5354, 1.4041, 0, 0, 0, 0)
P <- c(0.0151, 0.0328, 0.0621, 0.1259, 0.2949, 0.3715, 0.4199, 0.522, 0.5345, 0.6081, 0.07662, 0.7869)
data <- data.frame(time, X, S, P)

Monod <- function(t,c,parms) {
  k1 <- parms$k1 # mumax
  k2 <- parms$k2 # Ks
  k3 <- parms$k3 # Y_(X/S)
  k4 <- parms$k4 # alpha
  k5 <- parms$k5 # beta
  r <- numeric(length(c))
  r[1] <-  k1 * c["S"] * c["X"] / ( k2 + c["S"] ) # r[1] = dCx/dt = k1.Cs.Cx/(k2+Cs)
  r[2] <- -k1 * c["S"] * c["X"] / ( ( k2 + c["S"] ) * k3 ) # r[2] = dCs/dt = -1/k3 * dCx/dt 
  r[3] <-  k4 * r[1] + k5 * c["X"] # r[3] = dCp/dt = alpha * dX/dt + beta * X
  return(list(r))       
}

residuals <- function(parms){
  cinit <- c( X=data[1,2], S=data[1,3], P=data[1,4] )
  
  t <- c(seq(0, 27, 1), data$time)
  t <- sort(unique(t))
  
  k1 <- parms[1]
  k2 <- parms[2]
  k3 <- parms[3]
  k4 <- parms[4]
  k5 <- parms[5]
  
  out <- ode( y = cinit, 
              times = t, 
              func = Monod, 
              parms = list( k1 = k1, k2 = k2, k3 = k3, k4 = k4, k5 = k5) )
  
  out_Monod <- data.frame(out)
  out_Monod <- out_Monod[out_Monod$t %in% data$time,]
  
  pred_Monod <- melt(out_Monod,id.var="time",variable.name="Substance",value.name="Conc")
  exp_Monod <- melt(data,id.var="time",variable.name="Substance",value.name="Conc")
  residuals <- pred_Monod$Conc-exp_Monod$Conc
  
  return(residuals)
}

parms <- c(k1=0.5, k2=6.5, k3=0.2, k4=1.2, k5 = 0.1)
fitval <- nls.lm(par=parms,fn=residuals)
cinit <- c(X=data[1,2], S=data[1,3], P=data[1,4])
out <- ode(y=cinit, times=seq(min(data$time), max(data$time)), func = Monod, parms=as.list(coef(fitval)))
plot(out, obs=data, mfrow=c(1, 3))

Solution

The original code had several issues:

the state variables were lower case in the data, upper case in the model and without any name in the initial values
time was called t in the data and time in the ode output
concentration had the name Concentration in one place and conc in another
The names ssq and ssqres were misleading as the function calculates the residuals and not the square and not a sum. The sum of squares is calculated in the background (see ?nls.lm)
the assignment operators <- and = were inconsistently used. This is not an error, but not really good style.
instead of artificial names k1, K2, ... one can directly use original names like mumax or alpha.
It is not clear why names like data2 or Monod2 were used as there was no data1.

Fixing all this, we come to the solution below. It runs through and fits somehow, but it can still be improved. The warnings may be ignored for now. The reason is, that parameters exceed the allowed range. This can of course be fixed but would be another question.

library(deSolve)
library(ggplot2) 
library(minpack.lm)
library(reshape2)

data <- data.frame(
  time = c(0, 3, 5, 8, 9.5, 11.5, 14, 16, 18, 20, 25, 27),
  X = c(0.0904, 0.1503, 0.2407, 0.3864, 0.5201, 0.6667, 0.8159, 0.9979, 1.0673, 1.1224, 1.1512, 1.2093),
  S = c(9.0115, 8.8088, 7.9229, 7.2668, 5.3347, 4.911, 3.5354, 1.4041, 0, 0, 0, 0),
  P = c(0.0151, 0.0328, 0.0621, 0.1259, 0.2949, 0.3715, 0.4199, 0.522, 0.5345, 0.6081, 0.07662, 0.7869)
)

Monod <- function(t, c, parms) {
  k1 <- parms$k1 # mumax
  k2 <- parms$k2 # Ks
  k3 <- parms$k3 # Y_(X/S)
  k4 <- parms$k4 # alpha
  k5 <- parms$k4 # beta
  r <- numeric(length(c))
  r[1] <-  k1 * c["S"] * c["X"] / (k2 + c["S"])
  r[2] <- -k1 * c["S"] * c["X"] / ((k2 + c["S"]) * k3)
  r[3] <-  k4 * r[1] + k5 * c["X"]
  return(list(r))       
}

cinit <- c(X = data[1, 2], S = data[1, 3], P = data[1, 4])
t <- seq(0, 27, 1)
parms <- list(k1=0.1, k2=5, k3=1, k4=0.08, k5 = 0.005)

out <- ode(y=cinit, times=t, func = Monod, parms=parms)
plot(out)

residuals <- function(parms){
  cinit <- c(X = data[1, 2], S = data[1, 3], P = data[1, 4] )
  
  # time points for which conc is reported including the points where data is available
  t <- c(seq(0, 27, 1), data$t)
  t <- sort(unique(t))
  
  # parameters from the parameter estimation routine:
  k1 <- parms[1]
  k2 <- parms[2]
  k3 <- parms[3]
  k4 <- parms[4]
  
  # solve ODE for a given set of parameters
  out <- ode(y = cinit, times = t, func = Monod, parms = list( k1 = k1, k2 = k2, k3 = k3, k4 = k4) )
  
  # Filter data that contains time points where data is available:
  out_Monod <- data.frame(out)
  out_Monod <- out_Monod[out_Monod$t %in% data$t,]
  
  # Evaluate predicted vs experimental residual
  pred_Monod <- melt(out_Monod, id.var="time", variable.name="Substance", value.name="conc")
  exp_Monod  <- melt(data, id.var="time", variable.name="Substance", value.name="conc")
  residuals  <- pred_Monod$conc - exp_Monod$conc
  
  # return predicted vs experimental residual
  return(residuals)
}

# initial guess for parameters
parms <- c(k1=0.5, k2=6.5, k3=0.2, k4=1.2)
fitval <- nls.lm(par=parms, fn=residuals)
summary(fitval)

out <- ode(y=cinit, times=seq(min(data$time), max(data$time)), func = Monod, parms=as.list(coef(fitval)))
plot(out, obs=data, mfrow=c(1, 3))

fitted model