Curve Fit Parameters in Multiple ODE Function

I wan't to implement SEIR model of Effect of delay in diagnosis on transmission of COVID-19 (with little modification) using Python curve_fit and odeint. Without curve_fit, my code is like this:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt

def func_ode(y,t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d):

    S_q,S,E1,E2,H,R,D,V=y

    dSq=qS*S-qSq*S_q
    dS=qSq*S_q-(betaV1*V+betaV2*V+beta1*E1+beta2*E2)*S
    dE1=(betaV1*V+beta1*E1)*S-(e1+eta1)*E1
    dE2=(betaV2*V+beta2*E2)*S+e1*E1-(eta2+delta2+theta2)*E2
    dH=theta2*E2-(etaH+deltaH)*H # theta is for recovered
    dR=eta1*E1+eta2*E2+etaH*H # eta is for recovered
    dD=delta2*E2+deltaH*H # delta is for death
    dV=f1*E1+f2*E2+fH*H-d*V 

    dy=[dSq, dS, dE1, dE2, dH, dR, dD, dV]
    return dy

if __name__ == "__main__":
    ## Parameters (dummy)
    qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d = \
        0, 1e-4, 4e-9, 1e-9, 4e-9, 1e-9, 1/100, 1/21, 1/104, 1/10, 1/200, 1/10400, 1/3.5, 1400, 1000, 1700, 144

    ## Initial (dummy)
    y_0=[1000,100000000,10,1,0,0,0,100]

    ## Solve
    t= np.linspace(1,60,60)
    result=odeint(func_ode,y_0,t,args=(qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d))

    ## Plot
    plt.plot(t, result[:, 0], label='Sq')
    plt.plot(t, result[:, 1], label='S')
    plt.plot(t, result[:, 2], label='E1')
    plt.plot(t, result[:, 3], label='E2')
    plt.plot(t, result[:, 4], label='H')
    plt.plot(t, result[:, 5], label='R')
    plt.plot(t, result[:, 6], label='D')
    plt.plot(t, result[:, 7], label='V')
    plt.legend(loc='best')
    plt.xlabel('t')
    plt.grid()
    plt.show()
    pass

To use optimized parameters with input data, this is my not working code:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import os
import pandas as pd

def func_ode(y,t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d):

    S_q,S,E1,E2,H,R,D,V=y

    dSq=qS*S-qSq*S_q
    dS=qSq*S_q-(betaV1*V+betaV2*V+beta1*E1+beta2*E2)*S
    dE1=(betaV1*V+beta1*E1)*S-(e1+eta1)*E1
    dE2=(betaV2*V+beta2*E2)*S+e1*E1-(eta2+delta2+theta2)*E2
    dH=theta2*E2-(etaH+deltaH)*H # theta is for recovered
    dR=eta1*E1+eta2*E2+etaH*H # eta is for recovered
    dD=delta2*E2+deltaH*H # delta is for death
    dV=f1*E1+f2*E2+fH*H-d*V 

    dy=[dSq, dS, dE1, dE2, dH, dR, dD, dV]
    return dy

def func_y(t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d,y_0):
    """
    Solution to the ODE y'(t) = f(t,y,parameters) with initial condition y(0) = y_0
    """
    y = odeint(func_ode, y_0, t, args=(qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d))
    return y[1:,:]

if __name__ == "__main__":
    file_name='Data Dummy.xlsx'
    current_path=os.getcwd()
    file_path=os.path.join(current_path,file_name)
    sheet_name='Sheet1'

    df_raw=pd.read_excel(file_path,sheet_name=sheet_name)
    numpy_data=df_raw[[
        'Self-quarantine susceptible',
        'Susceptible',
        'E1 (OTG)',
        'E2 (ODP)',
        'H (Hospitalized: PDP + Positif)',
        'R (Sembuh)',
        'D (Meninggal)',
        'V (Virus)'
    ]].to_numpy()               

    ## Parameters (dummy)
    qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d = \
        0, 1e-4, 4e-9, 1e-9, 4e-9, 1e-9, 1/100, 1/21, 1/104, 1/10, 1/200, 1/10400, 1/3.5, 1400, 1000, 1700, 144

    # Used Data
    y_0=numpy_data[0,:].tolist()
    numpy_data=numpy_data[1:60,:]

    ## Reference Time
    number_of_reference_time,_=np.shape(numpy_data)

    ## Solve
    param = qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d, y_0
    t= np.linspace(1,number_of_reference_time,number_of_reference_time)
    popt, cov = curve_fit(func_y, t, numpy_data,p0=[param])

    qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d = popt

    ## Check Result
    result=odeint(func_ode,y_0,t,args=(qS,qSq,betaV1,betaV2,beta1,beta2,e1,eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d))

    ## Plot
    plt.plot(t, result[:, 0], label='Sq')
    plt.plot(t, result[:, 1], label='S')
    plt.plot(t, result[:, 2], label='E1')
    plt.plot(t, result[:, 3], label='E2')
    plt.plot(t, result[:, 4], label='H')
    plt.plot(t, result[:, 5], label='R')
    plt.plot(t, result[:, 6], label='D')
    plt.plot(t, result[:, 7], label='V')
    plt.legend(loc='best')
    plt.xlabel('t')
    plt.grid()
    plt.show()
    pass

The error result shows:

File "...\Programs\Python\Python37\lib\site-packages\scipy\optimize\minpack.py", line 458, in func_wrapped
    return func(xdata, *params) - ydata
ValueError: operands could not be broadcast together with shapes (58,8) (59,8)

It seems that curve_fit can't fit odeint that has multiple graphs? Or I miss something here?

Edit: I edit fixed y[1:,:] into y.flatten() and popt, cov = curve_fit(func_y, t, numpy_data,p0=[param]) into popt, cov = curve_fit(func_y, t, numpy_data.flatten(),p0=[param]). Also, change the input into numpy.array(list) The code can be seen in pastebin. Now the problem become:

File "....py", line 164, in <module>
    popt, cov = curve_fit(func_y, t, numpy_data.flatten(),p0=[param])
  File "...\Python\Python37\lib\site-packages\scipy\optimize\minpack.py", line 752, in curve_fit  
    res = leastsq(func, p0, Dfun=jac, full_output=1, **kwargs)
  File "...\Python\Python37\lib\site-packages\scipy\optimize\minpack.py", line 396, in leastsq    
    gtol, maxfev, epsfcn, factor, diag)
TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'

Solution

There are a few problems: First, the error message is saying that the two arrays ydata and func(xdata, *params) are of different shape: (59, 8) and (58, 8). That might be because your func_y does:

 return y[1:,:]

But also: you will probably need to "flatten" your y data and the result from the model function to be 1-dimensional (472 observations), so that you have func_y do:

 return y.flatten()

and you run curve_fit with

 popt, cov = curve_fit(func_y, t, numpy_data.flatten(), p0=[param])

But, there is another conceptual problem that (AFAIK) curve_fit cannot handle. It appears that the final argument of your function func_y(), y_0 is an 8-element array, and meant to be the lower bound on the ODE integration, and not meant to be a variable parameter in the curve fitting. curve_fit expects the 1st argument of the model function to be a 1-d array of "independent variable" (here, t) and all arguments to be scalar quantities that will become variables in the fit.

When you do

param = qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d, y_0

you are making a tuple that has your 17 variables and 1 8-element array of y_0. curve_fit will do numpy.array(param) on that, expecting each element of param to be a scalar. With the last element being a list or array, this yields an object array which gives the error message you see.

For better organization of parameters and fit results, including named parameters that can easily be fixed or given bounds, and advanced methods for exploring uncertainties in parameter values and predictions, you might consider using lmfit (https://lmfit.github.io/lmfit-py/). In particular, lmfit.Model is a class for curve fitting that will identify your function arguments by name. Importantly for your example, it allows multiple independent variables and allows those independent variables to be of any Python type (not restricted to 1d arrays). lmfit.Model also does the flattening for you. With lmfit, your example code would look like this:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from lmfit import Model

def func_ode(y,t,qS,qSq,betaV1,betaV2,beta1,beta2,e1,
             eta1,eta2,etaH,delta2,deltaH,theta2,f1,f2,fH,d):

    Sq,S,E1,E2,H,R,D,V=y

    dSq=qS*S-qSq*Sq
    dS=qSq*Sq-(betaV1*V+betaV2*V+beta1*E1+beta2*E2)*S
    dE1=(betaV1*V+beta1*E1)*S-(e1+eta1)*E1
    dE2=(betaV2*V+beta2*E2)*S+e1*E1-(eta2+delta2+theta2)*E2
    dH=theta2*E2-(etaH+deltaH)*H # theta is for recovered
    dR=eta1*E1+eta2*E2+etaH*H # eta is for recovered
    dD=delta2*E2+deltaH*H # delta is for death
    dV=f1*E1+f2*E2+fH*H-d*V

    dy=[dSq, dS, dE1, dE2, dH, dR, dD, dV]
    return dy

numpy_data=np.array([....  ]) # taken from your pastebin example

def func_y(t, qS, qSq, betaV1, betaV2, beta1, beta2, e1, eta1, 
           eta2, etaH, delta2, deltaH, theta2, f1, f2, fH, d, y_0):
    """
    Solution to the ODE y'(t) = f(t, y, parameters) with 
    initial condition y(0) = y_0
    """
    return  odeint(func_ode, y_0, t, args=(qS, qSq, betaV1, betaV2,
                                           beta1, beta2, e1, eta1,
                                           eta2, etaH, delta2, deltaH,
                                           theta2, f1, f2, fH, d))


y_0 = numpy_data[0,:].tolist()
numpy_data = numpy_data[1:60,:]
number_of_reference_time, _ = np.shape(numpy_data)
t = np.linspace(1, number_of_reference_time, number_of_reference_time)

# create a model from your function, identifying the names of the 
# independent variables (arguments to not treat as variables in the fit)
omodel = Model(func_y, independent_vars=['t', 'y_0'])

# make parameters for this model, using the argument names from 
# your model function
params = omodel.make_params(qS=0, qSq=1e-4, betaV1=4e-9, betaV2=1e-9,
                            beta1=4e-9, beta2=1e-9, e1=1/100,
                            eta1=1/21, eta2=1/104, etaH=1/10,
                            delta2=1/200, deltaH=1/10400, theta2=1/3.5,
                            f1=1400, f2=1000, fH=1700, d=144)

# do the fit to `data` with `parameters` and passing in the 
# independent variables explicitly
result = omodel.fit(numpy_data, params, t=t, y_0=y_0)

# print out fit statistics, best fit values, estimated uncertainties
# note: best fit parameters will be in `result.params['qS']`, etc
print(result.fit_report(min_correl=0.5))

# Plot the portions of the best fit results
plt.plot(t, result.best_fit[:, 0], label='Sq')
plt.plot(t, result.best_fit[:, 1], label='S')
plt.plot(t, result.best_fit[:, 2], label='E1')
plt.plot(t, result.best_fit[:, 3], label='E2')
plt.plot(t, result.best_fit[:, 4], label='H')
plt.plot(t, result.best_fit[:, 5], label='R')
plt.plot(t, result.best_fit[:, 6], label='D')
plt.plot(t, result.best_fit[:, 7], label='V')
plt.legend(loc='best')
plt.xlabel('t')
plt.grid()
plt.show()

This will print out a report of

[[Model]]
    Model(func_y)
[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 162
    # data points      = 472
    # variables        = 17
    chi-square         = 4.1921e+18
    reduced chi-square = 9.2134e+15
    Akaike info crit   = 17367.1373
    Bayesian info crit = 17437.8060
[[Variables]]
    qS:     -0.01661105 +/- 0.00259955 (15.65%) (init = 0)
    qSq:     1.2272e-04 +/- 2.5428e-05 (20.72%) (init = 0.0001)
    betaV1:  4.5773e-09 +/- 6.9243e-10 (15.13%) (init = 4e-09)
    betaV2:  7.6846e-10 +/- 1.7084e-10 (22.23%) (init = 1e-09)
    beta1:   1.3770e-10 +/- 8.4682e-12 (6.15%) (init = 4e-09)
    beta2:   6.0831e-10 +/- 1.1471e-10 (18.86%) (init = 1e-09)
    e1:      0.04271070 +/- 0.00378279 (8.86%) (init = 0.01)
    eta1:    0.00182043 +/- 3.7579e-04 (20.64%) (init = 0.04761905)
    eta2:   -0.01052990 +/- 5.4028e-04 (5.13%) (init = 0.009615385)
    etaH:    0.12337818 +/- 0.01710376 (13.86%) (init = 0.1)
    delta2:  0.00644283 +/- 5.9399e-04 (9.22%) (init = 0.005)
    deltaH:  9.0316e-05 +/- 4.1630e-05 (46.09%) (init = 9.615385e-05)
    theta2:  0.22640213 +/- 0.06697444 (29.58%) (init = 0.2857143)
    f1:      447.226564 +/- 88.1621816 (19.71%) (init = 1400)
    f2:     -240.442614 +/- 30.5435276 (12.70%) (init = 1000)
    fH:      3780.95590 +/- 543.897368 (14.39%) (init = 1700)
    d:       173.743295 +/- 24.3128286 (13.99%) (init = 144)
[[Correlations]] (unreported correlations are < 0.500)
    C(qS, deltaH)     = -0.889
    C(etaH, theta2)   = -0.713
    C(betaV1, f1)     = -0.692
    C(beta1, beta2)   = -0.681
    C(betaV2, etaH)   = -0.673
    C(qS, eta2)       = -0.652
    C(deltaH, d)      = -0.651
    C(betaV1, theta2) =  0.646
    C(f1, d)          =  0.586
    C(eta2, deltaH)   =  0.585
    C(betaV2, d)      =  0.582
    C(qSq, betaV1)    = -0.523
    C(betaV2, f1)     =  0.510

and make a plot like this:

I don't know if that is the best fit you were looking for, but I hope it helps get you started.