Search code examples
regressiongekko

GEKKO - parameter estimation with custom objective function - error code -13


I have been successful in performing a steady-state parameter estimation employing the same techniques presented in the Gekko tutorials (linear and non-linear regression). Below is the code:

# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
from io import StringIO

from gekko import GEKKO

import numpy as np
import pandas as pd


import matplotlib.pyplot as plt

# Tb, Dy, Ho, Y, H, (HA)2
# measurements of the initial and final aqueous concentrations

aqueous_species = ["Tb", "Dy", "Ho", "Y"]

# import dataframe

x_i_a_Lns_v = (
    pd.read_csv(
        StringIO(
            """Tb,Dy,Ho,Y
0.19538,1.22628,0.2242,3.39823
0.28462,1.83411,0.32435,4.90551
0.34769,2.1979,0.39609,5.89209
0.37692,2.39495,0.43794,6.57722
0.41231,2.62232,0.47382,7.09791
0.43538,2.78906,0.5052,7.45418
0.44462,2.88,0.51866,7.89266
0.46,2.92548,0.52912,7.83785
0.46615,2.94064,0.5351,7.97488
0.45846,2.91032,0.52613,7.94747
0.46,2.86485,0.52613,7.89266
0.46769,2.92548,0.53659,8.00228
0.45692,2.92548,0.5351,8.02969
0.47385,2.98611,0.55005,8.13931
0.47385,2.97095,0.54108,8.1119"""
        )
    )
    / 1000
)

x_i_a_H_v = pd.read_csv(
    StringIO(
        """H
10.01809
7.28346
5.16795
3.62036
2.50218
1.7173
1.17411
0.80491
0.54616
0.37078
0.25406
0.16932
0.11262
0.07574
0.04455"""
    )
)

x_i_o_HA2_v = pd.read_csv(
    StringIO(
        """(HA)2
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746"""
    )
)

x_f_a_Lns_v = (
    pd.read_csv(
        StringIO(
            """Tb,Dy,Ho,Y
0.19231,1.23234,0.21822,3.34342
0.26923,1.74316,0.31239,4.60405
0.33692,2.13727,0.38862,5.72766
0.37385,2.33432,0.41253,6.05652
0.40923,2.50106,0.43196,5.97431
0.38769,2.1979,0.3363,3.91892
0.33692,1.53095,0.19431,1.91287
0.24,0.82307,0.0852,0.77282
0.12,0.33347,0.03139,0.27679
0.05846,0.14552,0.01495,0.1151
0.02615,0.06669,0.00598,0.05207
0.01231,0.03032,,0.02466
,,,0.00548
0.00615,0.01364,,0.01096
,,,0.00548"""
        )
    )
    / 1000
)

# Issue with NaN (missing value)
idx = x_f_a_Lns_v[x_f_a_Lns_v["Ho"] >= 0].index
# idx = x_f_a_Lns_v.index

#### Solution
# create model to be solved locally
m = GEKKO(remote=False)

x_i_a_Lns = [m.Param(value=x_i_a_Lns_v.loc[idx, v].values) for v in aqueous_species]

x_i_a_H = m.Param(value=x_i_a_H_v.loc[idx, "H"].values)
x_i_o_HA2 = m.Param(value=x_i_o_HA2_v.loc[idx, "(HA)2"].values)

x_f_a_Lns = [m.CV(value=x_f_a_Lns_v.loc[idx, v].values) for v in aqueous_species]

for i in range(len(aqueous_species)):
    x_f_a_Lns[i].FSTATUS = 1

# equilibrium constants
K_eqs = [m.FV(value=1) for i in range(len(aqueous_species))]

for i in range(len(x_f_a_Lns_v.columns)):
    K_eqs[i].STATUS = 1


# equilibrium model
x_f_a_H = m.Intermediate(
    x_i_a_H
    + 3 * m.sum([(x_i_a_Lns[i] - x_f_a_Lns[i]) for i in range(len(aqueous_species))])
)
x_f_o_HA2 = m.Intermediate(
    x_i_o_HA2
    - 3 * m.sum([(x_i_a_Lns[i] - x_f_a_Lns[i]) for i in range(len(aqueous_species))])
)

m.Equations(
    [
        K_eqs[i]
        == ((x_i_a_Lns[i] - x_f_a_Lns[i]) * x_f_a_H ** 3)
        / (x_f_a_Lns[i] * x_f_o_HA2 ** 3)
        for i in range(len(aqueous_species))
    ]
)


# regression
m.options.IMODE = 2

m.options.EV_TYPE = 2
m.solve(disp=True)

for i, v in enumerate(aqueous_species):
    print(f"K_eq_{v}: {K_eqs[i][0]}")

Output:

K_eq_Tb: 5.7327051238
K_eq_Dy: 15.581534791
K_eq_Ho: 27.414244408
K_eq_Y: 46.925190325

This is as expected.

However, when I try and implement a specific objective function I encounter an error (-13). I have tried setting a lower bound for the Var(s) however I then receive a different error code of (-1). I hope that that somebody can identify where I have made an error. I am of the view that the objective function is such that a similar result should be found.

# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
from io import StringIO

from gekko import GEKKO

import numpy as np
import pandas as pd


import matplotlib.pyplot as plt

# Tb, Dy, Ho, Y, H, (HA)2
# measurements of the initial and final aqueous concentrations

aqueous_species = ["Tb", "Dy", "Ho", "Y"]

# import dataframes
x_i_a_Lns_v = (
    pd.read_csv(
        StringIO(
            """Tb,Dy,Ho,Y
0.19538,1.22628,0.2242,3.39823
0.28462,1.83411,0.32435,4.90551
0.34769,2.1979,0.39609,5.89209
0.37692,2.39495,0.43794,6.57722
0.41231,2.62232,0.47382,7.09791
0.43538,2.78906,0.5052,7.45418
0.44462,2.88,0.51866,7.89266
0.46,2.92548,0.52912,7.83785
0.46615,2.94064,0.5351,7.97488
0.45846,2.91032,0.52613,7.94747
0.46,2.86485,0.52613,7.89266
0.46769,2.92548,0.53659,8.00228
0.45692,2.92548,0.5351,8.02969
0.47385,2.98611,0.55005,8.13931
0.47385,2.97095,0.54108,8.1119"""
        )
    )
    / 1000
)

x_i_a_H_v = pd.read_csv(
    StringIO(
        """H
10.01809
7.28346
5.16795
3.62036
2.50218
1.7173
1.17411
0.80491
0.54616
0.37078
0.25406
0.16932
0.11262
0.07574
0.04455"""
    )
)

x_i_o_HA2_v = pd.read_csv(
    StringIO(
        """(HA)2
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746
0.4746"""
    )
)

x_f_a_Lns_v = (
    pd.read_csv(
        StringIO(
            """Tb,Dy,Ho,Y
0.19231,1.23234,0.21822,3.34342
0.26923,1.74316,0.31239,4.60405
0.33692,2.13727,0.38862,5.72766
0.37385,2.33432,0.41253,6.05652
0.40923,2.50106,0.43196,5.97431
0.38769,2.1979,0.3363,3.91892
0.33692,1.53095,0.19431,1.91287
0.24,0.82307,0.0852,0.77282
0.12,0.33347,0.03139,0.27679
0.05846,0.14552,0.01495,0.1151
0.02615,0.06669,0.00598,0.05207
0.01231,0.03032,,0.02466
,,,0.00548
0.00615,0.01364,,0.01096
,,,0.00548"""
        )
    )
    / 1000
)

# Issue with NaN (missing value)
idx = x_f_a_Lns_v[x_f_a_Lns_v["Ho"] >= 0].index
# idx = x_f_a_Lns_v.index

#### Solution
# create model to be solved locally
m = GEKKO(remote=False)

x_i_a_Lns = [m.Param(value=x_i_a_Lns_v.loc[idx, v].values) for v in aqueous_species]

x_i_a_H = m.Param(value=x_i_a_H_v.loc[idx, "H"].values)
x_i_o_HA2 = m.Param(value=x_i_o_HA2_v.loc[idx, "(HA)2"].values)


x_f_a_Lns_m = [m.Param(value=x_f_a_Lns_v.loc[idx, v].values) for v in aqueous_species]
x_f_a_Lns = [m.Var() for v in aqueous_species]
# x_f_a_Lns = [m.Var(lb=1e-12) for v in aqueous_species]

# equilibrium constants
K_eqs = [m.FV(value=1) for i in range(len(aqueous_species))]

for i in range(len(aqueous_species)):
    K_eqs[i].STATUS = 1


# equilibrium model
x_f_a_H = m.Intermediate(
    x_i_a_H
    + 3 * m.sum([(x_i_a_Lns[i] - x_f_a_Lns[i]) for i in range(len(aqueous_species))])
)
x_f_o_HA2 = m.Intermediate(
    x_i_o_HA2
    - 3 * m.sum([(x_i_a_Lns[i] - x_f_a_Lns[i]) for i in range(len(aqueous_species))])
)

m.Equations(
    [
        K_eqs[i]
        == ((x_i_a_Lns[i] - x_f_a_Lns[i]) * x_f_a_H ** 3)
        / (x_f_a_Lns[i] * x_f_o_HA2 ** 3)
        for i in range(len(aqueous_species))
    ]
)


m.Obj(
    m.sum(
        [
            ((x_f_a_Lns_m[i] - x_f_a_Lns[i]) / x_f_a_Lns_m[i]) ** 2
            for i in range(len(aqueous_species))
        ]
    )
)

# regression
m.options.IMODE = 2

# m.options.EV_TYPE = 2
m.solve(disp=True)

for i, v in enumerate(aqueous_species):
    print(f"K_eq_{v}: {K_eqs[i][0]}")


Solution

  • Error code -13 description for solver IPOPT is reported in the output:

    EXIT: Invalid number in NLP function or derivative detected.
    
     An error occured.
     The error code is  -13
    

    This means that there is likely divide by zero somewhere in the model. The other error -1 is when an infeasible solution is detected. The additional constraints prevent the solver from finding a solution because the equations and variable constraints can't be satisfied.

    Two things are needed to find a successful solution:

    1. Rearrange equation to avoid divide by zero.
    m.Equations(
        [
            K_eqs[i]
            == ((x_i_a_Lns[i] - x_f_a_Lns[i]) * x_f_a_H ** 3)
            / (x_f_a_Lns[i] * x_f_o_HA2 ** 3)
            for i in range(len(aqueous_species))
        ]
    )
    

    Rearrange to:

    m.Equations(
        [
            K_eqs[i] * (x_f_a_Lns[i] * x_f_o_HA2 ** 3)
            == ((x_i_a_Lns[i] - x_f_a_Lns[i]) * x_f_a_H ** 3)
            for i in range(len(aqueous_species))
        ]
    )
    
    1. Switch to APOPT solver:
    m.options.SOLVER=1
    

    There is now a successful solution.

    # -*- coding: utf-8 -*-
    """
    Spyder Editor
    
    This is a temporary script file.
    """
    from io import StringIO
    
    from gekko import GEKKO
    
    import numpy as np
    import pandas as pd
    
    
    import matplotlib.pyplot as plt
    
    # Tb, Dy, Ho, Y, H, (HA)2
    # measurements of the initial and final aqueous concentrations
    
    aqueous_species = ["Tb", "Dy", "Ho", "Y"]
    
    # import dataframes
    x_i_a_Lns_v = (
        pd.read_csv(
            StringIO(
                """Tb,Dy,Ho,Y
    0.19538,1.22628,0.2242,3.39823
    0.28462,1.83411,0.32435,4.90551
    0.34769,2.1979,0.39609,5.89209
    0.37692,2.39495,0.43794,6.57722
    0.41231,2.62232,0.47382,7.09791
    0.43538,2.78906,0.5052,7.45418
    0.44462,2.88,0.51866,7.89266
    0.46,2.92548,0.52912,7.83785
    0.46615,2.94064,0.5351,7.97488
    0.45846,2.91032,0.52613,7.94747
    0.46,2.86485,0.52613,7.89266
    0.46769,2.92548,0.53659,8.00228
    0.45692,2.92548,0.5351,8.02969
    0.47385,2.98611,0.55005,8.13931
    0.47385,2.97095,0.54108,8.1119"""
            )
        )
        / 1000
    )
    
    x_i_a_H_v = pd.read_csv(
        StringIO(
            """H
    10.01809
    7.28346
    5.16795
    3.62036
    2.50218
    1.7173
    1.17411
    0.80491
    0.54616
    0.37078
    0.25406
    0.16932
    0.11262
    0.07574
    0.04455"""
        )
    )
    
    x_i_o_HA2_v = pd.read_csv(
        StringIO(
            """(HA)2
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746
    0.4746"""
        )
    )
    
    x_f_a_Lns_v = (
        pd.read_csv(
            StringIO(
                """Tb,Dy,Ho,Y
    0.19231,1.23234,0.21822,3.34342
    0.26923,1.74316,0.31239,4.60405
    0.33692,2.13727,0.38862,5.72766
    0.37385,2.33432,0.41253,6.05652
    0.40923,2.50106,0.43196,5.97431
    0.38769,2.1979,0.3363,3.91892
    0.33692,1.53095,0.19431,1.91287
    0.24,0.82307,0.0852,0.77282
    0.12,0.33347,0.03139,0.27679
    0.05846,0.14552,0.01495,0.1151
    0.02615,0.06669,0.00598,0.05207
    0.01231,0.03032,,0.02466
    ,,,0.00548
    0.00615,0.01364,,0.01096
    ,,,0.00548"""
            )
        )
        / 1000
    )
    
    # Issue with NaN (missing value)
    idx = x_f_a_Lns_v[x_f_a_Lns_v["Ho"] >= 0].index
    # idx = x_f_a_Lns_v.index
    
    #### Solution
    # create model to be solved locally
    m = GEKKO(remote=False)
    
    x_i_a_Lns = [m.Param(value=x_i_a_Lns_v.loc[idx, v].values) for v in aqueous_species]
    
    x_i_a_H = m.Param(value=x_i_a_H_v.loc[idx, "H"].values)
    x_i_o_HA2 = m.Param(value=x_i_o_HA2_v.loc[idx, "(HA)2"].values)
    
    
    x_f_a_Lns_m = [m.Param(value=x_f_a_Lns_v.loc[idx, v].values) for v in aqueous_species]
    x_f_a_Lns = [m.Var() for v in aqueous_species]
    # x_f_a_Lns = [m.Var(lb=1e-12) for v in aqueous_species]
    
    # equilibrium constants
    K_eqs = [m.FV(value=1) for i in range(len(aqueous_species))]
    
    for i in range(len(aqueous_species)):
        K_eqs[i].STATUS = 1
    
    
    # equilibrium model
    x_f_a_H = m.Intermediate(
        x_i_a_H
        + 3 * m.sum([(x_i_a_Lns[i] - x_f_a_Lns[i]) for i in range(len(aqueous_species))])
    )
    x_f_o_HA2 = m.Intermediate(
        x_i_o_HA2
        - 3 * m.sum([(x_i_a_Lns[i] - x_f_a_Lns[i]) for i in range(len(aqueous_species))])
    )
    
    m.Equations(
        [
            K_eqs[i] * (x_f_a_Lns[i] * x_f_o_HA2 ** 3)
            == ((x_i_a_Lns[i] - x_f_a_Lns[i]) * x_f_a_H ** 3)
            for i in range(len(aqueous_species))
        ]
    )
    
    
    m.Obj(
        m.sum(
            [
                ((x_f_a_Lns_m[i] - x_f_a_Lns[i]) / x_f_a_Lns_m[i]) ** 2
                for i in range(len(aqueous_species))
            ]
        )
    )
    
    # regression
    m.options.IMODE = 2
    
    m.options.SOLVER=1
    
    # m.options.EV_TYPE = 2
    m.solve(disp=True)
    
    for i, v in enumerate(aqueous_species):
        print(f"K_eq_{v}: {K_eqs[i][0]}")
    
     Successful solution
     
     ---------------------------------------------------
     Solver         :  APOPT (v1.0)
     Solution time  :  0.06899999999999999 sec
     Objective      :  0.32417750846391324
     Successful solution
     ---------------------------------------------------
     
    
    K_eq_Tb: 5.489166594
    K_eq_Dy: 15.245134386
    K_eq_Ho: 30.529918885
    K_eq_Y: 54.83667882