Search code examples
pythonoptimizationscipycurve-fitting

scipy curve_fit with constraint and fixing points


I'm trying to fit a function using SciPy's optimize.curve_fit to some scattered data, but I need the area under the fitted curve to be the same as that calculated based on the scattered data, and also that the curve passes through the initial and end points of the data. In order to do that, I am using the area (integral) defined by the scattered data in a penalization formulation as in this answer, while weighing the fit with the parameter sigma as proposed here.

Unfortunately, I can't get my fit to pass through the initial and end points when including the integral constraint. If I disregard the integral constraint, the fit works fine and passes through the point. Is it not possible to satisfy both the integral and points constraint? I am using Python 3.7.10 on Windows 10.

import scipy
import numpy as np
import matplotlib.pyplot as plt

x = scipy.linspace(0, scipy.pi, 100)
y = scipy.sin(x) + (0. + scipy.rand(len(x))*0.4)

def Func(x,a,b,c):
    return a*x**2 + b*x + c

# modified function definition with penalization
def FuncPen(x,a,b,c):
    integral = scipy.integrate.quad(Func, x[0], x[-1], args=(a,b,c))[0]
    penalization = abs(np.trapz(y,x)-integral)*10000
    return a*x**2 + b*x + c + penalization

sigma = np.ones(len(x))
sigma[[0, -1]] = 0.0001 # first and last points

popt1, _ = scipy.optimize.curve_fit(Func, x, y, sigma=sigma)
popt2, _ = scipy.optimize.curve_fit(FuncPen, x, y, sigma=sigma)

y_fit1 = Func(x, *popt1)
y_fit2 = Func(x, *popt2)    

fig, ax = plt.subplots(1)
ax.scatter(x,y)
ax.plot(x,y_fit1, color='g', alpha=0.75, label='curve_fit')
ax.plot(x,y_fit2, color='b', alpha=0.75, label='constrained')
plt.legend()

enter image description here


Solution

  • Many thanks to Erwin Kalvelagen for the mind-opening comment on the question. I am posting my solution here:

    import scipy
    import numpy as np
    import matplotlib.pyplot as plt
    
    x = scipy.linspace(0, scipy.pi, 100)
    y = scipy.sin(x) + (0. + scipy.rand(len(x))*0.4)
    
    def Func(x,a,b,c):
        return a*x**2 + b*x + c
    
    # modified function definition with penalization
    def FuncPen(x,a,b,c):
        integral = scipy.integrate.quad(Func, x[0], x[-1], args=(a,b,c))[0]
        penalization = abs(np.trapz(y,x)-integral)*10000
        return a*x**2 + b*x + c + penalization
    
    # Writing as a general constraint problem
    def FuncNew(x,params):
        return params[2]*x**2 + params[1]*x + params[0]
    
    def ConstraintIntegral(params):
        integral = scipy.integrate.quad(FuncNew, x[0], x[-1], args=(params,))[0]
        return integral- np.trapz(y,x)
    
    def ConstraintBegin(params):
        return y[0] - FuncNew(x[0],params)
    
    def ConstraintEnd(params):
        return y[-1] - FuncNew(x[-1],params)
    
    def Objective(params,x,y):
        y_pred = FuncNew(x,params)
        return np.sum((y_pred - y) ** 2) # least squares
    
    cons = [{'type':'eq', 'fun': ConstraintIntegral},
            {'type':'eq', 'fun': ConstraintBegin},
            {'type':'eq', 'fun': ConstraintEnd}]
    
    
    sigma = np.ones(len(x))
    sigma[[0, -1]] = 0.0001 # first and last points
    
    popt1, _ = scipy.optimize.curve_fit(Func, x, y, sigma=sigma)
    popt2, _ = scipy.optimize.curve_fit(FuncPen, x, y, sigma=sigma)
    
    y_fit1 = Func(x, *popt1)
    y_fit2 = Func(x, *popt2)    
    
    new = scipy.optimize.minimize(Objective, x0=popt1, args=(x,y), constraints=cons)
    popt3 = new.x
    y_fit3 = FuncNew(x,popt3)
    
    fig, ax = plt.subplots(1)
    ax.scatter(x,y)
    ax.plot(x,y_fit1, color='g', alpha=0.75, label='curve_fit')
    ax.plot(x,y_fit2, color='b', alpha=0.75, label='constrained')
    ax.plot(x,y_fit3, color='r', alpha=0.75, label='generally constrained')
    plt.legend()
    

    enter image description here