Search code examples
python · matplotlib · fft · curve-fitting · least-squares

How do I apply weighting to the fitted curve, rather than to the data points, during fitting? Which import/function should I use?


I've tried `from scipy.optimize import curve_fit`, but its weighting only seems to apply to the data points. I want to apply a 1/Y² weighting to the residuals of the fitted Y curve during the fit (weighted least squares), i.e. weight by the fitted Y values rather than the measured ones. I'm not sure how to target yfit instead of ydata, or whether I should use something else entirely. Any help would be appreciated.

# Replicate measurements: concentrations (xdata) and responses (ydata),
# one line per concentration level (6/6/6/5/6/5/6/5 replicates).
xdata = np.array([
    0.10, 0.10, 0.10, 0.10, 0.10, 0.10,
    1.12, 1.12, 1.12, 1.12, 1.12, 1.12,
    2.89, 2.89, 2.89, 2.89, 2.89, 2.89,
    6.19, 6.19, 6.19, 6.19, 6.19,
    23.30, 23.30, 23.30, 23.30, 23.30, 23.30,
    108.98, 108.98, 108.98, 108.98, 108.98,
    255.33, 255.33, 255.33, 255.33, 255.33, 255.33,
    1188.62, 1188.62, 1188.62, 1188.62, 1188.62,
], dtype=float)

ydata = np.array([
    0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528,
    -1.15391, -1.46504, -0.942926, -2.12808, -2.90962, -1.51093,
    -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019,
    -13.8455, -16.9911, -11.0881, -10.6453, -15.1288,
    -52.4669, -68.2344, -74.7673, -70.2025, -65.8181, -55.7344,
    -271.286, -329.521, -436.097, -654.034, -396.45,
    -826.195, -1084.43, -984.344, -1124.8, -1076.27, -1072.03,
    -3968.22, -3114.46, -3771.61, -2805.4, -4078.05,
], dtype=float)

def fourPL(x, A, B, C, D):
    """Four-parameter logistic (4PL): D + (A - D) / (1 + (x/C)**B).

    For B > 0 the curve runs from A (as x -> 0) to D (as x -> inf);
    C sets the midpoint (the value is (A+D)/2 at x == C) and B the
    steepness.  Works elementwise on numpy arrays.
    """
    shape = (x / C) ** B
    return D + (A - D) / (1.0 + shape)

# Unweighted fit.  A starting guess (p0) keeps the 4PL fit from wandering
# off with the default all-ones initial parameters on data spanning four
# decades in x.
params, params_covariance = spo.curve_fit(fourPL, xdata, ydata,
                                          p0=(1, 1, 1000, -6000))
params_list = params
roundy = [round(num, 4) for num in params_list]
print(roundy)

# Weighted fit.  curve_fit minimizes sum(((ydata - yfit) / sigma)**2), so a
# 1/Y**2 chi-square weighting needs sigma = |ydata| — NOT sigma = 1/ydata**2,
# which would weight each point by ydata**4 instead.  absolute_sigma=False
# because these are relative weights, not measured standard deviations.
# NOTE: this weights by the *data* values; weighting by the fitted curve is
# not possible with curve_fit's sigma and needs a manual least_squares fit
# with a custom residual function.
popt2, pcov2 = spo.curve_fit(fourPL, xdata, ydata, p0=(1, 1, 1000, -6000),
                             sigma=np.abs(ydata), absolute_sigma=False)
yfit2 = fourPL(xdata, *popt2)
params_list2 = popt2
roundy2 = [round(num, 4) for num in params_list2]
print(roundy2)

# Plot the raw data with both fitted curves on a dense x grid.
x_min, x_max = np.amin(xdata), np.amax(xdata)
xs = np.linspace(x_min, x_max, 1000)
plt.scatter(xdata, ydata)
plt.plot(xs, fourPL(xs, *params), 'm--', label='No Weight')
plt.plot(xs, fourPL(xs, *popt2), 'b--', label='Weights')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05, 0.1, 0.1),
           ncol=3, fancybox=True, shadow=True)
plt.xlabel('µg/mL')  # fixed: stray ')' inside the label string
plt.ylabel('kHz/s')
#plt.xscale('log')
plt.show()
```


Solution

  • This would be my version, using a manual least_squares fit. I compare it to the solution obtained with the simple curve_fit. Actually, the difference is not very big, and the curve_fit result looks good to me.

    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.optimize import least_squares
    from scipy.optimize import curve_fit
    
    # Wide, two-decimal array printing (avoids the OP's manual rounding step).
    np.set_printoptions( linewidth=250, precision=2 )

    # Replicate measurements: concentrations (xdata) and responses (ydata),
    # grouped one line per concentration level.
    xdata = np.array(
        [
            0.10, 0.10, 0.10, 0.10, 0.10, 0.10,
            1.12, 1.12, 1.12, 1.12, 1.12, 1.12,
            2.89, 2.89, 2.89, 2.89, 2.89, 2.89,
            6.19, 6.19, 6.19, 6.19, 6.19,
            23.30, 23.30, 23.30, 23.30, 23.30, 23.30,
            108.98, 108.98, 108.98, 108.98, 108.98,
            255.33, 255.33, 255.33, 255.33, 255.33, 255.33,
            1188.62, 1188.62, 1188.62, 1188.62, 1188.62,
        ], dtype=float
    )

    ydata = np.array(
        [
            0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528,
            -1.15391, -1.46504, -0.942926, -2.12808, -2.90962, -1.51093,
            -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019,
            -13.8455, -16.9911, -11.0881, -10.6453, -15.1288,
            -52.4669, -68.2344, -74.7673, -70.2025, -65.8181, -55.7344,
            -271.286, -329.521, -436.097, -654.034, -396.45,
            -826.195, -1084.43, -984.344, -1124.8, -1076.27, -1072.03,
            -3968.22, -3114.46, -3771.61, -2805.4, -4078.05,
        ], dtype=float
    )
    
    def fourPL( x, a, b, c, d ):
        """Four-parameter logistic: d + (a - d) / (1 + (x/c)**b).

        For b > 0 the value runs from a (x -> 0) to d (x -> inf), with the
        midpoint at x == c.  Works elementwise on numpy arrays.
        """
        denom = 1 + ( x / c )**b
        return d + ( a - d ) / denom
    
    def residuals( params, xlist, ylist ):
        """Weighted residual vector for scipy.optimize.least_squares.

        Each residual (yth - y) is divided by |yth| — i.e. weighted by the
        *fitted* value, not the data — so the squared terms in the cost
        carry the requested 1/Y**2 weighting.  Other weightings may be more
        appropriate depending on the error model.
        NOTE(review): this blows up if the model crosses zero on the data.
        """
        # fourPL is vectorized over numpy arrays, so the old per-point
        # np.fromiter(...) generator loop is unnecessary; this also drops
        # the np.float alias, which was removed in NumPy 1.24.
        yth = fourPL( np.asarray( xlist, dtype=float ), *params )
        diff = yth - np.asarray( ylist, dtype=float )
        return diff / np.abs( yth )
    
    ### dense x grid; yl0 is the curve at the initial guess (sanity check)
    xl = np.linspace( .1, 1200, 150 )
    # Direct vectorized calls replace the np.fromiter(..., np.float) loops:
    # the np.float alias was removed in NumPy 1.24 and raised AttributeError.
    yl0 = fourPL( xl, 1, 1, 1000, -6000 )

    ### standard (unweighted) curve_fit, started from the same guess
    cfsol, _ = curve_fit( fourPL, xdata, ydata, p0=( 1, 1, 1000, -6000 ) )
    print( cfsol )
    yl1 = fourPL( xl, *cfsol )

    ### least_squares with the manual residual function and its "unusual" weighting
    lssol = least_squares( residuals, x0=( 1, 1, 1000, -6000 ), args=( xdata, ydata ) )
    print( lssol.x )
    yl2 = fourPL( xl, *( lssol.x ) )
    
    ### plotting: raw data plus both fitted curves
    fig, ax = plt.subplots()
    ax.scatter( xdata, ydata )
    ax.plot( xl, yl1 )  # curve_fit result
    ax.plot( xl, yl2 )  # weighted least_squares result

    plt.show()
    

    two fit variants

    Looking at the covariance matrix not only reveals rather large errors, but also a quite high correlation between the parameters. This is due to the nature of the model, of course, but it should be handled with care, especially when it comes to interpreting the data.

    The problem becomes even more obvious when we only consider the data for small x (the data for large x scatters a lot anyhow). For small x, or equivalently large c, the Taylor expansion of the model is (a − d)(1 − b·x/c) + d, which is a − (a − d)·(b/c)·x — i.e. basically of the form a + e·x. So b, c and d are effectively doing the same job.