Search code examples
python · matplotlib · fft · curve-fitting · least-squares

How do I apply weighting to the fitted curve, rather than to the data points, during fitting? Which import/function should I use?


I've tried `from scipy.optimize import curve_fit`, but its weighting only seems to apply to the data points. I want to apply a 1/Y² weighting to the residuals of the fitted Y curve during the fit (weighted least squares), i.e. weight by the fitted Y values rather than the measured ones. I'm not sure how to target yfit instead of ydata, or whether I should use something else entirely. Any help would be appreciated.

# Replicate measurements: concentrations (xdata) and responses (ydata),
# one line per concentration level (6/6/6/5/6/5/6/5 replicates).
xdata = np.array([
    0.10, 0.10, 0.10, 0.10, 0.10, 0.10,
    1.12, 1.12, 1.12, 1.12, 1.12, 1.12,
    2.89, 2.89, 2.89, 2.89, 2.89, 2.89,
    6.19, 6.19, 6.19, 6.19, 6.19,
    23.30, 23.30, 23.30, 23.30, 23.30, 23.30,
    108.98, 108.98, 108.98, 108.98, 108.98,
    255.33, 255.33, 255.33, 255.33, 255.33, 255.33,
    1188.62, 1188.62, 1188.62, 1188.62, 1188.62,
], dtype=float)

ydata = np.array([
    0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528,
    -1.15391, -1.46504, -0.942926, -2.12808, -2.90962, -1.51093,
    -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019,
    -13.8455, -16.9911, -11.0881, -10.6453, -15.1288,
    -52.4669, -68.2344, -74.7673, -70.2025, -65.8181, -55.7344,
    -271.286, -329.521, -436.097, -654.034, -396.45,
    -826.195, -1084.43, -984.344, -1124.8, -1076.27, -1072.03,
    -3968.22, -3114.46, -3771.61, -2805.4, -4078.05,
], dtype=float)

def fourPL(x, A, B, C, D):
    """Four-parameter logistic (4PL): D + (A - D) / (1 + (x/C)**B).

    For B > 0 the curve runs from A (as x -> 0) to D (as x -> inf);
    C sets the midpoint (the value is (A+D)/2 at x == C) and B the
    steepness.  Works elementwise on numpy arrays.
    """
    shape = (x / C) ** B
    return D + (A - D) / (1.0 + shape)

# Unweighted fit.  A starting guess (p0) keeps the 4PL fit from wandering
# off with the default all-ones initial parameters on data spanning four
# decades in x.
params, params_covariance = spo.curve_fit(fourPL, xdata, ydata,
                                          p0=(1, 1, 1000, -6000))
params_list = params
roundy = [round(num, 4) for num in params_list]
print(roundy)

# Weighted fit.  curve_fit minimizes sum(((ydata - yfit) / sigma)**2), so a
# 1/Y**2 chi-square weighting needs sigma = |ydata| — NOT sigma = 1/ydata**2,
# which would weight each point by ydata**4 instead.  absolute_sigma=False
# because these are relative weights, not measured standard deviations.
# NOTE: this weights by the *data* values; weighting by the fitted curve is
# not possible with curve_fit's sigma and needs a manual least_squares fit
# with a custom residual function.
popt2, pcov2 = spo.curve_fit(fourPL, xdata, ydata, p0=(1, 1, 1000, -6000),
                             sigma=np.abs(ydata), absolute_sigma=False)
yfit2 = fourPL(xdata, *popt2)
params_list2 = popt2
roundy2 = [round(num, 4) for num in params_list2]
print(roundy2)

# Plot the raw data with both fitted curves on a dense x grid.
x_min, x_max = np.amin(xdata), np.amax(xdata)
xs = np.linspace(x_min, x_max, 1000)
plt.scatter(xdata, ydata)
plt.plot(xs, fourPL(xs, *params), 'm--', label='No Weight')
plt.plot(xs, fourPL(xs, *popt2), 'b--', label='Weights')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05, 0.1, 0.1),
           ncol=3, fancybox=True, shadow=True)
plt.xlabel('µg/mL')  # fixed: stray ')' inside the label string
plt.ylabel('kHz/s')
#plt.xscale('log')
plt.show()
```


Solution

  • This would be my version, using a manual least_squares fit. I compare it to the solution obtained with the simple curve_fit. Actually, the difference is not very big, and the curve_fit result looks good to me.

    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.optimize import least_squares
    from scipy.optimize import curve_fit
    
    # Wide, two-decimal array printing (avoids the OP's manual rounding step).
    np.set_printoptions( linewidth=250, precision=2 )

    # Replicate measurements: concentrations (xdata) and responses (ydata),
    # grouped one line per concentration level.
    xdata = np.array(
        [
            0.10, 0.10, 0.10, 0.10, 0.10, 0.10,
            1.12, 1.12, 1.12, 1.12, 1.12, 1.12,
            2.89, 2.89, 2.89, 2.89, 2.89, 2.89,
            6.19, 6.19, 6.19, 6.19, 6.19,
            23.30, 23.30, 23.30, 23.30, 23.30, 23.30,
            108.98, 108.98, 108.98, 108.98, 108.98,
            255.33, 255.33, 255.33, 255.33, 255.33, 255.33,
            1188.62, 1188.62, 1188.62, 1188.62, 1188.62,
        ], dtype=float
    )

    ydata = np.array(
        [
            0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528,
            -1.15391, -1.46504, -0.942926, -2.12808, -2.90962, -1.51093,
            -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019,
            -13.8455, -16.9911, -11.0881, -10.6453, -15.1288,
            -52.4669, -68.2344, -74.7673, -70.2025, -65.8181, -55.7344,
            -271.286, -329.521, -436.097, -654.034, -396.45,
            -826.195, -1084.43, -984.344, -1124.8, -1076.27, -1072.03,
            -3968.22, -3114.46, -3771.61, -2805.4, -4078.05,
        ], dtype=float
    )
    
    def fourPL( x, a, b, c, d ):
        """Four-parameter logistic: d + (a - d) / (1 + (x/c)**b).

        For b > 0 the value runs from a (x -> 0) to d (x -> inf), with the
        midpoint at x == c.  Works elementwise on numpy arrays.
        """
        denom = 1 + ( x / c )**b
        return d + ( a - d ) / denom
    
    def residuals( params, xlist, ylist ):
        """Weighted residual vector for scipy.optimize.least_squares.

        Each residual (yth - y) is divided by |yth| — i.e. weighted by the
        *fitted* value, not the data — so the squared terms in the cost
        carry the requested 1/Y**2 weighting.  Other weightings may be more
        appropriate depending on the error model.
        NOTE(review): this blows up if the model crosses zero on the data.
        """
        # fourPL is vectorized over numpy arrays, so the old per-point
        # np.fromiter(...) generator loop is unnecessary; this also drops
        # the np.float alias, which was removed in NumPy 1.24.
        yth = fourPL( np.asarray( xlist, dtype=float ), *params )
        diff = yth - np.asarray( ylist, dtype=float )
        return diff / np.abs( yth )
    
    ### dense x grid; yl0 is the curve at the initial guess (sanity check)
    xl = np.linspace( .1, 1200, 150 )
    # Direct vectorized calls replace the np.fromiter(..., np.float) loops:
    # the np.float alias was removed in NumPy 1.24 and raised AttributeError.
    yl0 = fourPL( xl, 1, 1, 1000, -6000 )

    ### standard (unweighted) curve_fit, started from the same guess
    cfsol, _ = curve_fit( fourPL, xdata, ydata, p0=( 1, 1, 1000, -6000 ) )
    print( cfsol )
    yl1 = fourPL( xl, *cfsol )

    ### least_squares with the manual residual function and its "unusual" weighting
    lssol = least_squares( residuals, x0=( 1, 1, 1000, -6000 ), args=( xdata, ydata ) )
    print( lssol.x )
    yl2 = fourPL( xl, *( lssol.x ) )
    
    ### plotting: raw data plus both fitted curves
    fig, ax = plt.subplots()
    ax.scatter( xdata, ydata )
    ax.plot( xl, yl1 )  # curve_fit result
    ax.plot( xl, yl2 )  # weighted least_squares result

    plt.show()
    

    two fit variants

    Looking at the covariance matrix not only reveals rather large errors, but also a quite high correlation between the parameters. This is due to the nature of the model, of course, but it should be handled with care, especially when it comes to interpreting the data.

    The problem becomes even more obvious when we only consider the data for small x (the data for large x scatters a lot anyhow). For small x, or equivalently large c, the Taylor expansion of the model is (a − d)(1 − b·x/c) + d, which is a − (a − d)·(b/c)·x — i.e. basically of the form a + e·x. So b, c and d are effectively doing the same job.