It seems that the best-fit parameters reported by lmfit
(when scale_covar = False
) are inaccurate when chi-square is very close to zero (i.e. when the model fits the data almost perfectly). Below I perform a straight-line regression of Y = [0,1,2]
against X = [0,1,2]
, and add increasing amounts of noise (f
) to the data. Only when the noise is large enough are the reported uncertainties equal to those expected from ordinary least squares.
Is this a bug/limitation in how lmfit
estimates parameter uncertainties, or a bug/limitation in my understanding of what to expect from lmfit
?
# python 3.7.6
# lmfit 1.0.2
import numpy as np
from lmfit import minimize, Parameters
def residual(p, x, y):
    """Residual vector for a straight-line model: y - (a + b*x)."""
    predicted = p['a'] + p['b'] * x
    return y - predicted
def report(out):
    # Print one table row: noise level, chi-square, and the fitted value,
    # standard error, and correlation of parameters 'a' and 'b'.
    # NOTE(review): reads the module-level variable `f` (current noise
    # amplitude) — callers must set it before each call.
    cov = out.covar
    a_val = out.params['a'].value
    b_val = out.params['b'].value
    se_a = cov[0, 0] ** .5
    se_b = cov[1, 1] ** .5
    cor_ab = cov[0, 1] / (cov[0, 0] * cov[1, 1]) ** .5
    print(f'{f:8.0e} {out.chisqr:8.0e} {a_val:8.2f} {se_a:8.2f} {b_val:8.2f} {se_b:8.2f} {cor_ab:8.2f}')
# Table header: column labels and an underline row.
print(f'{"noise":>8} {"chisq":>8} {"a":>8} {"SE(a)":>8} {"b":>8} {"SE(b)":>8} {"cor(a,b)":>8}')
print(f'{"-"*8} {"-"*8} {"-"*8} {"-"*8} {"-"*8} {"-"*8} {"-"*8}')

# Same initial guess for every fit.
params = Parameters()
params.add('a', value=0.5)
params.add('b', value=0.5)

# Baseline fit on perfect data Y = X (zero noise).
f = 0
X = np.array([0., 1., 2.])
Y = np.array([0., 1., 2.])
report(minimize(residual, params, args=(X, Y), scale_covar=False))

# Perturb the middle data point by noise f = 1e-16 ... 1e-2 and refit.
for exponent in np.linspace(16, 2, 15):
    f = 10 ** -exponent
    Y[1] = 1 + f
    report(minimize(residual, params, args=(X, Y), scale_covar=False))
# OUTPUT:
#
#
# noise chisq a SE(a) b SE(b) cor(a,b)
# -------- -------- -------- -------- -------- -------- --------
# 0e+00 2e-24 0.00 1.00 1.00 0.45 -0.00
# 1e-16 2e-24 0.00 1.00 1.00 0.45 -0.00
# 1e-15 2e-24 0.00 1.00 1.00 0.45 -0.00
# 1e-14 1e-24 -0.00 0.00 1.00 0.50 -0.45
# 1e-13 2e-24 -0.00 0.00 1.00 1.00 -0.89
# 1e-12 2e-24 -0.00 0.00 1.00 0.50 -0.45
# 1e-11 7e-23 0.00 1.00 1.00 0.45 -0.00
# 1e-10 7e-21 0.00 1.00 1.00 0.45 -0.00
# 1e-09 7e-19 0.00 1.00 1.00 0.45 -0.00
# 1e-08 7e-17 0.00 0.24 1.00 0.50 -0.44
# 1e-07 7e-15 0.00 0.93 1.00 0.67 -0.74
# 1e-06 7e-13 0.00 0.92 1.00 0.70 -0.77
# 1e-05 7e-11 0.00 0.91 1.00 0.71 -0.77
# 1e-04 7e-09 0.00 0.91 1.00 0.71 -0.77
# 1e-03 7e-07 0.00 0.91 1.00 0.71 -0.77
# 1e-02 7e-05 0.00 0.91 1.00 0.71 -0.77
For the record, the behavior described above changes when using the least_squares
(Trust Region Reflective) method instead of the default leastsq
(Levenberg-Marquardt) method, returning accurate estimates instead (see code below).
In many (most?) cases the former option will be equivalent to or better than the latter in terms of convergence, with the added bonus of more robust estimates of variance-covariance in (fringe) cases such as the one described here. I'm not sure there is any obvious benefit to using leastsq
over least_squares
.
import numpy as np
from lmfit import minimize, Parameters, __version__
def residual(p, x, y):
    """Return the data-minus-model residuals for the line a + b*x."""
    return y - p['a'] - p['b'] * x
def report(out):
    # One formatted row per fit: noise, chi-square, then value / standard
    # error for 'a' and 'b', and their correlation coefficient.
    # NOTE(review): depends on the global noise amplitude `f` being set
    # by the calling loop before each invocation.
    var_a, var_b = out.covar[0, 0], out.covar[1, 1]
    correlation = out.covar[0, 1] / (var_a * var_b) ** .5
    print(f'{f:8.0e} {out.chisqr:8.0e} {out.params["a"].value:8.2f} {var_a ** .5:8.2f} {out.params["b"].value:8.2f} {var_b ** .5:8.2f} {correlation:8.2f}')
# Shared starting values for both solvers.
params = Parameters()
params.add('a', value=0.5)
params.add('b', value=0.5)

# Run the identical noise sweep under each minimization method so the
# reported uncertainties can be compared side by side.
for method in ['least_squares', 'leastsq']:
    print(f'\nMETHOD = {method}\n')
    print(f'{"noise":>8} {"chisq":>8} {"a":>8} {"SE(a)":>8} {"b":>8} {"SE(b)":>8} {"cor(a,b)":>8}')
    print(f'{"-"*8} {"-"*8} {"-"*8} {"-"*8} {"-"*8} {"-"*8} {"-"*8}')

    # Noise-free baseline: Y = X exactly.
    f = 0
    X = np.array([0., 1., 2.])
    Y = np.array([0., 1., 2.])
    report(minimize(residual, params, args=(X, Y), scale_covar=False, method=method))

    # Add increasing noise (1e-16 ... 1e-2) to the middle point and refit.
    for exponent in np.linspace(16, 2, 15):
        f = 10 ** -exponent
        Y[1] = 1 + f
        report(minimize(residual, params, args=(X, Y), scale_covar=False, method=method))
# OUTPUT:
#
# METHOD = least_squares
#
# noise chisq a SE(a) b SE(b) cor(a,b)
# -------- -------- -------- -------- -------- -------- --------
# 0e+00 2e-32 -0.00 0.91 1.00 0.71 -0.77
# 1e-16 2e-32 -0.00 0.91 1.00 0.71 -0.77
# 1e-15 8e-31 0.00 0.91 1.00 0.71 -0.77
# 1e-14 7e-29 0.00 0.91 1.00 0.71 -0.77
# 1e-13 7e-27 0.00 0.91 1.00 0.71 -0.77
# 1e-12 7e-25 0.00 0.91 1.00 0.71 -0.77
# 1e-11 7e-23 0.00 0.91 1.00 0.71 -0.77
# 1e-10 7e-21 0.00 0.91 1.00 0.71 -0.77
# 1e-09 7e-19 0.00 0.91 1.00 0.71 -0.77
# 1e-08 7e-17 0.00 0.91 1.00 0.71 -0.77
# 1e-07 7e-15 0.00 0.91 1.00 0.71 -0.77
# 1e-06 7e-13 0.00 0.91 1.00 0.71 -0.77
# 1e-05 7e-11 0.00 0.91 1.00 0.71 -0.77
# 1e-04 7e-09 0.00 0.91 1.00 0.71 -0.77
# 1e-03 7e-07 0.00 0.91 1.00 0.71 -0.77
# 1e-02 7e-05 0.00 0.91 1.00 0.71 -0.77
#
# METHOD = leastsq
#
# noise chisq a SE(a) b SE(b) cor(a,b)
# -------- -------- -------- -------- -------- -------- --------
# 0e+00 2e-24 0.00 1.00 1.00 0.45 -0.00
# 1e-16 2e-24 0.00 1.00 1.00 0.45 -0.00
# 1e-15 2e-24 0.00 1.00 1.00 0.45 -0.00
# 1e-14 1e-24 -0.00 0.00 1.00 0.50 -0.45
# 1e-13 2e-24 -0.00 0.00 1.00 1.00 -0.89
# 1e-12 2e-24 -0.00 0.00 1.00 0.50 -0.45
# 1e-11 7e-23 0.00 1.00 1.00 0.45 -0.00
# 1e-10 7e-21 0.00 1.00 1.00 0.45 -0.00
# 1e-09 7e-19 0.00 1.00 1.00 0.45 -0.00
# 1e-08 7e-17 0.00 0.24 1.00 0.50 -0.44
# 1e-07 7e-15 0.00 0.93 1.00 0.67 -0.74
# 1e-06 7e-13 0.00 0.92 1.00 0.70 -0.77
# 1e-05 7e-11 0.00 0.91 1.00 0.71 -0.77
# 1e-04 7e-09 0.00 0.91 1.00 0.71 -0.77
# 1e-03 7e-07 0.00 0.91 1.00 0.71 -0.77
# 1e-02 7e-05 0.00 0.91 1.00 0.71 -0.77