On the example of fit with multiple data sets from lmfit documentation, the data variable have rows of equal length.
I tried to adapt the example to run the fit using datasets with different lengths. The only change I made was changing the length of the x variable for each of the five different data sets, but it didn't worked out.
The error I got was:
data = np.array(data)
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (5,) + inhomogeneous part.
And here is the code I tried to run:
import numpy as np
import matplotlib.pyplot as plt
from lmfit import minimize, Parameters, report_fit
def gauss(x, amp, cen, sigma):
"basic gaussian"
return amp*np.exp(-(x-cen)**2/(2.*sigma**2))
def gauss_dataset(params, i, x):
"""calc gaussian from params for data set i
using simple, hardwired naming convention"""
amp = params['amp_%i' % (i+1)].value
cen = params['cen_%i' % (i+1)].value
sig = params['sig_%i' % (i+1)].value
return gauss(x, amp, cen, sig)
def objective(params, x, data):
""" calculate total residual for fits to several data sets held
in a 2-D array, and modeled by Gaussian functions"""
ndata, nx = data.shape
resid = 0.0*data[:]
# make residual per data set
for i in range(ndata):
resid[i, :] = data[i, :] - gauss_dataset(params, i, x)
# now flatten this to a 1D array, as minimize() needs
return resid.flatten()
# create 5 datasets
data = []
for i in np.arange(5):
#Now x has different length each iteration
x = np.linspace( -1, 2, 151+i)
params = Parameters()
amp = 0.60 + 9.50*np.random.rand()
cen = -0.20 + 1.20*np.random.rand()
sig = 0.25 + 0.03*np.random.rand()
dat = gauss(x, amp, cen, sig) + np.random.normal(size=len(x), scale=0.1)
data.append(dat)
# data has shape (5, 151)
data = np.array(data)
assert(data.shape) == (5, 151)
# create 5 sets of parameters, one per data set
fit_params = Parameters()
for iy, y in enumerate(data):
fit_params.add( 'amp_%i' % (iy+1), value=0.5, min=0.0, max=200)
fit_params.add( 'cen_%i' % (iy+1), value=0.4, min=-2.0, max=2.0)
fit_params.add( 'sig_%i' % (iy+1), value=0.3, min=0.01, max=3.0)
# but now constrain all values of sigma to have the same value
# by assigning sig_2, sig_3, .. sig_5 to be equal to sig_1
for iy in (2, 3, 4, 5):
fit_params['sig_%i' % iy].expr='sig_1'
# run the global fit to all the data sets
result = minimize(objective, fit_params, args=(x, data))
report_fit(result)
# plot the data sets and fits
plt.figure()
for i in range(5):
y_fit = gauss_dataset(fit_params, i, x)
plt.plot(x, data[i, :], 'o', x, y_fit, '-')
plt.show()
Here's and example of simultaneously fitting multiple (2) data sets of different sizes and forms:
Dataset 1: Gaussian with amplitude, center, sigma Dataset 2: Decaying sine wave with amplitdue, frequency, decay, phase-shift
Let's assert that the Gaussian center is equal to the frequency and the Gaussian sigma squared is the decay parameter for the sine wave.
import matplotlib.pyplot as plt
import numpy as np
from lmfit import Parameters, minimize, report_fit
from lmfit.lineshapes import gaussian
# parameters for mock data
gaus_amp = 200
gaus_cen = 4.5
gaus_sig = 7.0
sine_amp = 17
sine_shift = -0.75
sine_freq = gaus_cen
sine_decay = gaus_sig**2
# dataset 1: gaussian
x1 = np.linspace(-30, 30, 121)
y1 = gaussian(x1, gaus_amp, gaus_cen, gaus_sig)
y1 += np.random.normal(scale=0.25, size=len(y1))
# dataset 2: decaying sine wave
x2 = np.linspace(0, 150, 601)
y2 = sine_amp * np.sin(sine_shift +x2/sine_freq) * np.exp(-x2**2/sine_decay**2)
y2 += np.random.normal(scale=0.5, size=len(y2))
def objective(params, x1, x2, y1, y2):
gamp = params['g_amp']
gcen = params['g_cen']
gsig = params['g_sig']
gresid = y1 - gaussian(x1, gamp, gcen, gsig)
samp = params['s_amp']
sshift = params['s_shift']
sfreq = gcen
sdecay = gsig**2
sresid = y2 - samp * np.sin(sshift +x2/sfreq) * np.exp(-x2**2/sdecay**2)
# now concatenate the residuals of the data sets
return np.concatenate((gresid, sresid))
params = Parameters()
params.add('g_amp', value=150, min=0)
params.add('g_cen', value=5, min=0)
params.add('g_sig', value=5, min=0)
params.add('s_amp', value=10, min=0)
params.add('s_shift', value=-0.5, min=-3, max=3)
# Run the fit and show the fitting result
out = minimize(objective, params, args=(x1, x2, y1, y2))
report_fit(out.params)
The key here is np.concatenate()
to make a single 1-D residual array from the residuals of the individual datasets.
This example will run and print out the results of
[[Variables]]
g_amp: 201.092389 +/- 1.74123974 (0.87%) (init = 150)
g_cen: 4.50385672 +/- 0.00477268 (0.11%) (init = 5)
g_sig: 7.00094469 +/- 0.01821519 (0.26%) (init = 5)
s_amp: 16.8828293 +/- 0.08010449 (0.47%) (init = 10)
s_shift: -0.74422074 +/- 0.00555315 (0.75%) (init = -0.5)
[[Correlations]] (unreported correlations are < 0.100)
C(g_cen, s_shift) = +0.7679
C(g_sig, s_amp) = -0.5904
C(g_amp, g_sig) = +0.1502
I'll leave it as an exercise for the reader to plot the fits to the two datasets and think about extending that to 30 datasets ;).