python curve-fitting data-fitting model-fitting scipy-optimize

How does one fit multiple independent and overlapping Lorentzian peaks in a set of data?

I need to fit several Lorentzian peaks in the same dataset, some of which are overlapping. What I need most from the function is the peak positions (centers) however I can't seem to fit all the peaks in these data.

I first tried using scipy's optimize curve fit, however I wasn't able to get the bounds to work and it would try to fit the full range of spectra. I've been using the python package lmfit with decent results, however I seem to be unable to get the fit to pick the overlapping peaks well.

you can see the raw spectra with marked peaks here and the results of my fitting here

You can find the data I am working with here

import os
import matplotlib.pyplot as plt
import numpy as np
from lmfit.models import LorentzianModel

test=np.loadtxt('filename.txt')

plt.figure()
#
lz1 = LorentzianModel(prefix='lz1_')
pars=lz1.guess(y,x=x)
pars.update(lz1.make_params())
pars['lz1_center'].set(0.61, min=0.5, max=0.66)
pars['lz1_amplitude'].set(0.028)
pars['lz1_sigma'].set(0.7)

lz2 = LorentzianModel(prefix='lz2_')
pars.update(lz2.make_params())
pars['lz2_center'].set(0.76, min=0.67, max=0.84)
pars['lz2_amplitude'].set(0.083)
pars['lz2_sigma'].set(0.04)

lz3 = LorentzianModel(prefix='lz3_')
pars.update(lz3.make_params())
pars['lz3_center'].set(0.85,min=0.84, max=0.92)
pars['lz3_amplitude'].set(0.048)
pars['lz3_sigma'].set(0.05)

lz4 = LorentzianModel(prefix='lz4_')
pars.update(lz4.make_params())
pars['lz4_center'].set(0.98, min=0.94, max=1.0)
pars['lz4_amplitude'].set(0.028)
pars['lz4_sigma'].set(0.02)

lz5 = LorentzianModel(prefix='lz5_')
pars.update(lz5.make_params())
pars['lz5_center'].set(1.1, min=1.0, max=1.2)
pars['lz5_amplitude'].set(0.037)
pars['lz5_sigma'].set(0.07)

lz6 = LorentzianModel(prefix='lz6_')
pars.update(lz6.make_params())
pars['lz6_center'].set(1.4, min=1.2, max=1.5)
pars['lz6_amplitude'].set(0.048)
pars['lz6_sigma'].set(0.45)

lz7 = LorentzianModel(prefix='lz7_')
pars.update(lz7.make_params())
pars['lz7_center'].set(1.54,min=1.4, max=1.6)
pars['lz7_amplitude'].set(0.037)
pars['lz7_sigma'].set(0.03)

lz8 = LorentzianModel(prefix='lz8_')
pars.update(lz8.make_params())
pars['lz8_center'].set(1.7, min=1.6, max=1.8)
pars['lz8_amplitude'].set(0.04)
pars['lz8_sigma'].set(0.17)

mod = lz1 + lz2 + lz3 + lz4 + lz5 + lz6 +lz7 + lz8
init = mod.eval(pars,x=x)

out=mod.fit(y,pars,x=x)
print(out.fit_report(min_correl=0.5))
plt.scatter(x,y, s=1)
plt.plot(x,init,'k:')
plt.plot(x,out.best_fit, 'r-')

Solution

Actually, just adding a quadratic background and lifting the bounds on the centroids should give a decent fit.

Using your data, I modified your example a little::

#!/usr/bin/env python
import matplotlib.pyplot as plt
import numpy as np
from lmfit.models import LorentzianModel, QuadraticModel

test = np.loadtxt('spectra.txt')
xdat = test[0, :]
ydat = test[1, :]

def add_peak(prefix, center, amplitude=0.005, sigma=0.05):
    peak = LorentzianModel(prefix=prefix)
    pars = peak.make_params()
    pars[prefix + 'center'].set(center)
    pars[prefix + 'amplitude'].set(amplitude)
    pars[prefix + 'sigma'].set(sigma, min=0)
    return peak, pars

model = QuadraticModel(prefix='bkg_')
params = model.make_params(a=0, b=0, c=0)

rough_peak_positions = (0.61, 0.76, 0.85, 0.99, 1.10, 1.40, 1.54, 1.7)
for i, cen in enumerate(rough_peak_positions):
    peak, pars = add_peak('lz%d_' % (i+1), cen)
    model = model + peak
    params.update(pars)

init = model.eval(params, x=xdat)
result = model.fit(ydat, params, x=xdat)
comps = result.eval_components()

print(result.fit_report(min_correl=0.5))

plt.plot(xdat, ydat, label='data')
plt.plot(xdat, result.best_fit, label='best fit')
for name, comp in comps.items():
    plt.plot(xdat, comp, '--', label=name)
plt.legend(loc='upper right')
plt.show()

which prints a report of

[[Model]]
    ((((((((Model(parabolic, prefix='bkg_') + Model(lorentzian, prefix='lz1_')) + Model(lorentzian, prefix='lz2_')) + Model(lorentzian, prefix='lz3_')) + Model(lorentzian, prefix='lz4_')) + Model(lorentzian, prefix='lz5_')) + Model(lorentzian, prefix='lz6_')) + Model(lorentzian, prefix='lz7_')) + Model(lorentzian, prefix='lz8_'))
[[Fit Statistics]]
    # fitting method   = leastsq
    # function evals   = 1101
    # data points      = 800
    # variables        = 27
    chi-square         = 7.3824e-04
    reduced chi-square = 9.5504e-07
    Akaike info crit   = -11062.6801
    Bayesian info crit = -10936.1956
[[Variables]]
    bkg_c:          0.03630504 +/- 9.4269e-04 (2.60%) (init = 0)
    bkg_b:         -0.05150031 +/- 0.00272084 (5.28%) (init = 0)
    bkg_a:          0.02285577 +/- 0.00109543 (4.79%) (init = 0)
    lz1_sigma:      0.03853490 +/- 0.00224206 (5.82%) (init = 0.05)
    lz1_center:     0.60596282 +/- 0.00101699 (0.17%) (init = 0.61)
    lz1_amplitude:  0.00121362 +/- 8.0862e-05 (6.66%) (init = 0.005)
    lz1_fwhm:       0.07706979 +/- 0.00448412 (5.82%) == '2.0000000*lz1_sigma'
    lz1_height:     0.01002487 +/- 3.1221e-04 (3.11%) == '0.3183099*lz1_amplitude/max(2.220446049250313e-16, lz1_sigma)'
    lz2_sigma:      0.03534226 +/- 3.5893e-04 (1.02%) (init = 0.05)
    lz2_center:     0.76784323 +/- 1.9002e-04 (0.02%) (init = 0.76)
    lz2_amplitude:  0.00738785 +/- 8.9378e-05 (1.21%) (init = 0.005)
    lz2_fwhm:       0.07068452 +/- 7.1786e-04 (1.02%) == '2.0000000*lz2_sigma'
    lz2_height:     0.06653864 +/- 3.6663e-04 (0.55%) == '0.3183099*lz2_amplitude/max(2.220446049250313e-16, lz2_sigma)'
    lz3_sigma:      0.03948780 +/- 0.00111507 (2.82%) (init = 0.05)
    lz3_center:     0.85427526 +/- 5.4206e-04 (0.06%) (init = 0.85)
    lz3_amplitude:  0.00317016 +/- 1.1244e-04 (3.55%) (init = 0.005)
    lz3_fwhm:       0.07897560 +/- 0.00223015 (2.82%) == '2.0000000*lz3_sigma'
    lz3_height:     0.02555459 +/- 3.9771e-04 (1.56%) == '0.3183099*lz3_amplitude/max(2.220446049250313e-16, lz3_sigma)'
    lz4_sigma:      0.02983045 +/- 0.00283845 (9.52%) (init = 0.05)
    lz4_center:     0.99544342 +/- 0.00142552 (0.14%) (init = 0.99)
    lz4_amplitude:  6.9114e-04 +/- 7.6016e-05 (11.00%) (init = 0.005)
    lz4_fwhm:       0.05966089 +/- 0.00567690 (9.52%) == '2.0000000*lz4_sigma'
    lz4_height:     0.00737492 +/- 3.6918e-04 (5.01%) == '0.3183099*lz4_amplitude/max(2.220446049250313e-16, lz4_sigma)'
    lz5_sigma:      0.06666333 +/- 0.00196152 (2.94%) (init = 0.05)
    lz5_center:     1.10162076 +/- 7.8293e-04 (0.07%) (init = 1.1)
    lz5_amplitude:  0.00522275 +/- 2.2587e-04 (4.32%) (init = 0.005)
    lz5_fwhm:       0.13332666 +/- 0.00392304 (2.94%) == '2.0000000*lz5_sigma'
    lz5_height:     0.02493807 +/- 4.7491e-04 (1.90%) == '0.3183099*lz5_amplitude/max(2.220446049250313e-16, lz5_sigma)'
    lz6_sigma:      0.11712113 +/- 0.00307555 (2.63%) (init = 0.05)
    lz6_center:     1.43220451 +/- 0.00102240 (0.07%) (init = 1.4)
    lz6_amplitude:  0.01215451 +/- 5.1928e-04 (4.27%) (init = 0.005)
    lz6_fwhm:       0.23424227 +/- 0.00615109 (2.63%) == '2.0000000*lz6_sigma'
    lz6_height:     0.03303334 +/- 6.2184e-04 (1.88%) == '0.3183099*lz6_amplitude/max(2.220446049250313e-16, lz6_sigma)'
    lz7_sigma:      0.02603963 +/- 0.00335175 (12.87%) (init = 0.05)
    lz7_center:     1.55545329 +/- 0.00152567 (0.10%) (init = 1.54)
    lz7_amplitude:  4.6978e-04 +/- 7.1036e-05 (15.12%) (init = 0.005)
    lz7_fwhm:       0.05207926 +/- 0.00670351 (12.87%) == '2.0000000*lz7_sigma'
    lz7_height:     0.00574266 +/- 3.8805e-04 (6.76%) == '0.3183099*lz7_amplitude/max(2.220446049250313e-16, lz7_sigma)'
    lz8_sigma:      0.11332337 +/- 0.00336106 (2.97%) (init = 0.05)
    lz8_center:     1.79132485 +/- 0.00117968 (0.07%) (init = 1.7)
    lz8_amplitude:  0.00700579 +/- 3.2606e-04 (4.65%) (init = 0.005)
    lz8_fwhm:       0.22664674 +/- 0.00672212 (2.97%) == '2.0000000*lz8_sigma'
    lz8_height:     0.01967830 +/- 4.2422e-04 (2.16%) == '0.3183099*lz8_amplitude/max(2.220446049250313e-16, lz8_sigma)'
[[Correlations]] (unreported correlations are < 0.500)
    C(bkg_b, bkg_a)                 = -0.993
    C(bkg_c, bkg_b)                 = -0.981
    C(bkg_c, bkg_a)                 =  0.966
    C(lz6_sigma, lz6_amplitude)     =  0.963
    C(lz8_sigma, lz8_amplitude)     =  0.935
    C(lz5_sigma, lz5_amplitude)     =  0.933
    C(bkg_b, lz6_amplitude)         = -0.907
    C(lz3_sigma, lz3_amplitude)     =  0.905
    <snip>

and shows a plot of

That may not be perfect, but should give you a pretty good start.