Search code examples
pythoncurve-fittingdata-fittingmodel-fittingscipy-optimize

How does one fit multiple independent and overlapping Lorentzian peaks in a set of data?


I need to fit several Lorentzian peaks in the same dataset, some of which are overlapping. What I need most from the function is the peak positions (centers) however I can't seem to fit all the peaks in these data.

I first tried using scipy's optimize curve fit, however I wasn't able to get the bounds to work and it would try to fit the full range of spectra. I've been using the python package lmfit with decent results, however I seem to be unable to get the fit to pick the overlapping peaks well.

you can see the raw spectra with marked peaks here and the results of my fitting here

You can find the data I am working with here

import os
import matplotlib.pyplot as plt
import numpy as np
from lmfit.models import LorentzianModel

test=np.loadtxt('filename.txt')

plt.figure()
#
lz1 = LorentzianModel(prefix='lz1_')
pars=lz1.guess(y,x=x)
pars.update(lz1.make_params())
pars['lz1_center'].set(0.61, min=0.5, max=0.66)
pars['lz1_amplitude'].set(0.028)
pars['lz1_sigma'].set(0.7)

lz2 = LorentzianModel(prefix='lz2_')
pars.update(lz2.make_params())
pars['lz2_center'].set(0.76, min=0.67, max=0.84)
pars['lz2_amplitude'].set(0.083)
pars['lz2_sigma'].set(0.04)

lz3 = LorentzianModel(prefix='lz3_')
pars.update(lz3.make_params())
pars['lz3_center'].set(0.85,min=0.84, max=0.92)
pars['lz3_amplitude'].set(0.048)
pars['lz3_sigma'].set(0.05)

lz4 = LorentzianModel(prefix='lz4_')
pars.update(lz4.make_params())
pars['lz4_center'].set(0.98, min=0.94, max=1.0)
pars['lz4_amplitude'].set(0.028)
pars['lz4_sigma'].set(0.02)

lz5 = LorentzianModel(prefix='lz5_')
pars.update(lz5.make_params())
pars['lz5_center'].set(1.1, min=1.0, max=1.2)
pars['lz5_amplitude'].set(0.037)
pars['lz5_sigma'].set(0.07)

lz6 = LorentzianModel(prefix='lz6_')
pars.update(lz6.make_params())
pars['lz6_center'].set(1.4, min=1.2, max=1.5)
pars['lz6_amplitude'].set(0.048)
pars['lz6_sigma'].set(0.45)

lz7 = LorentzianModel(prefix='lz7_')
pars.update(lz7.make_params())
pars['lz7_center'].set(1.54,min=1.4, max=1.6)
pars['lz7_amplitude'].set(0.037)
pars['lz7_sigma'].set(0.03)

lz8 = LorentzianModel(prefix='lz8_')
pars.update(lz8.make_params())
pars['lz8_center'].set(1.7, min=1.6, max=1.8)
pars['lz8_amplitude'].set(0.04)
pars['lz8_sigma'].set(0.17)

mod = lz1 + lz2 + lz3 + lz4 + lz5 + lz6 +lz7 + lz8
init = mod.eval(pars,x=x)

out=mod.fit(y,pars,x=x)
print(out.fit_report(min_correl=0.5))
plt.scatter(x,y, s=1)
plt.plot(x,init,'k:')
plt.plot(x,out.best_fit, 'r-')

Solution

  • Actually, just adding a quadratic background and lifting the bounds on the centroids should give a decent fit.

    Using your data, I modified your example a little::

    #!/usr/bin/env python
    import matplotlib.pyplot as plt
    import numpy as np
    from lmfit.models import LorentzianModel, QuadraticModel
    
    test = np.loadtxt('spectra.txt')
    xdat = test[0, :]
    ydat = test[1, :]
    
    def add_peak(prefix, center, amplitude=0.005, sigma=0.05):
        peak = LorentzianModel(prefix=prefix)
        pars = peak.make_params()
        pars[prefix + 'center'].set(center)
        pars[prefix + 'amplitude'].set(amplitude)
        pars[prefix + 'sigma'].set(sigma, min=0)
        return peak, pars
    
    model = QuadraticModel(prefix='bkg_')
    params = model.make_params(a=0, b=0, c=0)
    
    rough_peak_positions = (0.61, 0.76, 0.85, 0.99, 1.10, 1.40, 1.54, 1.7)
    for i, cen in enumerate(rough_peak_positions):
        peak, pars = add_peak('lz%d_' % (i+1), cen)
        model = model + peak
        params.update(pars)
    
    init = model.eval(params, x=xdat)
    result = model.fit(ydat, params, x=xdat)
    comps = result.eval_components()
    
    print(result.fit_report(min_correl=0.5))
    
    plt.plot(xdat, ydat, label='data')
    plt.plot(xdat, result.best_fit, label='best fit')
    for name, comp in comps.items():
        plt.plot(xdat, comp, '--', label=name)
    plt.legend(loc='upper right')
    plt.show()
    

    which prints a report of

    [[Model]]
        ((((((((Model(parabolic, prefix='bkg_') + Model(lorentzian, prefix='lz1_')) + Model(lorentzian, prefix='lz2_')) + Model(lorentzian, prefix='lz3_')) + Model(lorentzian, prefix='lz4_')) + Model(lorentzian, prefix='lz5_')) + Model(lorentzian, prefix='lz6_')) + Model(lorentzian, prefix='lz7_')) + Model(lorentzian, prefix='lz8_'))
    [[Fit Statistics]]
        # fitting method   = leastsq
        # function evals   = 1101
        # data points      = 800
        # variables        = 27
        chi-square         = 7.3824e-04
        reduced chi-square = 9.5504e-07
        Akaike info crit   = -11062.6801
        Bayesian info crit = -10936.1956
    [[Variables]]
        bkg_c:          0.03630504 +/- 9.4269e-04 (2.60%) (init = 0)
        bkg_b:         -0.05150031 +/- 0.00272084 (5.28%) (init = 0)
        bkg_a:          0.02285577 +/- 0.00109543 (4.79%) (init = 0)
        lz1_sigma:      0.03853490 +/- 0.00224206 (5.82%) (init = 0.05)
        lz1_center:     0.60596282 +/- 0.00101699 (0.17%) (init = 0.61)
        lz1_amplitude:  0.00121362 +/- 8.0862e-05 (6.66%) (init = 0.005)
        lz1_fwhm:       0.07706979 +/- 0.00448412 (5.82%) == '2.0000000*lz1_sigma'
        lz1_height:     0.01002487 +/- 3.1221e-04 (3.11%) == '0.3183099*lz1_amplitude/max(2.220446049250313e-16, lz1_sigma)'
        lz2_sigma:      0.03534226 +/- 3.5893e-04 (1.02%) (init = 0.05)
        lz2_center:     0.76784323 +/- 1.9002e-04 (0.02%) (init = 0.76)
        lz2_amplitude:  0.00738785 +/- 8.9378e-05 (1.21%) (init = 0.005)
        lz2_fwhm:       0.07068452 +/- 7.1786e-04 (1.02%) == '2.0000000*lz2_sigma'
        lz2_height:     0.06653864 +/- 3.6663e-04 (0.55%) == '0.3183099*lz2_amplitude/max(2.220446049250313e-16, lz2_sigma)'
        lz3_sigma:      0.03948780 +/- 0.00111507 (2.82%) (init = 0.05)
        lz3_center:     0.85427526 +/- 5.4206e-04 (0.06%) (init = 0.85)
        lz3_amplitude:  0.00317016 +/- 1.1244e-04 (3.55%) (init = 0.005)
        lz3_fwhm:       0.07897560 +/- 0.00223015 (2.82%) == '2.0000000*lz3_sigma'
        lz3_height:     0.02555459 +/- 3.9771e-04 (1.56%) == '0.3183099*lz3_amplitude/max(2.220446049250313e-16, lz3_sigma)'
        lz4_sigma:      0.02983045 +/- 0.00283845 (9.52%) (init = 0.05)
        lz4_center:     0.99544342 +/- 0.00142552 (0.14%) (init = 0.99)
        lz4_amplitude:  6.9114e-04 +/- 7.6016e-05 (11.00%) (init = 0.005)
        lz4_fwhm:       0.05966089 +/- 0.00567690 (9.52%) == '2.0000000*lz4_sigma'
        lz4_height:     0.00737492 +/- 3.6918e-04 (5.01%) == '0.3183099*lz4_amplitude/max(2.220446049250313e-16, lz4_sigma)'
        lz5_sigma:      0.06666333 +/- 0.00196152 (2.94%) (init = 0.05)
        lz5_center:     1.10162076 +/- 7.8293e-04 (0.07%) (init = 1.1)
        lz5_amplitude:  0.00522275 +/- 2.2587e-04 (4.32%) (init = 0.005)
        lz5_fwhm:       0.13332666 +/- 0.00392304 (2.94%) == '2.0000000*lz5_sigma'
        lz5_height:     0.02493807 +/- 4.7491e-04 (1.90%) == '0.3183099*lz5_amplitude/max(2.220446049250313e-16, lz5_sigma)'
        lz6_sigma:      0.11712113 +/- 0.00307555 (2.63%) (init = 0.05)
        lz6_center:     1.43220451 +/- 0.00102240 (0.07%) (init = 1.4)
        lz6_amplitude:  0.01215451 +/- 5.1928e-04 (4.27%) (init = 0.005)
        lz6_fwhm:       0.23424227 +/- 0.00615109 (2.63%) == '2.0000000*lz6_sigma'
        lz6_height:     0.03303334 +/- 6.2184e-04 (1.88%) == '0.3183099*lz6_amplitude/max(2.220446049250313e-16, lz6_sigma)'
        lz7_sigma:      0.02603963 +/- 0.00335175 (12.87%) (init = 0.05)
        lz7_center:     1.55545329 +/- 0.00152567 (0.10%) (init = 1.54)
        lz7_amplitude:  4.6978e-04 +/- 7.1036e-05 (15.12%) (init = 0.005)
        lz7_fwhm:       0.05207926 +/- 0.00670351 (12.87%) == '2.0000000*lz7_sigma'
        lz7_height:     0.00574266 +/- 3.8805e-04 (6.76%) == '0.3183099*lz7_amplitude/max(2.220446049250313e-16, lz7_sigma)'
        lz8_sigma:      0.11332337 +/- 0.00336106 (2.97%) (init = 0.05)
        lz8_center:     1.79132485 +/- 0.00117968 (0.07%) (init = 1.7)
        lz8_amplitude:  0.00700579 +/- 3.2606e-04 (4.65%) (init = 0.005)
        lz8_fwhm:       0.22664674 +/- 0.00672212 (2.97%) == '2.0000000*lz8_sigma'
        lz8_height:     0.01967830 +/- 4.2422e-04 (2.16%) == '0.3183099*lz8_amplitude/max(2.220446049250313e-16, lz8_sigma)'
    [[Correlations]] (unreported correlations are < 0.500)
        C(bkg_b, bkg_a)                 = -0.993
        C(bkg_c, bkg_b)                 = -0.981
        C(bkg_c, bkg_a)                 =  0.966
        C(lz6_sigma, lz6_amplitude)     =  0.963
        C(lz8_sigma, lz8_amplitude)     =  0.935
        C(lz5_sigma, lz5_amplitude)     =  0.933
        C(bkg_b, lz6_amplitude)         = -0.907
        C(lz3_sigma, lz3_amplitude)     =  0.905
        <snip>
    

    and shows a plot of enter image description here

    That may not be perfect, but should give you a pretty good start.