Search code examples
pythonscipycurve-fittinguncertainty

Gaussian fit with consideration of uncertainties


I'm having trouble understandig what is wrong with the following piece of code:

import numpy as np
import matplotlib.pyplot as plt
from scipy.odr import *

def gauss(p,x):
    return p[0]*np.exp(-(x-p[1])**2/(2*p[2]**2)+p[4]) + p[3]

# Create a model for fitting.
gg = Model(gauss)

x = np.arange(0, 350)

# Create a RealData object using our initiated data from above.
data = RealData(x, y_data, sx=0, sy=y_data_err)

# Set up ODR with the model and data.
odr = ODR(data, gg, beta0=[0.1, 1., 1.0, 1.0, 1.0])


# Run the regression.
out = odr.run()

# Use the in-built pprint method to give us results.
out.pprint()

x_fit = np.linspace(x[0], x[-1], 1000)
y_fit = gauss(out.beta, x_fit)

plt.figure()
plt.errorbar(x, xy_data xerr=0, yerr=y_data_err, linestyle='None', marker='x')
plt.plot(x_fit, y_fit)

plt.show()

This was straight up copied from here with only changing the model. The error that I get is

scipy.odr.odrpack.odr_error: number of observations do not match

But as far as I can tell beta0 has five parameters, which is exactly as many as gauss needs to work. Would be great if someone could point to the error-source or my misconception.


Solution

  • Here is a graphing fitter with your equation, comparing both ODR and curve_fit on one graph. The example uses scipy's differential_evolution genetic algorithm module to determine initial parameter estimates for the solvers, and that module implements the Latin Hypercube algorithm to ensure a thorough search of parameter space which requires bounds within which to search. In this example, those bounds are taken from the data maximum and minimum values. As your post did not include data, I have used my own test data in the example. In this example the two fitted curves look very similar, diverging slightly at the plotted extremes. comparison

    import numpy, scipy, matplotlib
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    import scipy.odr
    from scipy.optimize import differential_evolution
    import warnings
    
    xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
    yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])
    
    
    def func(x, a, b, c, d, offset): # curve fitting function for curve_fit()
        return a*numpy.exp(-(x-b)**2/(2*c**2)+d) + offset
    
    
    def func_wrapper_for_ODR(parameters, x): # parameter order for ODR
        return func(x, *parameters)
    
    
    # function for genetic algorithm to minimize (sum of squared error)
    def sumOfSquaredError(parameterTuple):
        warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)
    
    
    def generate_Initial_Parameters():
        # min and max used for bounds
        maxX = max(xData)
        minX = min(xData)
        maxY = max(yData)
        minY = min(yData)
    
        parameterBounds = []
        parameterBounds.append([minY, maxY]) # search bounds for a
        parameterBounds.append([minX, maxX]) # search bounds for b
        parameterBounds.append([minX, maxX]) # search bounds for c
        parameterBounds.append([minY, maxY]) # search bounds for d
        parameterBounds.append([0.0, maxY]) # search bounds for Offset
    
        # "seed" the numpy random number generator for repeatable results
        result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
        return result.x
    
    geneticParameters = generate_Initial_Parameters()
    
    
    ##########################
    # curve_fit section
    ##########################
    
    fittedParameters_curvefit, pcov = curve_fit(func, xData, yData, geneticParameters)
    print('Fitted parameters curve_fit:', fittedParameters_curvefit)
    print()
    
    modelPredictions_curvefit = func(xData, *fittedParameters_curvefit) 
    
    absError_curvefit = modelPredictions_curvefit - yData
    
    SE_curvefit = numpy.square(absError_curvefit) # squared errors
    MSE_curvefit = numpy.mean(SE_curvefit) # mean squared errors
    RMSE_curvefit = numpy.sqrt(MSE_curvefit) # Root Mean Squared Error, RMSE
    Rsquared_curvefit = 1.0 - (numpy.var(absError_curvefit) / numpy.var(yData))
    
    print()
    print('RMSE curve_fit:', RMSE_curvefit)
    print('R-squared curve_fit:', Rsquared_curvefit)
    
    print()
    
    
    ##########################
    # ODR section
    ##########################
    data = scipy.odr.odrpack.Data(xData,yData)
    model = scipy.odr.odrpack.Model(func_wrapper_for_ODR)
    odr = scipy.odr.odrpack.ODR(data, model, beta0=geneticParameters)
    
    # Run the regression.
    odr_out = odr.run()
    
    print('Fitted parameters ODR:', odr_out.beta)
    print()
    
    modelPredictions_odr = func(xData, *odr_out.beta) 
    
    absError_odr = modelPredictions_odr - yData
    
    SE_odr = numpy.square(absError_odr) # squared errors
    MSE_odr = numpy.mean(SE_odr) # mean squared errors
    RMSE_odr = numpy.sqrt(MSE_odr) # Root Mean Squared Error, RMSE
    Rsquared_odr = 1.0 - (numpy.var(absError_odr) / numpy.var(yData))
    
    print()
    print('RMSE ODR:', RMSE_odr)
    print('R-squared ODR:', Rsquared_odr)
    
    print()
    
    
    ##########################################################
    # graphics output section
    def ModelsAndScatterPlot(graphWidth, graphHeight):
        f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
        axes = f.add_subplot(111)
    
        # first the raw data as a scatter plot
        axes.plot(xData, yData,  'D')
    
        # create data for the fitted equation plots
        xModel = numpy.linspace(min(xData), max(xData))
        yModel_curvefit = func(xModel, *fittedParameters_curvefit)
        yModel_odr = func(xModel, *odr_out.beta)
    
        # now the models as line plots
        axes.plot(xModel, yModel_curvefit)
        axes.plot(xModel, yModel_odr)
    
        axes.set_xlabel('X Data') # X axis data label
        axes.set_ylabel('Y Data') # Y axis data label
    
        plt.show()
        plt.close('all') # clean up after using pyplot
    
    graphWidth = 800
    graphHeight = 600
    ModelsAndScatterPlot(graphWidth, graphHeight)