Search code examples
error-handlingscipycurve-fittingone-definition-rule

Using scipy.odr to fit curve


I'm trying to fit a set of data points via a fit function that depends on two variables, let's call these xdata and sdata. Problem is my curve is rather flat I want it to more or less "follow the points".

I've tried using scipy.odr to fit the curve it works rather well except that the curve is too flat:

import numpy as np
from math import pi
from math import sqrt
from math import log
from scipy import optimize
import scipy.optimize
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.odr import *

mudr=np.array([ 57.43708609,  46.26119205,  55.60688742,  33.21615894,
        28.27072848,  22.54649007,  21.80662252,  11.21483444,   5.80211921]) 
#xdata points
dme=array([ 128662.54890776,  105265.32915726,  128652.56835434,
         77968.67019573,   66273.56542068,   58464.58559543,
         54570.66624991,   27286.90038703,   19480.92689266]) #xdata error
dmss22=np.array([  4.90050000e+17,   4.90050000e+17,   4.90050000e+17,
         4.90050000e+17,   4.90050000e+17,   4.90050000e+17,
         4.90050000e+17,   4.90050000e+17,   4.90050000e+17]) #sdata points
dmse=np.array([  1.09777592e+21,   1.11512117e+21,   1.13381702e+21,
         1.15033267e+21,   1.14883089e+21,   1.27076265e+21,
         1.22637165e+21,   1.19237598e+21,   1.64539205e+21]) # sdata error
F=np.array([ 115.01944248,  110.24354867,  112.77812389,  104.81830088,
        104.35746903,  101.32016814,  100.54513274,   96.94226549,
         93.00424779]) #ydata points
dF=np.array([  72710.75386699,   72590.6256987 ,  176539.40403673,
        130555.27503081,  124299.52080164,  176426.64340597,
        143013.52848306,  122117.93022746,  157547.78395513])#ydata error

def Ffitsso(p,X,B=2.58,Fc=92.2,mu=770,Za=0.9468): #fitfunction
    temp1 = (2*B*X[0])/(4*pi*Fc)**2
    temp2 = temp1*(afij[0]+afij[1]*np.log((2*B*X[0])/mu**2))
    temp3 = temp1**2*(afij[2]+afij[3]*np.log((2*B*X[0])/mu**2)+\
                   afij[4]*(np.log((2*B*X[0])/mu**2))**2)
    temp4 = temp1**3*(afij[5]+afij[6]*np.log((2*B*X[0])/mu**2)+\
                      afij[7]*(np.log((2*B*X[0])/mu**2))**2+\
                      afij[8]*(np.log((2*B*X[0])/mu**2))**3)
    return Fc/Za*(1+p[0]*X[1])*(1+temp2+temp3+temp4)+p[1]

#fitting using scipy.odr
xtot=np.row_stack( (mudr, dmss22) )
etot=np.row_stack( (Ze, dmss22e) )
fitting = Model(Ffitsso)
mydata = RealData(xtot, F, sx=etot2, sy=dF)
myodr = ODR(mydata, fitting, beta0=[0, 100])
myoutput = myodr.run()
myoutput.pprint()
bet=myoutput.beta

plt.plot(mudr,F,"b^")
plt.plot(mudr,Ffitsso(bet,[mudr,dmss22]))

p[0]*X[0] in the fitfunction is supposed to be small compared to 1 but with the fit the value for p[0] is in order of e-18 whilst dmss22 values are in the order of e-17 which is not small enough.

Even worse is that it's negative meaning the function decreases which is not supposed to happen it's supposed to increase like the plotted data points.

Edit: I fixed, didn't know that it was so sensitive to initial beta values, put beta[0]=1.5*10(-15) and it works!**


Solution

  • Here is a graphical fitter with both curve_fit and ODR fitters using scipy's Differential Evolution (DE) genetic algorithm to supply initial parameter estimates for the non-linear solvers. The scipy implementation of DE uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, and this requires parameter bounds within which to search - in this example, these bounds are taken from the data maximum and minimum values. Note that it is much easier to give bounds for the initial parameter estimates rather than individual specific values.

    import numpy, scipy, matplotlib
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    import scipy.odr
    from scipy.optimize import differential_evolution
    import warnings
    
    xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
    yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])
    
    
    def func(x, a, b, c, d, offset): # curve fitting function for curve_fit()
        return a*numpy.exp(-(x-b)**2/(2*c**2)+d) + offset
    
    
    def func_wrapper_for_ODR(parameters, x): # parameter order for ODR
        return func(x, *parameters)
    
    
    # function for genetic algorithm to minimize (sum of squared error)
    def sumOfSquaredError(parameterTuple):
        warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)
    
    
    def generate_Initial_Parameters():
        # min and max used for bounds
        maxX = max(xData)
        minX = min(xData)
        maxY = max(yData)
        minY = min(yData)
    
        parameterBounds = []
        parameterBounds.append([minY, maxY]) # search bounds for a
        parameterBounds.append([minX, maxX]) # search bounds for b
        parameterBounds.append([minX, maxX]) # search bounds for c
        parameterBounds.append([minY, maxY]) # search bounds for d
        parameterBounds.append([0.0, maxY]) # search bounds for Offset
    
        # "seed" the numpy random number generator for repeatable results
        result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
        return result.x
    
    geneticParameters = generate_Initial_Parameters()
    
    
    ##########################
    # curve_fit section
    ##########################
    
    fittedParameters_curvefit, pcov = curve_fit(func, xData, yData, geneticParameters)
    print('Fitted parameters curve_fit:', fittedParameters_curvefit)
    print()
    
    modelPredictions_curvefit = func(xData, *fittedParameters_curvefit) 
    
    absError_curvefit = modelPredictions_curvefit - yData
    
    SE_curvefit = numpy.square(absError_curvefit) # squared errors
    MSE_curvefit = numpy.mean(SE_curvefit) # mean squared errors
    RMSE_curvefit = numpy.sqrt(MSE_curvefit) # Root Mean Squared Error, RMSE
    Rsquared_curvefit = 1.0 - (numpy.var(absError_curvefit) / numpy.var(yData))
    
    print()
    print('RMSE curve_fit:', RMSE_curvefit)
    print('R-squared curve_fit:', Rsquared_curvefit)
    
    print()
    
    
    ##########################
    # ODR section
    ##########################
    data = scipy.odr.odrpack.Data(xData,yData)
    model = scipy.odr.odrpack.Model(func_wrapper_for_ODR)
    odr = scipy.odr.odrpack.ODR(data, model, beta0=geneticParameters)
    
    # Run the regression.
    odr_out = odr.run()
    
    print('Fitted parameters ODR:', odr_out.beta)
    print()
    
    modelPredictions_odr = func(xData, *odr_out.beta) 
    
    absError_odr = modelPredictions_odr - yData
    
    SE_odr = numpy.square(absError_odr) # squared errors
    MSE_odr = numpy.mean(SE_odr) # mean squared errors
    RMSE_odr = numpy.sqrt(MSE_odr) # Root Mean Squared Error, RMSE
    Rsquared_odr = 1.0 - (numpy.var(absError_odr) / numpy.var(yData))
    
    print()
    print('RMSE ODR:', RMSE_odr)
    print('R-squared ODR:', Rsquared_odr)
    
    print()
    
    
    ##########################################################
    # graphics output section
    def ModelsAndScatterPlot(graphWidth, graphHeight):
        f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
        axes = f.add_subplot(111)
    
        # first the raw data as a scatter plot
        axes.plot(xData, yData,  'D')
    
        # create data for the fitted equation plots
        xModel = numpy.linspace(min(xData), max(xData))
        yModel_curvefit = func(xModel, *fittedParameters_curvefit)
        yModel_odr = func(xModel, *odr_out.beta)
    
        # now the models as line plots
        axes.plot(xModel, yModel_curvefit)
        axes.plot(xModel, yModel_odr)
    
        axes.set_xlabel('X Data') # X axis data label
        axes.set_ylabel('Y Data') # Y axis data label
    
        plt.show()
        plt.close('all') # clean up after using pyplot
    
    graphWidth = 800
    graphHeight = 600
    ModelsAndScatterPlot(graphWidth, graphHeight)