error-handling scipy curve-fitting one-definition-rule

Using scipy.odr to fit curve

I'm trying to fit a set of data points via a fit function that depends on two variables, let's call these xdata and sdata. Problem is my curve is rather flat I want it to more or less "follow the points".

I've tried using scipy.odr to fit the curve it works rather well except that the curve is too flat:

import numpy as np
from math import pi
from math import sqrt
from math import log
from scipy import optimize
import scipy.optimize
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.odr import *

mudr=np.array([ 57.43708609,  46.26119205,  55.60688742,  33.21615894,
        28.27072848,  22.54649007,  21.80662252,  11.21483444,   5.80211921]) 
#xdata points
dme=array([ 128662.54890776,  105265.32915726,  128652.56835434,
         77968.67019573,   66273.56542068,   58464.58559543,
         54570.66624991,   27286.90038703,   19480.92689266]) #xdata error
dmss22=np.array([  4.90050000e+17,   4.90050000e+17,   4.90050000e+17,
         4.90050000e+17,   4.90050000e+17,   4.90050000e+17,
         4.90050000e+17,   4.90050000e+17,   4.90050000e+17]) #sdata points
dmse=np.array([  1.09777592e+21,   1.11512117e+21,   1.13381702e+21,
         1.15033267e+21,   1.14883089e+21,   1.27076265e+21,
         1.22637165e+21,   1.19237598e+21,   1.64539205e+21]) # sdata error
F=np.array([ 115.01944248,  110.24354867,  112.77812389,  104.81830088,
        104.35746903,  101.32016814,  100.54513274,   96.94226549,
         93.00424779]) #ydata points
dF=np.array([  72710.75386699,   72590.6256987 ,  176539.40403673,
        130555.27503081,  124299.52080164,  176426.64340597,
        143013.52848306,  122117.93022746,  157547.78395513])#ydata error

def Ffitsso(p,X,B=2.58,Fc=92.2,mu=770,Za=0.9468): #fitfunction
    temp1 = (2*B*X[0])/(4*pi*Fc)**2
    temp2 = temp1*(afij[0]+afij[1]*np.log((2*B*X[0])/mu**2))
    temp3 = temp1**2*(afij[2]+afij[3]*np.log((2*B*X[0])/mu**2)+\
                   afij[4]*(np.log((2*B*X[0])/mu**2))**2)
    temp4 = temp1**3*(afij[5]+afij[6]*np.log((2*B*X[0])/mu**2)+\
                      afij[7]*(np.log((2*B*X[0])/mu**2))**2+\
                      afij[8]*(np.log((2*B*X[0])/mu**2))**3)
    return Fc/Za*(1+p[0]*X[1])*(1+temp2+temp3+temp4)+p[1]

#fitting using scipy.odr
xtot=np.row_stack( (mudr, dmss22) )
etot=np.row_stack( (Ze, dmss22e) )
fitting = Model(Ffitsso)
mydata = RealData(xtot, F, sx=etot2, sy=dF)
myodr = ODR(mydata, fitting, beta0=[0, 100])
myoutput = myodr.run()
myoutput.pprint()
bet=myoutput.beta

plt.plot(mudr,F,"b^")
plt.plot(mudr,Ffitsso(bet,[mudr,dmss22]))

p[0]*X[0] in the fitfunction is supposed to be small compared to 1 but with the fit the value for p[0] is in order of e-18 whilst dmss22 values are in the order of e-17 which is not small enough.

Even worse is that it's negative meaning the function decreases which is not supposed to happen it's supposed to increase like the plotted data points.

Edit: I fixed, didn't know that it was so sensitive to initial beta values, put beta[0]=1.5*10(-15) and it works!**

Solution

Here is a graphical fitter with both curve_fit and ODR fitters using scipy's Differential Evolution (DE) genetic algorithm to supply initial parameter estimates for the non-linear solvers. The scipy implementation of DE uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, and this requires parameter bounds within which to search - in this example, these bounds are taken from the data maximum and minimum values. Note that it is much easier to give bounds for the initial parameter estimates rather than individual specific values.

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy.odr
from scipy.optimize import differential_evolution
import warnings

xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])


def func(x, a, b, c, d, offset): # curve fitting function for curve_fit()
    return a*numpy.exp(-(x-b)**2/(2*c**2)+d) + offset


def func_wrapper_for_ODR(parameters, x): # parameter order for ODR
    return func(x, *parameters)


# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
    val = func(xData, *parameterTuple)
    return numpy.sum((yData - val) ** 2.0)


def generate_Initial_Parameters():
    # min and max used for bounds
    maxX = max(xData)
    minX = min(xData)
    maxY = max(yData)
    minY = min(yData)

    parameterBounds = []
    parameterBounds.append([minY, maxY]) # search bounds for a
    parameterBounds.append([minX, maxX]) # search bounds for b
    parameterBounds.append([minX, maxX]) # search bounds for c
    parameterBounds.append([minY, maxY]) # search bounds for d
    parameterBounds.append([0.0, maxY]) # search bounds for Offset

    # "seed" the numpy random number generator for repeatable results
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

geneticParameters = generate_Initial_Parameters()


##########################
# curve_fit section
##########################

fittedParameters_curvefit, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters curve_fit:', fittedParameters_curvefit)
print()

modelPredictions_curvefit = func(xData, *fittedParameters_curvefit) 

absError_curvefit = modelPredictions_curvefit - yData

SE_curvefit = numpy.square(absError_curvefit) # squared errors
MSE_curvefit = numpy.mean(SE_curvefit) # mean squared errors
RMSE_curvefit = numpy.sqrt(MSE_curvefit) # Root Mean Squared Error, RMSE
Rsquared_curvefit = 1.0 - (numpy.var(absError_curvefit) / numpy.var(yData))

print()
print('RMSE curve_fit:', RMSE_curvefit)
print('R-squared curve_fit:', Rsquared_curvefit)

print()


##########################
# ODR section
##########################
data = scipy.odr.odrpack.Data(xData,yData)
model = scipy.odr.odrpack.Model(func_wrapper_for_ODR)
odr = scipy.odr.odrpack.ODR(data, model, beta0=geneticParameters)

# Run the regression.
odr_out = odr.run()

print('Fitted parameters ODR:', odr_out.beta)
print()

modelPredictions_odr = func(xData, *odr_out.beta) 

absError_odr = modelPredictions_odr - yData

SE_odr = numpy.square(absError_odr) # squared errors
MSE_odr = numpy.mean(SE_odr) # mean squared errors
RMSE_odr = numpy.sqrt(MSE_odr) # Root Mean Squared Error, RMSE
Rsquared_odr = 1.0 - (numpy.var(absError_odr) / numpy.var(yData))

print()
print('RMSE ODR:', RMSE_odr)
print('R-squared ODR:', Rsquared_odr)

print()


##########################################################
# graphics output section
def ModelsAndScatterPlot(graphWidth, graphHeight):
    f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
    axes = f.add_subplot(111)

    # first the raw data as a scatter plot
    axes.plot(xData, yData,  'D')

    # create data for the fitted equation plots
    xModel = numpy.linspace(min(xData), max(xData))
    yModel_curvefit = func(xModel, *fittedParameters_curvefit)
    yModel_odr = func(xModel, *odr_out.beta)

    # now the models as line plots
    axes.plot(xModel, yModel_curvefit)
    axes.plot(xModel, yModel_odr)

    axes.set_xlabel('X Data') # X axis data label
    axes.set_ylabel('Y Data') # Y axis data label

    plt.show()
    plt.close('all') # clean up after using pyplot

graphWidth = 800
graphHeight = 600
ModelsAndScatterPlot(graphWidth, graphHeight)