Search code examples
python · regression · non-linear-regression

How to do exponential nonlinear regression in Python


I am trying to do non-linear regression using the equation

y=ae^(-bT)

where T is temp with the data:

([26.67, 93.33, 148.89, 222.01, 315.56])

and y is the viscosity with the data:

([1.35, .085, .012, .0049, .00075])

The goal is to determine the values of a and b WITHOUT linearizing the equation, and also to plot the graph. So far, one method I've tried is:

import matplotlib
matplotlib.use('Qt4Agg')
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit 
def func(x, a, b):
    """Exponential decay model: ``a * exp(-b * x)``.

    x may be a scalar or a numpy array; a is the scale factor and b the
    decay rate.
    """
    attenuation = np.exp(-b * x)
    return a * attenuation
#data
# temperature (independent variable) and measured viscosity (dependent variable)
temp = np.array([26.67, 93.33, 148.89, 222.01, 315.56])
Viscosity = np.array([1.35, .085, .012, .0049, .00075])
# starting values for (a, b) handed to curve_fit
# NOTE(review): with b=1 the model is at most 200*exp(-26.67) at the data
# points, i.e. practically zero everywhere -- this start is probably too far
# from the data for the optimizer to recover; confirm against the printed popt.
initialGuess=[200,1]
# model evaluated at the data temperatures using the starting values
guessedFactors=[func(x,*initialGuess ) for x in temp]
#curve fit
# popt holds the fitted (a, b); pcov is the covariance estimate returned with it
popt,pcov = curve_fit(func, temp, Viscosity,initialGuess)
print (popt)
print (pcov)
# 50 evenly spaced temperatures spanning the data range, for a smooth curve
tempCont=np.linspace(min(temp),max(temp),50)
# NOTE(review): fittedData is computed here but never passed to ax.plot below,
# so the fitted curve does not appear in the figure.
fittedData=[func(x, *popt) for x in tempCont]
fig1 = plt.figure(1)
ax=fig1.add_subplot(1,1,1)
###the data sets to plot: measured points and the initial-guess model
ax.plot(temp,Viscosity,linestyle='',marker='o', color='r',label="data")
ax.plot(temp,guessedFactors,linestyle='',marker='^', color='b',label="initial guess")
###beautification
ax.legend(loc=0, title="graphs", fontsize=12)
ax.set_ylabel("Viscosity")
ax.set_xlabel("temp")
ax.grid()
# NOTE(review): "\m" is not a recognised escape in a normal string literal; a
# raw string (r"...") would avoid the invalid-escape warning.
ax.set_title("$\mathrm{curve}_\mathrm{fit}$")
###putting the covariance matrix nicely
# format every covariance entry to two significant digits for the table cells
tab= [['{:.2g}'.format(j) for j in i] for i in pcov]
# NOTE(review): three column widths are supplied although the model has only
# two parameters (so the covariance table has two columns).
the_table = plt.table(cellText=tab,
                  colWidths = [0.2]*3,
                  loc='upper right', bbox=[0.483, 0.35, 0.5, 0.25] )
# NOTE(review): plt.text positions in data coordinates by default; y=65 is far
# above the viscosity values plotted here, so this label probably lands
# outside the visible axes -- confirm.
plt.text(250,65,'covariance:',size=12)
###putting the plot
plt.show()

I'm pretty sure I've made it overly complicated and messed it up.


Solution

  • Here is example code using your data and equation, with scipy's differential_evolution genetic algorithm used to determine initial parameter estimates for the non-linear fitter. The scipy implementation of Differential Evolution uses the Latin Hypercube algorithm to ensure a thorough search of parameter space; this requires bounds within which to search, so here I have given what I thought were ranges within which the fitted parameters should exist.

    import numpy, scipy, matplotlib
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    from scipy.optimize import differential_evolution
    import warnings
    
    # measurements from the question: temperature (x) and viscosity (y)
    xData = numpy.array([26.67, 93.33, 148.89, 222.01, 315.56])
    yData = numpy.array([1.35, .085, .012, .0049, .00075])
    
    
    def func(T, a, b):
        """Model equation y = a * exp(-b * T).

        T may be a scalar or a numpy array; a is the scale factor and b the
        decay rate.
        """
        decay = numpy.exp(-b * T)
        return a * decay
    
    
    # function for genetic algorithm to minimize (sum of squared error)
    def sumOfSquaredError(parameterTuple):
        """Return the sum of squared residuals of the model against the data.

        This is the objective function minimized by the genetic algorithm.

        parameterTuple: sequence of model parameters, unpacked into func
            after the x data.
        Returns: scalar sum of (yData - func(xData, *parameterTuple)) ** 2.
        """
        # Suppress warnings only while the model is evaluated. Calling
        # warnings.filterwarnings("ignore") here instead would permanently
        # alter the process-wide warning filters on every evaluation and
        # hide warnings raised by any code that runs afterwards.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            val = func(xData, *parameterTuple)
            return numpy.sum((yData - val) ** 2.0)
    
    
    def generate_Initial_Parameters():
        """Use differential evolution to find starting values for the fit.

        Returns the parameter vector (a, b) that the genetic algorithm found
        inside the search box defined below.
        """
        searchBox = [
            [0.0, 10.0],  # search bounds for a
            [-1.0, 1.0],  # search bounds for b
        ]

        # a fixed seed makes the stochastic search repeatable between runs
        evolved = differential_evolution(sumOfSquaredError, searchBox, seed=3)
        return evolved.x
    
    # by default (polish=True), differential_evolution finishes by refining its
    # best candidate with a bounded local minimizer, so its result always lies
    # within the search bounds
    geneticParameters = generate_Initial_Parameters()
    
    # now call curve_fit without passing bounds from the genetic algorithm,
    # just in case the best fit parameters are outside those bounds
    fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
    print('Fitted parameters:', fittedParameters)
    print()
    
    # evaluate the fitted model at the measured x values
    modelPredictions = func(xData, *fittedParameters)
    
    absError = modelPredictions - yData # residuals: prediction minus observation
    
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    # R-squared = 1 - SS_res / SS_tot, which equals 1 - MSE / var(y).
    # Using var(absError) in the numerator would subtract the squared mean
    # residual; that mean is not zero in general for a model without a
    # constant term, so the fit quality would be overstated.
    Rsquared = 1.0 - (MSE / numpy.var(yData))
    
    print()
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)
    
    print()
    
    
    ##########################################################
    # graphics output section
    def ModelAndScatterPlot(graphWidth, graphHeight):
        """Show the raw data as markers with the fitted curve drawn over it.

        graphWidth, graphHeight: figure size in pixels; each is divided by
            100 and the figure is created at dpi=100.
        """
        sizeInInches = (graphWidth/100.0, graphHeight/100.0)
        figure = plt.figure(figsize=sizeInInches, dpi=100)
        canvas = figure.add_subplot(111)
    
        # evaluate the fitted equation across the span of the x data
        curveX = numpy.linspace(min(xData), max(xData))
        curveY = func(curveX, *fittedParameters)
    
        # raw data first as diamond markers, then the model as a line
        canvas.plot(xData, yData,  'D')
        canvas.plot(curveX, curveY)
    
        canvas.set_xlabel('temp') # X axis data label
        canvas.set_ylabel('viscosity') # Y axis data label
    
        plt.show()
        plt.close('all') # clean up after using pyplot
    
    # figure size in pixels; ModelAndScatterPlot divides these by 100 and
    # creates the figure at dpi=100
    graphWidth = 800
    graphHeight = 600
    ModelAndScatterPlot(graphWidth, graphHeight)