python matplotlib curve-fitting covariance-matrix

Best fit of an increasing function that becomes constant

I am trying to fit a function to the following data.

x_data = np.array([0.01      , 0.01871795, 0.0274359 , 0.03615385, 0.04487179,
       0.05358974, 0.06230769, 0.07102564, 0.07974359, 0.08846154,
       0.09717949, 0.10589744, 0.11461538, 0.12333333, 0.13205128,
       0.14076923, 0.14948718, 0.15820513, 0.16692308, 0.17564103,
       0.18435897, 0.19307692, 0.20179487, 0.21051282, 0.21923077,
       0.22794872, 0.23666667, 0.24538462, 0.25410256, 0.26282051,
       0.27153846, 0.28025641, 0.28897436, 0.29769231, 0.30641026,
       0.31512821, 0.32384615, 0.3325641 , 0.34128205, 0.35      ])
y_data = array([0.07462271, 0.12197987, 0.15732335, 0.18376046, 0.2035856 ,
       0.21849438, 0.22974112, 0.23825469, 0.24472376, 0.24965963,
       0.25344248, 0.25635547, 0.2586099 , 0.26036377, 0.26173554,
       0.26281424, 0.26366699, 0.26434454, 0.26488545, 0.2653191 ,
       0.265668  , 0.26594946, 0.26617689, 0.26636074, 0.26650917,
       0.26662865, 0.2667243 , 0.26680021, 0.26685972, 0.26690551,
       0.26693978, 0.26696438, 0.26698081, 0.26699036, 0.2669941 ,
       0.26699297, 0.26698774, 0.26697912, 0.26696768, 0.26695394])

I have tried a general power function however it did not capture the fact that after a certain x value the y values becomes more or less constant. I also tried fitting a general piecewise function which is a power law when xa.

def fpiece(x, a, b, c, d):
    return np.where(x < a, b*np.power(x, c), d)

# initial guess
pars0 = (0.15, 0.4, 1, 0.25)

# perform fitting with custom weights
popt, pcov = curve_fit(fpiece, x_data, y_data, p0=pars0, maxfev=5000)

# plot data
plt.errorbar(x_data, y_data, yerr=0, fmt =".", color='black', label ='data', zorder=1, markersize=10)

# creating x interval to include in y fit
x_interval = np.linspace(0, max(x_data), len(x_data))
y_fit = fpiece( x_interval , *popt)

plt.plot( x_interval, y_fit, color = "red", label = "Best fit", zorder=2 , linewidth=3)

plt.grid(True)
plt.ylabel("y data")
plt.xlabel("x data")
plt.title("Best fit of data")
plt.legend()
plt.show()

However, you can see below that it doesn't fit the curve exactly.

Solution

You need a "switch" function - one that goes through the origin but asymptotes/tops-out to your final value.

I tried a few but the best I could get was y=a.(1-exp(-bx)).

Here:

a =  0.2664100717998893     b =  32.14540706754588

(Note that you would get an even better visual fit to the curved part of the graph by removing the last 10 or so data points, because it is currently weighting heavily and unnecessarily to the points where y is essentially constant.)

Code:

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit


x_data = np.array([0.01      , 0.01871795, 0.0274359 , 0.03615385, 0.04487179,
       0.05358974, 0.06230769, 0.07102564, 0.07974359, 0.08846154,
       0.09717949, 0.10589744, 0.11461538, 0.12333333, 0.13205128,
       0.14076923, 0.14948718, 0.15820513, 0.16692308, 0.17564103,
       0.18435897, 0.19307692, 0.20179487, 0.21051282, 0.21923077,
       0.22794872, 0.23666667, 0.24538462, 0.25410256, 0.26282051,
       0.27153846, 0.28025641, 0.28897436, 0.29769231, 0.30641026,
       0.31512821, 0.32384615, 0.3325641 , 0.34128205, 0.35      ])
y_data = np.array([0.07462271, 0.12197987, 0.15732335, 0.18376046, 0.2035856 ,
       0.21849438, 0.22974112, 0.23825469, 0.24472376, 0.24965963,
       0.25344248, 0.25635547, 0.2586099 , 0.26036377, 0.26173554,
       0.26281424, 0.26366699, 0.26434454, 0.26488545, 0.2653191 ,
       0.265668  , 0.26594946, 0.26617689, 0.26636074, 0.26650917,
       0.26662865, 0.2667243 , 0.26680021, 0.26685972, 0.26690551,
       0.26693978, 0.26696438, 0.26698081, 0.26699036, 0.2669941 ,
       0.26699297, 0.26698774, 0.26697912, 0.26696768, 0.26695394])


def fpiece( x, a, b ):
    return a * ( 1.0 - np.exp( -b * x ) )


pars0 = ( 0.267, 1 )
popt, pcov = curve_fit( fpiece, x_data, y_data, p0=pars0 )
print( "a = ", popt[0], "    b = ", popt[1] )

# plot data
plt.errorbar(x_data, y_data, yerr=0, fmt =".", color='black', label ='data', zorder=1, markersize=10)
# creating x interval to include in y fit
x_interval = np.linspace(0, max(x_data), len(x_data))
y_fit = fpiece( x_interval , *popt)
plt.plot( x_interval, y_fit, color = "red", label = "Best fit", zorder=2 , linewidth=3)
plt.grid(True)
plt.ylabel("y data")
plt.xlabel("x data")
plt.title("Best fit of data")
plt.legend()
plt.show()