I have a curve: enter image description here
and its digitized data. https://drive.google.com/open?id=1ZB39G3SmtamjVjmLzkC2JefloZ9iShpO
How should I choose a suitable function for the least squares method and how this approximation can be implemented on Python?
I tried to do it like that
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym
x = np.loadtxt("x_data.txt", delimiter='\t', dtype=np.float)
y = np.loadtxt("y_data.txt", delimiter='\t', dtype=np.float)
plt.plot(x, y, 'ro',label="Original Data")
x = np.array(x, dtype=float)
y = np.array(y, dtype=float)
def func(x, a, b, c, d):
return a*x**3 + b*x**2 +c*x + d
popt, pcov = curve_fit(func, x, y)
xs = sym.Symbol('\lambda')
tex = sym.latex(func(xs,*popt)).replace('$', '')
plt.title(r'$f(\lambda)= %s$' %(tex),fontsize=16)
plt.plot(x, func(x, *popt), label="Fitted Curve")
plt.legend(loc='upper left')
plt.show()
thanks
I extracted data from your plot for analysis, and here is my first cut at the problem. Since your plot used decade-log scaling, I took the anti-decade-log of the "left-side" Y data for fitting. My peak equation search on this "extracted" data turned up a log-normal type of equation, and here is a graphical python fitter reading in data files and fitting to this equation. This fitter uses the scipy differential_evolution genetic algorithm module to determine initial parameter estimates for the non-linear fitter, which requires parameter ranges within which to search. In this code the data maximum and minimum values are used along with my range estimates. It is much easier to estimate ranges on initial parameter values than specific values. This fitter should be able to directly read your data files. If you can post or link to the actual data I might be able to make a better fit than is shown here.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
with open('./x_data.txt', 'rt') as f:
x_file = f.read()
with open('./y_data.txt', 'rt') as f:
y_file = f.read()
xlist = []
for line in x_file.split('\n'):
if line: # this allows blank lines in file
xlist.append(float(line.strip()))
ylist = []
for line in y_file.split('\n'):
if line: # this allows blank lines in file
ylist.append(float(line.strip()))
if len(xlist) != len(ylist):
print(len(xlist), len(ylist))
raise Exception('X and Y habe different length')
xData = numpy.array(xlist)
yData = numpy.array(ylist)
def func(t, a, b, c, d): # Log-Normal Peak A Shifted from zunzun.com
return a * numpy.exp(-0.5 * numpy.power((numpy.log(t-d)-b) / c, 2.0))
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xData, *parameterTuple)
return numpy.sum((yData - val) ** 2.0)
def generate_Initial_Parameters():
# min and max used for bounds
maxX = max(xData)
minX = min(xData)
maxY = max(yData)
minY = min(yData)
parameterBounds = []
parameterBounds.append([minY, maxY]) # search bounds for a
parameterBounds.append([0.0, 2.0]) # search bounds for b
parameterBounds.append([-1.0, 0.0]) # search bounds for c
parameterBounds.append([-maxX, 0.0]) # search bounds for d
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
return result.x
# by default, differential_evolution completes by calling curve_fit() using parameter bounds
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are aoutside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData), 1000)
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)
EDIT: use actual data
With the actual data now available I found that a Pulse peak equation gives a good fit to the data, here is an updated fitter. I recommend taking additional data from time 0 to 5 if possible, this will yield data that better characterizes the region at the beginning of the peak.
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings
with open('./x_data.txt', 'rt') as f:
x_file = f.read()
with open('./y_data.txt', 'rt') as f:
y_file = f.read()
xlist = []
for line in x_file.split('\n'):
if line: # this allows blank lines in file
xlist.append(float(line.strip()))
ylist = []
for line in y_file.split('\n'):
if line: # this allows blank lines in file
ylist.append(float(line.strip()))
if len(xlist) != len(ylist):
print(len(xlist), len(ylist))
raise Exception('X and Y have different length')
xData = numpy.array(xlist)
yData = numpy.array(ylist)
def func(t, a, b, c, Offset): # Pulse Peak With Offset from zunzun.com
return 4.0 * a * numpy.exp(-1.0 * (t-b) / c) * (1.0 - numpy.exp(-1.0 * (t-b) / c)) + Offset
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xData, *parameterTuple)
return numpy.sum((yData - val) ** 2.0)
def generate_Initial_Parameters():
# min and max used for bounds
maxX = max(xData)
minX = min(xData)
maxY = max(yData)
minY = min(yData)
parameterBounds = []
parameterBounds.append([minY, maxY]) # search bounds for a
parameterBounds.append([-5.0, 0.0]) # search bounds for b
parameterBounds.append([1.0, 10.0]) # search bounds for c
parameterBounds.append([minY, maxY]) # search bounds for Offset
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
return result.x
# by default, differential_evolution completes by calling curve_fit() using parameter bounds
geneticParameters = generate_Initial_Parameters()
# now call curve_fit without passing bounds from the genetic algorithm,
# just in case the best fit parameters are aoutside those bounds
fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters:', fittedParameters)
print()
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData
SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared)
print()
##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plot
xModel = numpy.linspace(min(xData), max(xData), 1000)
yModel = func(xModel, *fittedParameters)
# now the model as a line plot
axes.plot(xModel, yModel)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelAndScatterPlot(graphWidth, graphHeight)