Search code examples
pythonnumpymatplotlibscipycurve-fitting

Improving the quality of error function fit using Python


I am trying to do an error function fit on lenJ1 but the fitted curve is not that great. How can I improve the fitting? Ideally I would want R^2 value to be greater than 0.99,

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score

# Define the error function
def error_function(x, a, b, c, d):
    return a + b * np.exp(-(x - c)**2 / (2.0 * d**2))

# Data
lenJ1 = [337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 337, 336, 336, 334, 334, 333, 331, 331, 331, 329, 329, 328, 324, 323, 315, 308, 294, 283, 273, 264, 244, 234, 222, 217, 205, 188, 181, 174, 162, 151, 133, 126, 117, 112, 105, 96, 95, 87, 80, 73, 62, 59, 58, 52, 40, 35, 33, 31, 30, 29, 29, 26, 23, 21, 19, 18, 16, 15, 15, 15, 15, 15, 15, 14, 13, 12, 12, 12, 12, 12, 11, 10, 8, 8, 7, 7, 7, 6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0]
lenJ1 = [1 - (i / 1012) for i in lenJ1]

t_start = 0.0
t_end = 1e2
timesteps = int(1e3 * t_end) + 1
t1 = np.linspace(t_start, t_end, timesteps)
print(timesteps)

# Provide adjusted initial guesses for parameters based on the data characteristics
initial_guess = [1, 1, 1, 1]

# Fit the error function to the data with adjusted initial guesses
popt, pcov = curve_fit(error_function, t1[:len(lenJ1)]/1.0330677596203013, lenJ1, p0=initial_guess, bounds=([0, 0, 0, 0], [100, 100, 100, 100]))

# Predicted values using the fitted curve
predicted_values = error_function(t1[:len(lenJ1)]/1.0330677596203013, *popt)

# Calculate R^2 value
r_squared = r2_score(lenJ1, predicted_values)

# Plot the data and the fitted curve
plt.plot(t1[:len(lenJ1)], lenJ1, 'b-', label='Data')
plt.plot(t1[:len(lenJ1)], error_function(t1[:len(lenJ1)]/1.0330677596203013, *popt), 'r--', label='Fitted Curve')

plt.xlabel('t1')
plt.ylabel('Values')
plt.title('Error Function Fit')
plt.legend()
plt.show()

# Print the parameters of the fitted error function
print("Parameters (a, b, c, d):", popt)
print("R^2 value:", r_squared)

The current output is

enter image description here


Solution

  • You are fitting the wrong model, use the CDF (integrated form) to get the sigmoïd shaped curve:

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy import stats, optimize
    from sklearn.metrics import r2_score
    
    def model(x, x0, s, A, c):
        law = stats.norm(loc=x0, scale=s)
        return A * law.cdf(x) + c
    
    popt, pcov = optimize.curve_fit(model, t, x)
    
    xhat = model(t, *popt)
    # array([0.0534708 , 0.01188575, 0.32716774, 0.66368917])
    
    score = r2_score(x, xhat)  # 0.9979168932633076
    

    It renders as follow:

    enter image description here