python · curve-fitting · scipy-optimize

scipy.optimize.curve_fit won't converge on measured data even with a very close guess


I have a large dataset of diode measurements and I'm trying to extract the theoretical parameters for further modeling. I've had success with scipy's curve_fit on simpler functions before, but here it isn't returning anything close to the measured data.

Here is the code I am running:

import numpy as np
from scipy.special import lambertw as W
import scipy.optimize
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

def shockley(v_arr, Is, Rs, n, Vt):
    i_fit = ((n*Vt)/Rs)*W(((Is*Rs)/(n*Vt))*(np.exp(v_arr/(n*Vt))))
    i_fit = i_fit.real
    return i_fit

v_arr = np.array((0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,
                  0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,
                  0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,
                  0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,
                  0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1,1.01,
                  1.02,1.03,1.04,1.05,1.06,1.07,1.08,1.09,1.1,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.2,1.21,
                  1.22,1.23,1.24,1.25,1.26,1.27,1.28,1.29,1.3,1.31,1.32,1.33,1.34,1.35,1.36,1.37,1.38,1.39,1.4,1.41,
                  1.42,1.43,1.44,1.45,1.46,1.47,1.48,1.49,1.5,1.51,1.52,1.53,1.54,1.55,1.56,1.57,1.58,1.59))

i_arr = np.array((-3.76E-11,5.83E-11,-1.05E-10,1.25E-10,-2.45E-11,-1.75E-11,-1.26E-11,-1.01E-11,-9.41E-12,-1.04E-11,
                  -9.47E-12,-9.03E-12,-1.03E-11,-9.54E-12,-7.91E-12,-9.89E-12,-1.65E-11,1.59E-10,-5.90E-11,-1.17E-11,
                  -1.09E-11,-7.61E-12,-2.86E-12,-8.39E-12,-7.87E-12,-7.91E-12,-6.43E-12,-6.87E-12,1.48E-12,-7.25E-12,
                  -6.58E-12,-5.73E-12,-5.39E-12,-3.99E-12,-2.34E-12,-1.08E-12,2.06E-12,3.01E-12,8.24E-12,1.36E-11,
                  2.26E-11,2.99E-11,4.11E-11,6.32E-11,8.54E-11,1.16E-10,1.53E-10,2.09E-10,2.73E-10,3.65E-10,4.93E-10,
                  6.32E-10,8.46E-10,1.09E-09,1.42E-09,1.85E-09,2.41E-09,3.09E-09,3.98E-09,5.16E-09,6.61E-09,8.36E-09,
                  1.10E-08,1.43E-08,1.84E-08,2.39E-08,3.11E-08,4.05E-08,5.41E-08,6.95E-08,9.13E-08,1.19E-07,1.57E-07,
                  2.08E-07,2.79E-07,3.71E-07,4.94E-07,6.57E-07,8.83E-07,1.19E-06,1.61E-06,2.19E-06,3.00E-06,4.12E-06,
                  5.72E-06,7.91E-06,1.10E-05,1.54E-05,2.16E-05,3.03E-05,4.23E-05,5.87E-05,8.09E-05,0.000110276,
                  0.000148352,0.00019647,0.000256439,0.000329539,0.000416616,0.000519519,0.000638944,0.000775399,
                  0.000930671,0.001105803,0.001299725,0.001514556,0.001750505,0.002006911,0.00228582,0.002586988,
                  0.002909377,0.003255466,0.003622608,0.004013351,0.004426898,0.004860486,0.005317697,0.005796602,
                  0.006294098,0.006813415,0.007352292,0.007908308,0.008483431,0.009075115,0.009680456,0.01030162,
                  0.01094043,0.01158479,0.01224334,0.01290703,0.01358056,0.01426283,0.01494866,0.01564087,0.01633509,
                  0.01703126,0.01773028,0.01843017,0.01912786,0.01982758,0.0205251,0.02121851,0.02191115,0.0226002,
                  0.02328385,0.02396644,0.02464198,0.02531404,0.02598236,0.02664429,0.02730172,0.02795239,0.02859697,
                  0.02923778,0.02987059,0.02999878,0.02999881,0.02999872,0.02999867,0.02999871))

guessIs = 2e-9
guessRs = 14.5
guessn = 1.3
guessVt = 0.026

guess_fit = shockley(v_arr, guessIs, guessRs, guessn, guessVt)

plt.plot(v_arr, guess_fit, label='guess')
plt.plot(v_arr, i_arr, label='data')

guess = np.array([guessIs, guessRs, guessn, guessVt])

popt, pcov = scipy.optimize.curve_fit(shockley, v_arr, i_arr, p0=guess, bounds=((2e-9, 0, 1, 0),(1, 20, 2, 1)))
Is, Rs, n, Vt = popt
print(f'Is={Is}, Rs={Rs}, n={n}, Vt={Vt}')

fit = shockley(v_arr, Is, Rs, n, Vt)
plt.plot(v_arr, fit, label='fit')
plt.yscale('log')
plt.xlabel('V')
plt.ylabel('I')
plt.legend()
plt.show()

chart showing poor fit

The plots show that the initial guess is quite close, but the fit always diverges. I have added bounds, which help a bit, but I don't know how much my results will vary, so I can't make the bounds too tight.

I have tried changing the method to 'dogbox' and 'lm' (as per this example). 'dogbox' gets closest but seems to just follow the guess.
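
Roughly, this is how I'm switching methods (a sketch, not my full script; I had to drop the bounds for the 'lm' attempt, since curve_fit rejects bounds with that method):

popt, pcov = scipy.optimize.curve_fit(
    shockley, v_arr, i_arr, p0=guess,
    bounds=((2e-9, 0, 1, 0), (1, 20, 2, 1)),
    method='dogbox',  # 'trf' is the default whenever bounds are given
)

# 'lm' only works for unconstrained problems, so that attempt was unbounded:
popt_lm, pcov_lm = scipy.optimize.curve_fit(
    shockley, v_arr, i_arr, p0=guess, method='lm',
)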


Solution

  • This is not a linear dataset (or function), so a least-squares fit on the raw (linear-scale) current is not appropriate - or perhaps more specifically, it is not the first fit you should perform; you can use it as a secondary finishing step.

    Since the data are exponential, it's crucial that they be plotted on a semilog scale, which makes it obvious how far off your fit is. In linear space the residuals are dominated by the few largest currents, so fit the logarithm of the current instead (shockley_log below). Exponential functions are also very touchy when it comes to initial conditions; I demonstrate how to use sliders to get "in the neighbourhood" by hand when choosing them.

    import numpy as np
    from matplotlib.widgets import Slider
    from scipy.special import lambertw as W
    import scipy.optimize
    import matplotlib
    import matplotlib.pyplot as plt
    
    
    def shockley(
        v: np.ndarray,
        Is: float,
        Rs: float,
        n: float,
        Vt: float,
    ) -> np.ndarray:
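        # Explicit Lambert-W form of the diode equation with series resistance;
        # only the real part of W is kept.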
        i_fit = (
            n*Vt/Rs
        )*W(
            Is*Rs/(n*Vt)
            *np.exp(v/(n*Vt))
        )
        return i_fit.real
    
    
    def shockley_log(
        v: np.ndarray,
        Is: float,
        Rs: float,
        n: float,
        Vt: float,
    ) -> np.ndarray:
        return np.log(shockley(v, Is, Rs, n, Vt))
    
    
    def load_data() -> tuple[np.ndarray, np.ndarray]:
        v_arr = np.arange(0, 1.6, 0.01)
    
        i_arr = np.array((
            -3.76E-11,5.83E-11,-1.05E-10,1.25E-10,-2.45E-11,-1.75E-11,-1.26E-11,-1.01E-11,-9.41E-12,-1.04E-11,
            -9.47E-12,-9.03E-12,-1.03E-11,-9.54E-12,-7.91E-12,-9.89E-12,-1.65E-11,1.59E-10,-5.90E-11,-1.17E-11,
            -1.09E-11,-7.61E-12,-2.86E-12,-8.39E-12,-7.87E-12,-7.91E-12,-6.43E-12,-6.87E-12,1.48E-12,-7.25E-12,
            -6.58E-12,-5.73E-12,-5.39E-12,-3.99E-12,-2.34E-12,-1.08E-12,2.06E-12,3.01E-12,8.24E-12,1.36E-11,
            2.26E-11,2.99E-11,4.11E-11,6.32E-11,8.54E-11,1.16E-10,1.53E-10,2.09E-10,2.73E-10,3.65E-10,4.93E-10,
            6.32E-10,8.46E-10,1.09E-09,1.42E-09,1.85E-09,2.41E-09,3.09E-09,3.98E-09,5.16E-09,6.61E-09,8.36E-09,
            1.10E-08,1.43E-08,1.84E-08,2.39E-08,3.11E-08,4.05E-08,5.41E-08,6.95E-08,9.13E-08,1.19E-07,1.57E-07,
            2.08E-07,2.79E-07,3.71E-07,4.94E-07,6.57E-07,8.83E-07,1.19E-06,1.61E-06,2.19E-06,3.00E-06,4.12E-06,
            5.72E-06,7.91E-06,1.10E-05,1.54E-05,2.16E-05,3.03E-05,4.23E-05,5.87E-05,8.09E-05,0.000110276,
            0.000148352,0.00019647,0.000256439,0.000329539,0.000416616,0.000519519,0.000638944,0.000775399,
            0.000930671,0.001105803,0.001299725,0.001514556,0.001750505,0.002006911,0.00228582,0.002586988,
            0.002909377,0.003255466,0.003622608,0.004013351,0.004426898,0.004860486,0.005317697,0.005796602,
            0.006294098,0.006813415,0.007352292,0.007908308,0.008483431,0.009075115,0.009680456,0.01030162,
            0.01094043,0.01158479,0.01224334,0.01290703,0.01358056,0.01426283,0.01494866,0.01564087,0.01633509,
            0.01703126,0.01773028,0.01843017,0.01912786,0.01982758,0.0205251,0.02121851,0.02191115,0.0226002,
            0.02328385,0.02396644,0.02464198,0.02531404,0.02598236,0.02664429,0.02730172,0.02795239,0.02859697,
            0.02923778,0.02987059,0.02999878,0.02999881,0.02999872,0.02999867,0.02999871,
        ))
    
        return v_arr, i_arr
    
    
    def fit(
        v: np.ndarray,
        i: np.ndarray,
        guessIs: float = 2e-16,
        guessRs: float = 15,
        guessn: float = 0.2,
        guessVt: float = 0.2,
    ) -> tuple[
         np.ndarray,
         np.ndarray,
         np.ndarray,
    ]:
        p0 = np.array((guessIs, guessRs, guessn, guessVt))
        i_guess = shockley(v, *p0)
    
        # Fit the logarithm of the current so residuals spanning many decades
        # are weighted evenly ('lm' only handles unconstrained problems, hence
        # the bounds stay commented out).
        popt, pcov = scipy.optimize.curve_fit(
            f=shockley_log,
            xdata=v, ydata=np.log(i), p0=p0,
            # bounds=(
            #     (1e-16, 0, 0.05, 0),
            #     (1e-9, 20, 1, 1),
            # ),
            method='lm',
        )
        i_fit = shockley(v, *popt)
        return popt, i_guess, i_fit
    
    
    def plot(
        v: np.ndarray,
        i: np.ndarray,
        i_guess: np.ndarray,
        i_fit: np.ndarray,
    ) -> plt.Figure:
        fig, ax = plt.subplots()
    
        ax.semilogy(v, i, label='experiment')
        ax.semilogy(v[:i_guess.size], i_guess, label='estimate')
        ax.semilogy(v[:i_fit.size], i_fit, label='fit')
        ax.set_xlabel('v')
        ax.set_ylabel('i')
        ax.legend()
    
        return fig
    
    
    def show_sliders(
        v: np.ndarray,
        i: np.ndarray,
    ) -> tuple[plt.Figure, Slider, Slider, Slider, Slider]:
        fig: plt.Figure
        ax: plt.Axes
        fig, (ax, *slider_axes) = plt.subplots(ncols=5)
        p0 = 1e-16, 20, 0.2, 0.2  # matches the Is slider's initial log10 value of -16
        ax.semilogy(v, i, label='experiment')
        line, = ax.semilogy(v, shockley(v, *p0), label='parametric')
        ax.set_xlabel('v')
        ax.set_ylabel('i')
    
        is_slider = Slider(
            ax=slider_axes[0], label='Is', valmin=-20, valmax=-9, valinit=-16, orientation='vertical',
        )
        rs_slider = Slider(
            ax=slider_axes[1], label='Rs', valmin=0.1, valmax=100, valinit=p0[1], orientation='vertical',
        )
        n_slider = Slider(
            ax=slider_axes[2], label='n', valmin=0.01, valmax=0.5, valinit=p0[2], orientation='vertical',
        )
        vt_slider = Slider(
            ax=slider_axes[3], label='Vt', valmin=0, valmax=1, valinit=p0[3], orientation='vertical',
        )
    
        def update(val):
            # The Is slider works in log10(Is), hence 10**is_slider.val; the other sliders are linear.
            line.set_ydata(shockley(v, 10**is_slider.val, rs_slider.val, n_slider.val, vt_slider.val))
            fig.canvas.draw_idle()
    
        is_slider.on_changed(update)
        rs_slider.on_changed(update)
        n_slider.on_changed(update)
        vt_slider.on_changed(update)
    
        return fig, is_slider, rs_slider, n_slider, vt_slider
    
    
    def main() -> None:
        v, i = load_data()
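        # Keep only the exponential region: discard readings below ~0.3 V and any
        # non-positive currents so that the log-space fit is well defined.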
        section = (v > 0.3) & (i > 0)
        v = v[section]
        i = i[section]
    
        matplotlib.use('TkAgg')
        # fig, *sliders = show_sliders(v, i)
    
        popt, i_guess, i_fit = fit(v=v, i=i)
        (Is, Rs, n, Vt) = popt
        print(f'Is={Is}, Rs={Rs}, n={n}, Vt={Vt}')
    
        plot(v, i, i_guess, i_fit)
        plt.show()
    
    
    if __name__ == '__main__':
        main()
    
    Is=1.8087970458977566e-16, Rs=14.46916151330944, n=1.0978775079999148, Vt=0.031461129386302314
    

    chart showing the resulting fit

    Further: notice that n and Vt are not meaningfully distinct degrees of freedom, since they only ever appear as the product n*Vt in the Shockley equation. That means you can choose almost any value for n and still find a Vt that makes the fit look correct. For your purposes, you can merge those two variables into a single nVt parameter during the fit - or, better, fix Vt at the thermal voltage for room temperature (about 300 K), 25.85 mV, and optimise only n. Re-fitting with Vt fixed this way gets you closer to

    Is=1.8087964456455226e-16, Rs=14.469165848153489, n=1.336188473508956
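
    As a minimal sketch of that reduced fit (the helper name shockley_log_fixed_vt, the VT_FIXED constant and the starting values below are mine, reusing shockley_log and the filtered v, i arrays from main() above):

    VT_FIXED = 0.02585  # V; thermal voltage held constant instead of fitted

    def shockley_log_fixed_vt(v, Is, Rs, n):
        # Same log-space model, with Vt removed as a free parameter.
        return shockley_log(v, Is, Rs, n, VT_FIXED)

    popt, pcov = scipy.optimize.curve_fit(
        f=shockley_log_fixed_vt,
        xdata=v, ydata=np.log(i),
        p0=(1e-16, 15, 1.3),  # illustrative guesses for Is, Rs, n
        method='lm',
    )
    Is, Rs, n = popt
    print(f'Is={Is}, Rs={Rs}, n={n}')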