python · curve-fitting · scipy-optimize

scipy.optimize.curve_fit won't converge on measured data even with a very close guess


I have a large dataset of diode measurements and I'm trying to extract the theoretical parameters for further modeling. I've had success with scipy's curve_fit on simpler functions before, but here it isn't returning anything close to the measured data.

Here is the code I am running:

import numpy as np
from scipy.special import lambertw as W
import scipy.optimize
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

def shockley(v_arr, Is, Rs, n, Vt):
    i_fit = ((n*Vt)/Rs)*W(((Is*Rs)/(n*Vt))*(np.exp(v_arr/(n*Vt))))
    i_fit = i_fit.real
    return i_fit

v_arr = np.array((0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,0.11,0.12,0.13,0.14,0.15,0.16,0.17,0.18,0.19,0.2,
                  0.21,0.22,0.23,0.24,0.25,0.26,0.27,0.28,0.29,0.3,0.31,0.32,0.33,0.34,0.35,0.36,0.37,0.38,0.39,0.4,
                  0.41,0.42,0.43,0.44,0.45,0.46,0.47,0.48,0.49,0.5,0.51,0.52,0.53,0.54,0.55,0.56,0.57,0.58,0.59,0.6,
                  0.61,0.62,0.63,0.64,0.65,0.66,0.67,0.68,0.69,0.7,0.71,0.72,0.73,0.74,0.75,0.76,0.77,0.78,0.79,0.8,
                  0.81,0.82,0.83,0.84,0.85,0.86,0.87,0.88,0.89,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99,1,1.01,
                  1.02,1.03,1.04,1.05,1.06,1.07,1.08,1.09,1.1,1.11,1.12,1.13,1.14,1.15,1.16,1.17,1.18,1.19,1.2,1.21,
                  1.22,1.23,1.24,1.25,1.26,1.27,1.28,1.29,1.3,1.31,1.32,1.33,1.34,1.35,1.36,1.37,1.38,1.39,1.4,1.41,
                  1.42,1.43,1.44,1.45,1.46,1.47,1.48,1.49,1.5,1.51,1.52,1.53,1.54,1.55,1.56,1.57,1.58,1.59))

i_arr = np.array((-3.76E-11,5.83E-11,-1.05E-10,1.25E-10,-2.45E-11,-1.75E-11,-1.26E-11,-1.01E-11,-9.41E-12,-1.04E-11,
                  -9.47E-12,-9.03E-12,-1.03E-11,-9.54E-12,-7.91E-12,-9.89E-12,-1.65E-11,1.59E-10,-5.90E-11,-1.17E-11,
                  -1.09E-11,-7.61E-12,-2.86E-12,-8.39E-12,-7.87E-12,-7.91E-12,-6.43E-12,-6.87E-12,1.48E-12,-7.25E-12,
                  -6.58E-12,-5.73E-12,-5.39E-12,-3.99E-12,-2.34E-12,-1.08E-12,2.06E-12,3.01E-12,8.24E-12,1.36E-11,
                  2.26E-11,2.99E-11,4.11E-11,6.32E-11,8.54E-11,1.16E-10,1.53E-10,2.09E-10,2.73E-10,3.65E-10,4.93E-10,
                  6.32E-10,8.46E-10,1.09E-09,1.42E-09,1.85E-09,2.41E-09,3.09E-09,3.98E-09,5.16E-09,6.61E-09,8.36E-09,
                  1.10E-08,1.43E-08,1.84E-08,2.39E-08,3.11E-08,4.05E-08,5.41E-08,6.95E-08,9.13E-08,1.19E-07,1.57E-07,
                  2.08E-07,2.79E-07,3.71E-07,4.94E-07,6.57E-07,8.83E-07,1.19E-06,1.61E-06,2.19E-06,3.00E-06,4.12E-06,
                  5.72E-06,7.91E-06,1.10E-05,1.54E-05,2.16E-05,3.03E-05,4.23E-05,5.87E-05,8.09E-05,0.000110276,
                  0.000148352,0.00019647,0.000256439,0.000329539,0.000416616,0.000519519,0.000638944,0.000775399,
                  0.000930671,0.001105803,0.001299725,0.001514556,0.001750505,0.002006911,0.00228582,0.002586988,
                  0.002909377,0.003255466,0.003622608,0.004013351,0.004426898,0.004860486,0.005317697,0.005796602,
                  0.006294098,0.006813415,0.007352292,0.007908308,0.008483431,0.009075115,0.009680456,0.01030162,
                  0.01094043,0.01158479,0.01224334,0.01290703,0.01358056,0.01426283,0.01494866,0.01564087,0.01633509,
                  0.01703126,0.01773028,0.01843017,0.01912786,0.01982758,0.0205251,0.02121851,0.02191115,0.0226002,
                  0.02328385,0.02396644,0.02464198,0.02531404,0.02598236,0.02664429,0.02730172,0.02795239,0.02859697,
                  0.02923778,0.02987059,0.02999878,0.02999881,0.02999872,0.02999867,0.02999871))

guessIs = 2e-9
guessRs = 14.5
guessn = 1.3
guessVt = 0.026

guess_fit = shockley(v_arr, guessIs, guessRs, guessn, guessVt)

plt.plot(v_arr, guess_fit, label='guess')
plt.plot(v_arr, i_arr, label='data')

guess = np.array([guessIs, guessRs, guessn, guessVt])

popt, pcov = scipy.optimize.curve_fit(shockley, v_arr, i_arr, p0=guess, bounds=((2e-9, 0, 1, 0),(1, 20, 2, 1)))
Is, Rs, n, Vt = popt
print(f'Is={Is}, Rs={Rs}, n={n}, Vt={Vt}')

fit = shockley(v_arr, Is, Rs, n, Vt)
plt.plot(v_arr, fit, label='fit')
plt.yscale('log')
plt.xlabel('V')
plt.ylabel('I')
plt.legend()
plt.show()

chart showing poor fit

The plots show that the initial guess is quite close, but the fit always diverges. I have added bounds, which help a bit, but I don't know how much my results will vary, so I can't make the bounds too tight.

I have tried changing the method to 'dogbox' and 'lm' (as per this example). 'dogbox' gets closest but seems to just follow the guess.
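
Roughly, this is how I'm switching methods (a sketch, not my full script; I had to drop the bounds for the 'lm' attempt, since curve_fit rejects bounds with that method):

popt, pcov = scipy.optimize.curve_fit(
    shockley, v_arr, i_arr, p0=guess,
    bounds=((2e-9, 0, 1, 0), (1, 20, 2, 1)),
    method='dogbox',  # 'trf' is the default whenever bounds are given
)

# 'lm' only works for unconstrained problems, so that attempt was unbounded:
popt_lm, pcov_lm = scipy.optimize.curve_fit(
    shockley, v_arr, i_arr, p0=guess, method='lm',
)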


Solution

  • This is not a linear dataset (or function), so a least-squares fit on the raw (linear-scale) current is not appropriate - or perhaps more specifically, it is not the first fit you should perform; you can use it as a secondary finishing step.

    Since the data are exponential, it's crucial that they be plotted on a semilog scale, which makes it obvious how far off your fit is. In linear space the residuals are dominated by the few largest currents, so fit the logarithm of the current instead (shockley_log below). Exponential functions are also very touchy when it comes to initial conditions; I demonstrate how to use sliders to get "in the neighbourhood" by hand when choosing them.

    import numpy as np
    from matplotlib.widgets import Slider
    from scipy.special import lambertw as W
    import scipy.optimize
    import matplotlib
    import matplotlib.pyplot as plt
    
    
    def shockley(
        v: np.ndarray,
        Is: float,
        Rs: float,
        n: float,
        Vt: float,
    ) -> np.ndarray:
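        # Explicit Lambert-W form of the diode equation with series resistance;
        # only the real part of W is kept.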
        i_fit = (
            n*Vt/Rs
        )*W(
            Is*Rs/(n*Vt)
            *np.exp(v/(n*Vt))
        )
        return i_fit.real
    
    
    def shockley_log(
        v: np.ndarray,
        Is: float,
        Rs: float,
        n: float,
        Vt: float,
    ) -> np.ndarray:
        return np.log(shockley(v, Is, Rs, n, Vt))
    
    
    def load_data() -> tuple[np.ndarray, np.ndarray]:
        v_arr = np.arange(0, 1.6, 0.01)
    
        i_arr = np.array((
            -3.76E-11,5.83E-11,-1.05E-10,1.25E-10,-2.45E-11,-1.75E-11,-1.26E-11,-1.01E-11,-9.41E-12,-1.04E-11,
            -9.47E-12,-9.03E-12,-1.03E-11,-9.54E-12,-7.91E-12,-9.89E-12,-1.65E-11,1.59E-10,-5.90E-11,-1.17E-11,
            -1.09E-11,-7.61E-12,-2.86E-12,-8.39E-12,-7.87E-12,-7.91E-12,-6.43E-12,-6.87E-12,1.48E-12,-7.25E-12,
            -6.58E-12,-5.73E-12,-5.39E-12,-3.99E-12,-2.34E-12,-1.08E-12,2.06E-12,3.01E-12,8.24E-12,1.36E-11,
            2.26E-11,2.99E-11,4.11E-11,6.32E-11,8.54E-11,1.16E-10,1.53E-10,2.09E-10,2.73E-10,3.65E-10,4.93E-10,
            6.32E-10,8.46E-10,1.09E-09,1.42E-09,1.85E-09,2.41E-09,3.09E-09,3.98E-09,5.16E-09,6.61E-09,8.36E-09,
            1.10E-08,1.43E-08,1.84E-08,2.39E-08,3.11E-08,4.05E-08,5.41E-08,6.95E-08,9.13E-08,1.19E-07,1.57E-07,
            2.08E-07,2.79E-07,3.71E-07,4.94E-07,6.57E-07,8.83E-07,1.19E-06,1.61E-06,2.19E-06,3.00E-06,4.12E-06,
            5.72E-06,7.91E-06,1.10E-05,1.54E-05,2.16E-05,3.03E-05,4.23E-05,5.87E-05,8.09E-05,0.000110276,
            0.000148352,0.00019647,0.000256439,0.000329539,0.000416616,0.000519519,0.000638944,0.000775399,
            0.000930671,0.001105803,0.001299725,0.001514556,0.001750505,0.002006911,0.00228582,0.002586988,
            0.002909377,0.003255466,0.003622608,0.004013351,0.004426898,0.004860486,0.005317697,0.005796602,
            0.006294098,0.006813415,0.007352292,0.007908308,0.008483431,0.009075115,0.009680456,0.01030162,
            0.01094043,0.01158479,0.01224334,0.01290703,0.01358056,0.01426283,0.01494866,0.01564087,0.01633509,
            0.01703126,0.01773028,0.01843017,0.01912786,0.01982758,0.0205251,0.02121851,0.02191115,0.0226002,
            0.02328385,0.02396644,0.02464198,0.02531404,0.02598236,0.02664429,0.02730172,0.02795239,0.02859697,
            0.02923778,0.02987059,0.02999878,0.02999881,0.02999872,0.02999867,0.02999871,
        ))
    
        return v_arr, i_arr
    
    
    def fit(
        v: np.ndarray,
        i: np.ndarray,
        guessIs: float = 2e-16,
        guessRs: float = 15,
        guessn: float = 0.2,
        guessVt: float = 0.2,
    ) -> tuple[
         np.ndarray,
         np.ndarray,
         np.ndarray,
    ]:
        p0 = np.array((guessIs, guessRs, guessn, guessVt))
        i_guess = shockley(v, *p0)
    
        # Fit the logarithm of the current so residuals spanning many decades
        # are weighted evenly ('lm' only handles unconstrained problems, hence
        # the bounds stay commented out).
        popt, pcov = scipy.optimize.curve_fit(
            f=shockley_log,
            xdata=v, ydata=np.log(i), p0=p0,
            # bounds=(
            #     (1e-16, 0, 0.05, 0),
            #     (1e-9, 20, 1, 1),
            # ),
            method='lm',
        )
        i_fit = shockley(v, *popt)
        return popt, i_guess, i_fit
    
    
    def plot(
        v: np.ndarray,
        i: np.ndarray,
        i_guess: np.ndarray,
        i_fit: np.ndarray,
    ) -> plt.Figure:
        fig, ax = plt.subplots()
    
        ax.semilogy(v, i, label='experiment')
        ax.semilogy(v[:i_guess.size], i_guess, label='estimate')
        ax.semilogy(v[:i_fit.size], i_fit, label='fit')
        ax.set_xlabel('v')
        ax.set_ylabel('i')
        ax.legend()
    
        return fig
    
    
    def show_sliders(
        v: np.ndarray,
        i: np.ndarray,
    ) -> tuple[plt.Figure, Slider, Slider, Slider, Slider]:
        fig: plt.Figure
        ax: plt.Axes
        fig, (ax, *slider_axes) = plt.subplots(ncols=5)
        p0 = 1e-16, 20, 0.2, 0.2  # matches the Is slider's initial log10 value of -16
        ax.semilogy(v, i, label='experiment')
        line, = ax.semilogy(v, shockley(v, *p0), label='parametric')
        ax.set_xlabel('v')
        ax.set_ylabel('i')
    
        is_slider = Slider(
            ax=slider_axes[0], label='Is', valmin=-20, valmax=-9, valinit=-16, orientation='vertical',
        )
        rs_slider = Slider(
            ax=slider_axes[1], label='Rs', valmin=0.1, valmax=100, valinit=p0[1], orientation='vertical',
        )
        n_slider = Slider(
            ax=slider_axes[2], label='n', valmin=0.01, valmax=0.5, valinit=p0[2], orientation='vertical',
        )
        vt_slider = Slider(
            ax=slider_axes[3], label='Vt', valmin=0, valmax=1, valinit=p0[3], orientation='vertical',
        )
    
        def update(val):
            # The Is slider works in log10(Is), hence 10**is_slider.val; the other sliders are linear.
            line.set_ydata(shockley(v, 10**is_slider.val, rs_slider.val, n_slider.val, vt_slider.val))
            fig.canvas.draw_idle()
    
        is_slider.on_changed(update)
        rs_slider.on_changed(update)
        n_slider.on_changed(update)
        vt_slider.on_changed(update)
    
        return fig, is_slider, rs_slider, n_slider, vt_slider
    
    
    def main() -> None:
        v, i = load_data()
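        # Keep only the exponential region: discard readings below ~0.3 V and any
        # non-positive currents so that the log-space fit is well defined.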
        section = (v > 0.3) & (i > 0)
        v = v[section]
        i = i[section]
    
        matplotlib.use('TkAgg')
        # fig, *sliders = show_sliders(v, i)
    
        popt, i_guess, i_fit = fit(v=v, i=i)
        (Is, Rs, n, Vt) = popt
        print(f'Is={Is}, Rs={Rs}, n={n}, Vt={Vt}')
    
        plot(v, i, i_guess, i_fit)
        plt.show()
    
    
    if __name__ == '__main__':
        main()
    
    Is=1.8087970458977566e-16, Rs=14.46916151330944, n=1.0978775079999148, Vt=0.031461129386302314
    

    chart showing the resulting fit

    Further: notice that n and Vt are not meaningfully distinct degrees of freedom, since they only ever appear as the product n*Vt in the Shockley equation. That means you can choose almost any value for n and still find a Vt that makes the fit look correct. For your purposes, you can merge those two variables into a single nVt parameter during the fit - or, better, fix Vt at the thermal voltage for room temperature (about 300 K), 25.85 mV, and optimise only n. Re-fitting with Vt fixed this way gets you closer to

    Is=1.8087964456455226e-16, Rs=14.469165848153489, n=1.336188473508956
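
    As a minimal sketch of that reduced fit (the helper name shockley_log_fixed_vt, the VT_FIXED constant and the starting values below are mine, reusing shockley_log and the filtered v, i arrays from main() above):

    VT_FIXED = 0.02585  # V; thermal voltage held constant instead of fitted

    def shockley_log_fixed_vt(v, Is, Rs, n):
        # Same log-space model, with Vt removed as a free parameter.
        return shockley_log(v, Is, Rs, n, VT_FIXED)

    popt, pcov = scipy.optimize.curve_fit(
        f=shockley_log_fixed_vt,
        xdata=v, ydata=np.log(i),
        p0=(1e-16, 15, 1.3),  # illustrative guesses for Is, Rs, n
        method='lm',
    )
    Is, Rs, n = popt
    print(f'Is={Is}, Rs={Rs}, n={n}')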