Search code examples
pythonnumpyscipycurve-fittingpiecewise

How to make a piecewise linear fit in Python with some constant pieces?


I'm trying to make a piecewise linear fit consisting of 3 pieces whereof the first and last pieces are constant. As you can see in this figure figure

don't get the expected fit, since the fit doesn't capture the 3 linear pieces clearly visual from the original data points.

I've tried following this question and expanded it to the case of 3 pieces with the two constant pieces, but I must have done something wrong.

Here is my code:

from scipy import optimize
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
plt.rcParams['figure.figsize'] = [16, 6]

x = np.arange(0, 50, dtype=float)
y = np.array([50 for i in range(10)]
             + [50 - (50-5)/31 * i for i in range(1, 31)]
             + [5 for i in range(10)],
             dtype=float)

def piecewise_linear(x, x0, y0, x1, y1):
    return np.piecewise(x,
                        [x < x0, (x >= x0) & (x < x1), x >= x1],
                        [lambda x:y0, lambda x:(y1-y0)/(x1-x0)*(x-x0)+y0, lambda x:y1])

p , e = optimize.curve_fit(piecewise_linear, x, y)
xd = np.linspace(0, 50, 101)

plt.plot(x, y, "o", label='original data')
plt.plot(xd, piecewise_linear(xd, *p), label='piecewise linear fit')
plt.legend()

The accepted answer to the previous mentioned question suggest looking at segments_fit.ipynb for the case of N parts, but following that it doesn't seem that I can specify, that the first and last pieces should be constant.

Furthermore I do get the following warning:

OptimizeWarning: Covariance of the parameters could not be estimated

What do I do wrong?


Solution

  • You could directly copy the segments_fit implementation

    from scipy import optimize
    
    def segments_fit(X, Y, count):
        xmin = X.min()
        xmax = X.max()
    
        seg = np.full(count - 1, (xmax - xmin) / count)
    
        px_init = np.r_[np.r_[xmin, seg].cumsum(), xmax]
        py_init = np.array([Y[np.abs(X - x) < (xmax - xmin) * 0.01].mean() for x in px_init])
    
        def func(p):
            seg = p[:count - 1]
            py = p[count - 1:]
            px = np.r_[np.r_[xmin, seg].cumsum(), xmax]
            return px, py
    
        def err(p):
            px, py = func(p)
            Y2 = np.interp(X, px, py)
            return np.mean((Y - Y2)**2)
    
        r = optimize.minimize(err, x0=np.r_[seg, py_init], method='Nelder-Mead')
        return func(r.x)
    

    Then you apply it as follows

    import numpy as np;
    
    # mimic your data
    x = np.linspace(0, 50)
    y = 50 - np.clip(x, 10, 40)
    
    # apply the segment fit
    fx, fy = segments_fit(x, y, 3)
    

    This will give you (fx,fy) the corners your piecewise fit, let's plot it

    import matplotlib.pyplot as plt
    
    # show the results
    plt.figure(figsize=(8, 3))
    plt.plot(fx, fy, 'o-')
    plt.plot(x, y, '.')
    plt.legend(['fitted line', 'given points'])
    

    piecewise linear fit

    EDIT: Introducing constant segments

    As mentioned in the comments the above example doesn't guarantee that the output will be constant in the end segments.

    Based on this implementation the easier way I can think is to restrict func(p) to do that, a simple way to ensure a segment is constant, is to set y[i+1]==y[i]. Thus I added xanchor and yanchor. If you give an array with repeated numbers you can bind multiple points to the same value.

    from scipy import optimize
    
    def segments_fit(X, Y, count, xanchors=slice(None), yanchors=slice(None)):
        xmin = X.min()
        xmax = X.max()
        seg = np.full(count - 1, (xmax - xmin) / count)
    
        px_init = np.r_[np.r_[xmin, seg].cumsum(), xmax]
        py_init = np.array([Y[np.abs(X - x) < (xmax - xmin) * 0.01].mean() for x in px_init])
    
        def func(p):
            seg = p[:count - 1]
            py = p[count - 1:]
            px = np.r_[np.r_[xmin, seg].cumsum(), xmax]
            py = py[yanchors]
            px = px[xanchors]
            return px, py
    
        def err(p):
            px, py = func(p)
            Y2 = np.interp(X, px, py)
            return np.mean((Y - Y2)**2)
    
        r = optimize.minimize(err, x0=np.r_[seg, py_init], method='Nelder-Mead')
        return func(r.x)
    

    I modified a little the data generation to make it more clear the effect of the change

    import matplotlib.pyplot as plt
    import numpy as np;
    
    # mimic your data
    x = np.linspace(0, 50)
    y = 50 - np.clip(x, 10, 40) + np.random.randn(len(x)) + 0.25 * x
    # apply the segment fit
    fx, fy = segments_fit(x, y, 3)
    plt.plot(fx, fy, 'o-')
    plt.plot(x, y, '.k')
    # apply the segment fit with some consecutive points having the 
    # same anchor
    fx, fy = segments_fit(x, y, 3, yanchors=[1,1,2,2])
    plt.plot(fx, fy, 'o--r')
    plt.legend(['fitted line', 'given points', 'with const segments'])
    

    enter image description here