Search code examples
pythonrregressioncurve-fittingangle

Angle arms fitting to data points


I would like to fit "angle arms" to data points in R.

Main assumption about data is that y values increase quite significantly at the beginning, and after some time increase is remarkably slower (something like a plateau with deviations).

Is it possible to determine x coordinate of such point (angle vertex) without specific assumptions? My idea is to divide x range to two parts and perform linear regression to fit angle arms.

Picture shows two possible angle vertexes (yellow/red) that looks quite good for me (straight line/empty circles is linear regression fit for the all data). Solid blue circles/points represent input data. Green bold lines are my subjective angle arms fit to the data.

Angle fit example


Solution

  • Per the comments, here is a graphical Python example that fits two straight lines to data with the breakpoint between them automatically selected as a fitted parameter. This example uses the standard scipy differential_evolution genetic algorithm module to determine initial parameter estimates for both the two straight lines and the breakpoint. That scipy module uses the Latin Hypercube algorithm to ensure a thorough search of parameter space, requiring bounds withing which to search. In this example, those bounds are derived from the input data.

    plot

    import numpy, scipy, matplotlib
    import matplotlib.pyplot as plt
    from scipy.optimize import curve_fit
    from scipy.optimize import differential_evolution
    import warnings
    
    xData = numpy.array([19.1647, 18.0189, 16.9550, 15.7683, 14.7044, 13.6269, 12.6040, 11.4309, 10.2987, 9.23465, 8.18440, 7.89789, 7.62498, 7.36571, 7.01106, 6.71094, 6.46548, 6.27436, 6.16543, 6.05569, 5.91904, 5.78247, 5.53661, 4.85425, 4.29468, 3.74888, 3.16206, 2.58882, 1.93371, 1.52426, 1.14211, 0.719035, 0.377708, 0.0226971, -0.223181, -0.537231, -0.878491, -1.27484, -1.45266, -1.57583, -1.61717])
    yData = numpy.array([0.644557, 0.641059, 0.637555, 0.634059, 0.634135, 0.631825, 0.631899, 0.627209, 0.622516, 0.617818, 0.616103, 0.613736, 0.610175, 0.606613, 0.605445, 0.603676, 0.604887, 0.600127, 0.604909, 0.588207, 0.581056, 0.576292, 0.566761, 0.555472, 0.545367, 0.538842, 0.529336, 0.518635, 0.506747, 0.499018, 0.491885, 0.484754, 0.475230, 0.464514, 0.454387, 0.444861, 0.437128, 0.415076, 0.401363, 0.390034, 0.378698])
    
    
    def func(xArray, breakpoint, slopeA, offsetA, slopeB, offsetB):
        returnArray = []
        for x in xArray:
            if x < breakpoint:
                returnArray.append(slopeA * x + offsetA)
            else:
                returnArray.append(slopeB * x + offsetB)
        return returnArray
    
    
    # function for genetic algorithm to minimize (sum of squared error)
    def sumOfSquaredError(parameterTuple):
        warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
        val = func(xData, *parameterTuple)
        return numpy.sum((yData - val) ** 2.0)
    
    
    def generate_Initial_Parameters():
        # min and max used for bounds
        maxX = max(xData)
        minX = min(xData)
        maxY = max(yData)
        minY = min(yData)
        slope = 10.0 * (maxY - minY) / (maxX - minX) # times 10 for safety margin
    
        parameterBounds = []
        parameterBounds.append([minX, maxX]) # search bounds for breakpoint
        parameterBounds.append([-slope, slope]) # search bounds for slopeA
        parameterBounds.append([minY, maxY]) # search bounds for offsetA
        parameterBounds.append([-slope, slope]) # search bounds for slopeB
        parameterBounds.append([minY, maxY]) # search bounds for offsetB
    
    
        result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
        return result.x
    
    # by default, differential_evolution completes by calling curve_fit() using parameter bounds
    geneticParameters = generate_Initial_Parameters()
    
    # call curve_fit without passing bounds from genetic algorithm
    fittedParameters, pcov = curve_fit(func, xData, yData, geneticParameters)
    print('Parameters:', fittedParameters)
    print()
    
    modelPredictions = func(xData, *fittedParameters) 
    
    absError = modelPredictions - yData
    
    SE = numpy.square(absError) # squared errors
    MSE = numpy.mean(SE) # mean squared errors
    RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
    Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
    
    print()
    print('RMSE:', RMSE)
    print('R-squared:', Rsquared)
    
    print()
    
    
    ##########################################################
    # graphics output section
    def ModelAndScatterPlot(graphWidth, graphHeight):
        f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
        axes = f.add_subplot(111)
    
        # first the raw data as a scatter plot
        axes.plot(xData, yData,  'D')
    
        # create data for the fitted equation plot
        xModel = numpy.linspace(min(xData), max(xData))
        yModel = func(xModel, *fittedParameters)
    
        # now the model as a line plot
        axes.plot(xModel, yModel)
    
        axes.set_xlabel('X Data') # X axis data label
        axes.set_ylabel('Y Data') # Y axis data label
    
        plt.show()
        plt.close('all') # clean up after using pyplot
    
    graphWidth = 800
    graphHeight = 600
    ModelAndScatterPlot(graphWidth, graphHeight)