python scipy interpolation spline parametric-equations

Why does cubic spline algorithm produce spiky and incorrect graph on a parametric curve?

I recently implemented my implementation of the cubic spline algorithm described on wikipedia. Here is my current implementation which should calculate the coefficients:

def get_spline_natural_fifth_degree(x_vals, y_vals):
    # This is taken straight from wikipedia: https://en.wikipedia.org/wiki/Spline_(mathematics)#Algorithm_for_computing_natural_cubic_splines
    #x_vals = [p[0] for p in points]
    #y_vals = [p[1] for p in points]
    x_vals = list(x_vals)
    y_vals = list(y_vals)
    # 1. Create new array a of size n + 1 and for i = 0, …, n set a_i = y_i
    n = len(x_vals)-1
    a = [y_vals[i] for i in range(len(y_vals))]#  + [0.0] # Initialize the thing.
    assert len(a) == n + 1
    # a[-1] = 0.0 # Because the index 
    # 2. Create new arrays b and d, each of size n.
    assert len(a) == n + 1
    b = [0.0 for _ in range(n)]
    d = [0.0 for _ in range(n)]
    # 3. Create new array h of size n and for i = 0, …, n – 1 set h_i = x_(i+1) - x_i
    h = [x_vals[i+1] - x_vals[i] for i in range(n)]
    # 4. Create new array α of size n and for i = 1, …, n – 1 set alpha_1 = (3/h_i)*(a_(i+1) - a_i) - (3/h_(i-1))*(a_i-a_(i-1))
    alpha = [(3.0/h[i])*(a[i+1]-a[i])-(3.0/h[i-1])*(a[i]-a[i-1]) for i in range(1,n)] # Actually n-1, but because python ranges are dumb, we need to do this.
    #alpha.append(0.0)
    alpha = [0.0] + alpha
    assert len(alpha) == n
    # 5. Create new arrays c, l, μ, z, each of size n + 1.
    c = [0.0 for _ in range(n+1)]
    assert len(c) == len(x_vals)
    l = [0.0 for _ in range(n+1)]
    mu = [0.0 for _ in range(n+1)]
    z = [0.0 for _ in range(n+1)]
    # 6. Set l_0 = 1 , mu_0 = z_0 = 0
    l[0] = 1.0
    mu[0] = 0.0
    z[0] = 0.0
    # 7. For i = 1 .. n-1 set the following: l_i = 2*(x_(i+1)-x_(i-1))-(h_(i-1))*(mu_(i-1)) mu_i = h_i/l_i   z_i = (alpha_i-h_(i-1)*z_(i-1))/l_i
    for i in range(1, n):
        l[i] = 2*(x_vals[i+1]-x_vals[i-1])-(h[i-1])*(mu[i-1]) # Stuff.
        mu[i] = h[i]/l[i]
        z[i] = (alpha[i]-h[i-1]*z[i-1])/l[i]
    assert l[0] == 1.0
    # 8. Set l_n = 1; z_n = c_n = 0
    l[n] = 1.0
    assert c[n] == 0.0 # Should be zero...
    z[n] = 0.0
    # 9. For j = n – 1, n – 2, …, 0, set the following: c_j = z_j - mu_j*c_(j+1)   b_j = (a_(j+1)-a_j)/h_j - (h_j*(c_(j+1)+2*c_j))/3    and   d_j = (c_(j+1)-c_j)/(3*h_j)
    for j in range(n - 1, -1, -1):
        c[j] = z[j] - mu[j]*c[j+1]
    for j in range(n - 1, -1, -1):
        b[j] = (a[j+1]-a[j])/h[j] + (h[j]*(2*c[j+1]+c[j]))/3.0
    for j in range(n - 1, -1, -1):
        d[j] = (c[j+1]-c[j])/(3.0*h[j])
    a.pop(0)
    c.pop(0)
    splines = [[] for _ in range(4)]
    for i in range(n):
        splines[0].append(a[i])
        splines[1].append(b[i])
        splines[2].append(c[i])
        splines[3].append(d[i])
    return splines # Return the output....

and it works in some cases. I found this implementation on stackoverflow and I cross referenced it and it produces the exact same coefficients as my function.

Here is a script which demonstrates this:

#!/bin/python3

import matplotlib.pyplot as plt
import numpy as np
from spline import *
from scipy.interpolate import CubicSpline

def evaluate_spline(x, x_i, a, b, c, d):
    return a + (b + (c + d * (x - x_i)) * (x - x_i)) * (x - x_i)

def correct_interval(x, intervals): # This returns the correct interval index and the x0 start value.
    # Now loop over each element and check the thing.
    for i in range(len(intervals)-1):
        if x >= intervals[i] and x <= intervals[i+1]:
            return i, intervals[i] # Return the index thing... 
    assert False

def calculate_spline(x, t_knots, splines):
    i, t0 = correct_interval(x, t_knots)
    #print("Correct interval: "+str(i))
    if len(splines[0]) == 5:
            a, b, c, d, _ = splines[0][i], splines[1][i], splines[2][i], splines[3][i], splines[4][i]
    else:
        a, b, c, d = splines[0][i], splines[1][i], splines[2][i], splines[3][i]# , splines[4][i]
    return evaluate_spline(x, t0, a, b, c, d)


def compute_t_knots(points):
    """ Computes t values based on cumulative distance along the curve. """
    t_knots = [0]
    for i in range(1, len(points)):
        dist = np.sqrt((points[i][0] - points[i - 1][0])**2 + (points[i][1] - points[i - 1][1])**2)
        t_knots.append(t_knots[-1] + dist)  # Accumulate distances
    return t_knots

def render_result(x_splines: list, y_splines: list, points: list, t_knots: list) -> None:
    x_knots = [p[0] for p in points] # Something like this????
    y_knots = [p[1] for p in points]
    n = len(points)
    t_values = np.linspace(min(t_knots), max(t_knots), 20000)
    # Compute interpolated x and y values
    x_things = [calculate_spline(t, t_knots, x_splines) for t in t_values]
    y_things = [calculate_spline(t, t_knots, y_splines) for t in t_values]
    plt.plot(x_things, y_things, label="Cubic Spline Curve")
    plt.scatter(x_knots, y_knots, color="red", label="Control Points")
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    plt.title('Spline Graph Using Manually Calculated Coefficients')
    plt.grid(True)
    plt.show()
    return

from math import sqrt

def compute_t_range(points):
    t_knots = [0]
    for i in range(1, len(points)):
        dist = sqrt((points[i][0] - points[i - 1][0])**2 + (points[i][1] - points[i - 1][1])**2)
        t_knots.append(t_knots[-1] + dist)  # Accumulate distances
    return t_knots

from plagiarized import * # Import the implementation which I copied from stackoverflow. It is stored in plagiarized.py

USE_MODE = 0

if __name__=="__main__":
    x_vals=[-0.83,0.14,-1.09,1.09,-0.54,2.03,3.0]
    y_vals=[-2.03,-2.06,0.71,1.49,2.06,2.43,3.0]

    assert len(x_vals) == len(y_vals)
    assert all([isinstance(x, float) for x in x_vals])
    assert all([isinstance(x, float) for x in y_vals])
    points = [[x_vals[i], y_vals[i]] for i in range(len(x_vals))]
    x_knots = [p[0] for p in points]
    y_knots = [p[1] for p in points]
    t_i = compute_t_knots(points)
    assert len(t_i) == len(points) == len(x_knots) == len(y_knots)
    points_x_t = [(t_i[i], x_knots[i]) for i in range(len(t_i))]
    points_y_t = [(t_i[i], y_knots[i]) for i in range(len(t_i))]
    spline_x_reference = CubicSpline(t_i, x_knots).c
    spline_y_reference = CubicSpline(t_i, y_knots).c
    spline_y = get_spline_natural_fifth_degree(t_i, y_knots)
    spline_x = get_spline_natural_fifth_degree(t_i, x_knots)
    spline_x_reference = list(spline_x_reference)
    spline_y_reference = list(spline_y_reference)
    spline_x_reference.reverse()
    spline_y_reference.reverse()
    spline_x_stuff = calc_spline_params(np.array(t_i), np.array(x_knots))
    spline_y_stuff = calc_spline_params(np.array(t_i), np.array(y_knots))
    if USE_MODE == 0: # Use the implementation which I took from stackoverflow
        render_result(spline_x_stuff, spline_y_stuff, points, t_i)
    elif USE_MODE == 1: # Use Scipy. This one actually displays the correct result.
        render_result(spline_x_reference, spline_y_reference, points, t_i)
    else: # Use my own implementation
        render_result(spline_x, spline_y, points, t_i)
    exit(0)

the implementation from the other post on stackoverflow is in a file called plagiarized.py and we call the coefficient calculation function from this script. The USE_MODE variable describes which method to use in calculation of the coefficients as explained in the code.

The graph displays correctly like so:

when running with USE_MODE == 1 meaning that the scipy implementation should be used. However, when running in either USE_MODE == 0 meaning the copied implementation from stackoverflow or when USE_MODE is something else meaning to use my own implementation, the following graph gets produced:

I tried looking at the scipy implementation of cubic splines here on github, but I wasn't able to really understand it.

I have uploaded the relevant files to my github. Just run python spline_plot.py to observe the wrong graph. Modify the value of USE_MODE to 1 and you can see what it is supposed to look like.

So, my question is why doesn't my code (or the other implementation for that matter) produce the same graph and why is the graph so spiky?

Thanks in advance for your answers!

Solution

When I compare your actual coefficients with mine (below) there is an "out-by-one" error in a[] and c[] and the values of b[] are wrong.

Two changes then. The first is that the setting of b[] needs to be amended. Change (both sign of the second term and indices of the c[] and then code from the Wikipedia article carefully at this point):

b[j] = (a[j+1]-a[j])/h[j] + (h[j]*(2*c[j+1]+c[j]))/3.0

b[j] = (a[j+1]-a[j])/h[j] - (h[j]*(2*c[j]+c[j+1]))/3.0

Second change is that you should remove the

a.pop(0)
c.pop(0)

which appear to be causing the main "out-by-one" error. Then your code produces the required figure (at least for this test case) and the same coefficients as mine below.

The following is from my own understanding of natural cubic splines (those that force second derivative to be 0 at the two ends). There's no link: I just hashed this out on paper.

For a fit x(t) to values at nodes 0, 1, ..., N of the form, between nodes i and i+1:

x = a_i + b_i(t-t_i) + c_i(t-t_i)² + d_i(t-t_i)³

To fit the nodal values, a_i is x_i. Continuity of second derivatives at nodes fixes the d_i in terms of c_i, whilst continuity of x at nodes fixes b_i in terms of c_i. Then continuity of first derivative at nodes and substitution of these relationships gives a tridiagonal system for the c_i. (This seems to be what the Wikipedia article actually describes, rather long-windedly.) The other coefficients then follow once you solve for c_i.

To debug your code I would (a) try to understand the formulation and where it came from and (b) plot x against t and y against t separately before you try to plot y against x.

import math
import numpy as np
import matplotlib.pyplot as plt

#-----------------------------------------------------------------------

def cumulativeDistance( x, y ):
    N = len( x )
    t = np.zeros( N )
    for i in range( 1, N ): t[i] = t[i-1] + math.sqrt( ( x[i] - x[i-1] ) ** 2 + ( y[i] - y[i-1] ) ** 2 )
    return t

#-----------------------------------------------------------------------

def tridiagonalSolver( a, b, c, d ):        # Solves a[i] X[i-1] + b[i] X[i] + c[i] X[i+1] = d[i]
    N = len( d )
    X = np.zeros( N )
    P = np.zeros( N )
    Q = np.zeros( N )

    # Forward pass
    P[0] = -c[0] / b[0]
    Q[0] =  d[0] / b[0]
    for i in range( 1, N ):
        denominator = b[i] + a[i] * P[i-1]
        P[i] = -c[i]                    / denominator
        Q[i] = ( d[i] - a[i] * Q[i-1] ) / denominator

    # Backward pass
    X[N-1] = Q[N-1]
    for i in range( N - 2, -1, -1 ):
        X[i] = P[i] * X[i+1] + Q[i]

    return X

#-----------------------------------------------------------------------

def spline( t, x ):
    N = len( x ) - 1
    dt = t[1:N+1] - t[0:N]
    dx = x[1:N+1] - x[0:N]

    # Set up tridiagonal system for coefficients c[i]
    left   = dt[0:N-1] / 3
    right  = dt[1:N  ] / 3
    middle = 2 * ( left + right )
    rhs    = dx[1:N] / dt[1:N] - dx[0:N-1] / dt[0:N-1]
    c = np.zeros( N + 1 )    # allows one to assume c[0] = c[N] = 0 for convenience
    c[1:N] = tridiagonalSolver( left, middle, right, rhs )

    # Remaining spline coeffiients
    d = ( c[1:N+1] - c[0:N] ) / ( 3 * dt )       # from continuity of x'' at t[i+1]
    b = dx / dt - c[0:N] * dt - d * dt ** 2      # from continuity of x at t[i+1]
    a = x[0:N]                                   # from value of x at t[i]
    c = c[0:N]                                   # spline intervals are 0, 1, ... , N-1
    return a, b, c, d

#-----------------------------------------------------------------------

x = np.array( [ -0.83,  0.14, -1.09, 1.09, -0.54, 2.03, 3.0 ] )
y = np.array( [ -2.03, -2.06,  0.71, 1.49,  2.06, 2.43, 3.0 ] )
t = cumulativeDistance( x, y )

ax, bx, cx, dx = spline( t, x )
ay, by, cy, dy = spline( t, y )

pts_per_spline = 21
for i in range( len( ax ) ):
    dt = np.linspace( 0, t[i+1] - t[i], pts_per_spline )
    xfit = ax[i] + dt * ( bx[i] + dt * ( cx[i] + dx[i] * dt ) )
    yfit = ay[i] + dt * ( by[i] + dt * ( cy[i] + dy[i] * dt ) )
    plt.plot( xfit, yfit, 'b' )
plt.plot( x, y, 'ro' )
plt.show()