Tags: python, mathematical-optimization, gradient-descent, convex-optimization

Steepest Descent Trace Behavior


I've written code that performs steepest descent on a quadratic form given by the formula 1/2 * (x1^2 + gamma * x2^2). I am using the equations in Boyd's Convex Optimization book as a guideline and would like to reproduce the examples given there. The problem is that the trace of my steepest descent path looks odd and does not match the traces shown in the book, even though the method seems to converge.
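
For reference (this is my own reading of the steepest descent method in Boyd's book, not something stated in the original post): for the quadratic norm ||z||_P = sqrt(z^T P z) with a positive definite matrix P, the steepest descent step is dx = -inv(P) * grad f(x). That is what the variable v in the code below is meant to compute, with P = diag(8, 2).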

An example of what my code spits out:

[image: descent trace from my code]

What it should look like:

[image: trace from the book]

Another one:

[image: descent trace from my code]

Versus what it should look like:

[image: trace from the book]

Here is a reproducible example, though it is a bit long:

import numpy as np
import matplotlib.pyplot as plt

# Function value at a given point
def f(x1,x2):
    return np.power(np.e,x1 + 3*x2 - 0.1) + np.power(np.e,x1 - 3*x2 - 0.1) + np.power(np.e,- x1 - 0.1)

# Partial Derivatives
def g(x1, x2):
    g1 = np.power(np.e, x1 + 3 * x2 - 0.1) + np.power(np.e, x1 - 3 * x2 - 0.1) - np.power(np.e, - x1 - 0.1)
    g2 = np.power(3*np.e, x1 + 3 * x2 - 0.1) - np.power(3 * np.e, x1 - 3 * x2 - 0.1)
    return np.asarray([g1[0], g2[0]])

# Descent Parameters
alpha = 0.1 # sufficient-decrease parameter for the backtracking line search
beta = 0.7  # step-size reduction factor for the backtracking line search

# Starting point
x0 = np.array([-1, 1], ndmin = 2, dtype = np.float64 ).T
f0 = f(x0[0], x0[1]) # Initial function value

# Calculate the minimum
xmin = np.array([0,0], ndmin = 2).T # Analytic minimum to the problem
fmin = f(0,0)

nangles = 1000
nopts = 1000
thetas = np.linspace(0, 2 * np.pi, nangles)
cont = np.zeros((5,2,nangles))
Delta = (f0 - fmin)/4 # spacing between the plotted level sets

# compute points on the level curves by sampling along rays from the origin
for i in range(nangles):
    ray = np.vstack([np.linspace(0,4,nopts) * np.cos(thetas[i]),
                        np.linspace(0,4,nopts) * np.sin(thetas[i])])

    fray = f(ray[0], ray[1])

    def get_ind(expression):
        ind = np.nonzero(expression)[0].max(0)
        return(ray[:,ind - 1] + ray[:,ind]) / 2

    cont[0,:,i] = get_ind(fray < f0)
    cont[1,:,i] = get_ind(fray < f0 - 3 * Delta)
    cont[2,:,i] = get_ind(fray < f0 - 2 * Delta)
    cont[3,:,i] = get_ind(fray < f0 - Delta)
    cont[4,:,i] = get_ind(fray < f0 + Delta)

def plt_contour():
    fig, ax = plt.subplots()
    ax.plot(cont[0,0,:], cont[0,1,:], '--',
            cont[1,0,:], cont[1,1,:], '--',
            cont[2,0,:], cont[2,1,:], '--',
            cont[3,0,:], cont[3,1,:], '--',
            cont[4,0,:], cont[4,1,:], '--')
    ax.axis('equal')
    ax.axis('off')
    return fig, ax

maxiters = 100
xs = np.zeros((2, maxiters))
fs = np.zeros((1, maxiters))
x = x0.copy()
Pnorm = np.diag([8,2]) # matrix P defining the quadratic norm for the steepest descent direction

for i in range(maxiters):
    print(i)
    fval = f(x[0],x[1])
    xs[:,i:i + 1] = x
    fs[:,i] = fval
    if fval - fmin < 1e-10: #stopping criterion
        break
    # Backtracking Line Search
    gval = g(x[0],x[1])
    v = np.dot(-np.linalg.inv(Pnorm), gval) # I think this is the step that I am not doing right
    s = 1
    for k in range(10):
        xnew = x + (s * v).reshape(2,1)
        fxnew = f(xnew[0], xnew[1])
        if fxnew < fval + s * alpha * gval.T @ v:
            break
        else:
            s = s * beta
    x = x + (s * v).reshape(2,1)

# Plot path
fig, ax = plt_contour()
fig.set_size_inches(5,5)
ax.plot( xs[0,0:i + 1], xs[1, 0:i + 1], 'o')
ax.plot( xs[0,0:i + 1], xs[1, 0:i + 1], '-')
plt.show()

I have a hunch that the part I'm getting wrong is the steepest descent step, namely the variable v. Maybe I am not actually doing what I think I am doing. Cheers for any help.
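
As a quick sanity check on that hunch (my addition, not part of the original question), note that v = -inv(P) @ g is guaranteed to be a descent direction for whatever gradient g reports, because Pnorm is positive definite. Assuming the definitions from the listing above are in scope, a one-off check looks like this:

gval = g(x0[0], x0[1])                # gradient reported by g at the starting point
v = -np.linalg.solve(Pnorm, gval)     # same direction as -inv(Pnorm) @ gval
print(gval @ v)                       # negative whenever gval != 0, since Pnorm is positive definite

The printed value is always negative here, so v really is a descent direction with respect to whatever g returns; if the trace still looks wrong, the analytic gradient itself is worth double-checking.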


Solution

  • Is the g2 calculation correct? Shouldn't the 3 be outside the power function?

    g2 = 3*np.power(np.e, x1 + 3 * x2 - 0.1) - 3*np.power(np.e, x1 - 3 * x2 - 0.1)
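
That matches the partial derivative by hand: d/dx2 f = 3*e^(x1 + 3*x2 - 0.1) - 3*e^(x1 - 3*x2 - 0.1), whereas np.power(3*np.e, ...) computes (3e)^(...) instead. Below is a minimal, self-contained sketch (not part of the original answer; g_fixed and fd_grad are my own helper names) that checks the corrected gradient against central finite differences:

import numpy as np

def f(x1, x2):
    return np.exp(x1 + 3*x2 - 0.1) + np.exp(x1 - 3*x2 - 0.1) + np.exp(-x1 - 0.1)

def g_fixed(x1, x2):
    # corrected analytic gradient: the 3 multiplies the exponential, it is not part of the base
    g1 = np.exp(x1 + 3*x2 - 0.1) + np.exp(x1 - 3*x2 - 0.1) - np.exp(-x1 - 0.1)
    g2 = 3*np.exp(x1 + 3*x2 - 0.1) - 3*np.exp(x1 - 3*x2 - 0.1)
    return np.array([g1, g2])

def fd_grad(x1, x2, h=1e-6):
    # central finite differences of f
    return np.array([(f(x1 + h, x2) - f(x1 - h, x2)) / (2*h),
                     (f(x1, x2 + h) - f(x1, x2 - h)) / (2*h)])

x1, x2 = -1.0, 1.0
print(np.max(np.abs(g_fixed(x1, x2) - fd_grad(x1, x2))))  # close to zero (finite-difference error only)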