I've written code that performs steepest descent on a quadratic form given by the formula: 1/2 * (x1^2 + gamma * x2^2). Mathematically, I am taking the equations given in Boyd's Convex Optimization book as a guideline and would like to reproduce the given examples. The problem I have is that the trace of my steepest descent path looks sort of odd and does not match the trace shown in the books, even though it seems to converge.
Example, what my code spits out:
What it should look like:
Another one:
Vs what it should look like:
Here is a reproducible example, even though a bit long:
import numpy as np
import matplotlib.pyplot as plt
# Function value at a given point
def f(x1,x2):
return np.power(np.e,x1 + 3*x2 - 0.1) + np.power(np.e,x1 - 3*x2 - 0.1) + np.power(np.e,- x1 - 0.1)
# Partial Derivatives
def g(x1, x2):
g1 = np.power(np.e, x1 + 3 * x2 - 0.1) + np.power(np.e, x1 - 3 * x2 - 0.1) - np.power(np.e, - x1 - 0.1)
g2 = np.power(3*np.e, x1 + 3 * x2 - 0.1) - np.power(3 * np.e, x1 - 3 * x2 - 0.1)
return np.asarray([g1[0], g2[0]])
# Descent Parameters
alpha = 0.1
beta = 0.7
# Starting point
x0 = np.array([-1, 1], ndmin = 2, dtype = np.float64 ).T
f0 = f(x0[0], x0[1]) # Initial function value
# Calculate the minimum
xmin = np.array([0,0], ndmin = 2).T # Analytic minimum to the problem
fmin = f(0,0)
nangles = 1000
nopts = 1000
thetas = np.linspace(0, 2 * np.pi, nangles)
cont = np.zeros((5,2,nangles))
Delta = (f0 - fmin)/4
# function that plots the level curves or contour lines
for i in range(nangles):
ray = np.vstack([np.linspace(0,4,nopts) * np.cos(thetas[i]),
np.linspace(0,4,nopts) * np.sin(thetas[i])])
fray = f(ray[0], ray[1])
def get_ind(expression):
ind = np.nonzero(expression)[0].max(0)
return(ray[:,ind - 1] + ray[:,ind]) / 2
cont[0,:,i] = get_ind(fray < f0)
cont[1,:,i] = get_ind(fray < f0 - 3 * Delta)
cont[2,:,i] = get_ind(fray < f0 - 2 * Delta)
cont[3,:,i] = get_ind(fray < f0 - Delta)
cont[4,:,i] = get_ind(fray < f0 + Delta)
def plt_contour():
fig, ax = plt.subplots()
ax.plot(cont[0,0,:], cont[0,1,:], '--',
cont[1,0,:], cont[1,1,:], '--',
cont[2,0,:], cont[2,1,:], '--',
cont[3,0,:], cont[3,1,:], '--',
cont[4,0,:], cont[4,1,:], '--')
ax.axis('equal')
ax.axis('off')
return fig, ax
maxiters = 100
xs = np.zeros((2, maxiters))
fs = np.zeros((1, maxiters))
x = x0.copy()
Pnorm = np.diag([8,2])
for i in range(maxiters):
print(i)
fval = f(x[0],x[1])
xs[:,i:i + 1] = x
fs[:,i] = fval
if fval - fmin < 1e-10: #stopping criterion
break
# Backtracking Line Search
gval = g(x[0],x[1])
v = np.dot(-np.linalg.inv(Pnorm), gval) # I think this is the step that I am not doing right
s = 1
for k in range(10):
xnew = x + (s * v).reshape(2,1)
fxnew = f(xnew[0], xnew[1])
if fxnew < fval + s * alpha * gval.T @ v:
break
else:
s = s * beta
x = x + (s * v).reshape(2,1)
# Plot path
fig, ax = plt_contour()
fig.set_size_inches(5,5)
ax.plot( xs[0,0:i + 1], xs[1, 0:i + 1], 'o')
ax.plot( xs[0,0:i + 1], xs[1, 0:i + 1], '-')
plt.show()
I have a hunch that where I'm doing something wrong is the steepest descent step, namely the variable v. Maybe I am not actually doing what I think I am doing. Cheers for any help.
Is g2
calculation correct? Shouldn't 3 be outside the power function?
g2 = 3*np.power(np.e, x1 + 3 * x2 - 0.1) - 3*np.power(np.e, x1 - 3 * x2 - 0.1)