Search code examples
pythonmachine-learningplotlygradient-descentplotly-python

How to plot gradient descent using plotly


I have been trying to replicate some work similar to this code below but when I try to use this data from this link https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv Its throwing some error. I think its because of shape but don't know exactly how to modify it.

It will be great, if you help me to resolve the issue.

Here is my Code

from IPython.core.display import HTML
import plotly
import plotly.graph_objects as go
import noise
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import axes3d
%matplotlib inline
import pandas as pd

data =  = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv')

z = data

import numpy as np
from numpy.lib.stride_tricks import as_strided

def sliding_window(arr, window_size):
    """ Construct a sliding window view of the array"""
    arr = np.asarray(arr)
    window_size = int(window_size)
    if arr.ndim != 2:
        raise ValueError("need 2-D input")
    if not (window_size > 0):
        raise ValueError("need a positive window size")
    shape = (arr.shape[0] - window_size + 1,
             arr.shape[1] - window_size + 1,
             window_size, window_size)
    if shape[0] <= 0:
        shape = (1, shape[1], arr.shape[0], shape[3])
    if shape[1] <= 0:
        shape = (shape[0], 1, shape[2], arr.shape[1])
    strides = (arr.shape[1]*arr.itemsize, arr.itemsize,
               arr.shape[1]*arr.itemsize, arr.itemsize)
    return as_strided(arr, shape=shape, strides=strides)

def cell_neighbours(arr, i, j, d):
    """Return d-th neighbors of cell (i, j)"""
    w = sliding_window(arr, 2*d+1)

    ix = np.clip(i - d, 0, w.shape[0]-1)
    jx = np.clip(j - d, 0, w.shape[1]-1)

    i0 = max(0, i - d - ix)
    j0 = max(0, j - d - jx)
    i1 = w.shape[2] - max(0, d - i + ix)
    j1 = w.shape[3] - max(0, d - j + jx)

    return w[ix, jx][i0:i1,j0:j1].ravel()

from dataclasses import dataclass

@dataclass
class descent_step:
    """Class for storing each step taken in gradient descent"""
    value: float
    x_index: float
    y_index: float

def gradient_descent_3d(array,x_start,y_start,steps=50,step_size=1,plot=False):
    # Initial point to start gradient descent at
    step = descent_step(array[y_start][x_start],x_start,y_start)
    
    # Store each step taken in gradient descent in a list
    step_history = []
    step_history.append(step)
    
    # Plot 2D representation of array with startng point as a red marker
    if plot:
        matplotlib.pyplot.imshow(array,origin='lower',cmap='terrain')
        matplotlib.pyplot.plot(x_start,y_start,'ro')
    current_x = x_start
    current_y = y_start

    # Loop through specified number of steps of gradient descent to take
    for i in range(steps):
        prev_x = current_x
        prev_y = current_y
        
        # Extract array of neighbouring cells around current step location with size nominated
        neighbours=cell_neighbours(array,current_y,current_x,step_size)
        
        # Locate minimum in array (steepest slope from current point)
        next_step = neighbours.min()
        indices = np.where(array == next_step)
        
        # Update current point to now be the next point after stepping
        current_x, current_y = (indices[1][0],indices[0][0])
        step = descent_step(array[current_y][current_x],current_x,current_y)
        
        step_history.append(step)
        
        # Plot each step taken as a black line to the current point nominated by a red marker
        if plot:
            matplotlib.pyplot.plot([prev_x,current_x],[prev_y,current_y],'k-')
            matplotlib.pyplot.plot(current_x,current_y,'ro')
            
        # If step is to the same location as previously, this infers convergence and end loop
        if prev_y == current_y and prev_x == current_x:
            print(f"Converged in {i} steps")
            break
    return next_step,step_history

np.random.seed(42)
global_minimum = z.min()
indices = np.where(z == global_minimum)
print(f"Target: {global_minimum} @ {indices}")

step_size = 0
found_minimum = 99999

# Random starting point
start_x = np.random.randint(0,50)
start_y = np.random.randint(0,50)

# Increase step size until convergence on global minimum
while found_minimum != global_minimum:
    step_size += 1
    found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=False)

print(f"Optimal step size {step_size}")
found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=True)
print(f"Steps: {steps}")

def multiDimenDist(point1,point2):
   #find the difference between the two points, its really the same as below
   deltaVals = [point2[dimension]-point1[dimension] for dimension in range(len(point1))]
   runningSquared = 0
   #because the pythagarom theorm works for any dimension we can just use that
   for coOrd in deltaVals:
       runningSquared += coOrd**2
   return runningSquared**(1/2)
def findVec(point1,point2,unitSphere = False):
  #setting unitSphere to True will make the vector scaled down to a sphere with a radius one, instead of it's orginal length
  finalVector = [0 for coOrd in point1]
  for dimension, coOrd in enumerate(point1):
      #finding total differnce for that co-ordinate(x,y,z...)
      deltaCoOrd = point2[dimension]-coOrd
      #adding total difference
      finalVector[dimension] = deltaCoOrd
  if unitSphere:
      totalDist = multiDimenDist(point1,point2)
      unitVector =[]
      for dimen in finalVector:
          unitVector.append( dimen/totalDist)
      return unitVector
  else:
      return finalVector

def generate_3d_plot(step_history):
    # Initialise empty lists for markers
    step_markers_x = []
    step_markers_y = []
    step_markers_z = []
    step_markers_u = []
    step_markers_v = []
    step_markers_w = []
    
    for index, step in enumerate(step_history):
        step_markers_x.append(step.x_index)
        step_markers_y.append(step.y_index)
        step_markers_z.append(step.value)
        
        # If we haven't reached the final step, calculate the vector between the current step and the next step
        if index < len(steps)-1:
            vec1 = [step.x_index,step.y_index,step.value]
            vec2 = [steps[index+1].x_index,steps[index+1].y_index,steps[index+1].value]

            result_vector = findVec(vec1,vec2)
            step_markers_u.append(result_vector[0])
            step_markers_v.append(result_vector[1])
            step_markers_w.append(result_vector[2])
        else:
            step_markers_u.append(0.1)
            step_markers_v.append(0.1)
            step_markers_w.append(0.1)
    
    # Include cones at each marker to show direction of step, scatter3d is to show the red line between points and surface for the terrain
    fig = go.Figure(data=[
        go.Cone(
        x=step_markers_x,
        y=step_markers_y,
        z=step_markers_z,
        u=step_markers_u,
        v=step_markers_v,
        w=step_markers_w,
        sizemode="absolute",
        sizeref=2,
        anchor='tail'),

        go.Scatter3d(
        x=step_markers_x,
        y=step_markers_y,
        z=step_markers_z,
        mode='lines',
        line=dict(
            color='red',
            width=2
        )),

        go.Surface(colorscale=terrain,z=world,opacity=0.5)])


    # Z axis is limited to the extent of the terrain array
    fig.update_layout(
        title='Gradient Descent Steps',
        scene = dict(zaxis = dict(range=[world.min(),world.max()],),),)
    return fig
    
# Generate 3D plot from previous random starting location
fig = generate_3d_plot(steps)
HTML(plotly.offline.plot(fig, filename='random_starting_point_3d_gradient_descent.html',include_plotlyjs='cdn'))

Solution

  • The error is occurring because found_minimum is an int, but global_minimum is a Series. I think the tutorial you're referencing assumes that the data is loaded as a numpy array, but it is never explicitly stated.

    So, z = data.to_numpy() solves one problem and reveals another which is that the tutorial dataset is 50x50 and your data is 25x25. It's tempting to just change the limits of the random starting point, but this doesn't end up working well. The dataset is just too small for this implementation of gradient descent to appropriately converge.

    To get around this issue, I just altered your dataset to manufacture a 50x50 set:

    data_arr = data.to_numpy()
    double_arr = np.append(data_arr, 1.5*data_arr + 50, axis=0)
    quad_arr = np.append(double_arr, 1.5*double_arr + 50, axis=1)
    

    Passing this quad_arr as needed throughout the code as well as updating the plotly colorscale to go.Surface(colorscale=Earth) gives:

    gradient_descent_terrain

    Full, copy-pastable code:

    from IPython.core.display import HTML
    import plotly
    import plotly.graph_objects as go
    import noise
    import numpy as np
    import matplotlib
    from mpl_toolkits.mplot3d import axes3d
    %matplotlib inline
    import pandas as pd
    
    data = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/api_docs/mt_bruno_elevation.csv')
    
    data_arr = data.to_numpy()
    double_arr = np.append(data_arr, 1.5*data_arr + 50, axis=0)
    quad_arr = np.append(double_arr, 1.5*double_arr + 50, axis=1)
    
    z = quad_arr
    matplotlib.pyplot.imshow(z,origin='lower',cmap='terrain')
    
    # Find maximum value index in numpy array
    indices = np.where(z == z.max())
    max_z_x_location, max_z_y_location = (indices[1][0],indices[0][0])
    matplotlib.pyplot.plot(max_z_x_location,max_z_y_location,'ro',markersize=15)
    
    # Find minimum value index in numpy array
    indices = np.where(z == z.min())
    min_z_x_location, min_z_y_location = (indices[1][0],indices[0][0])
    matplotlib.pyplot.plot(min_z_x_location,min_z_y_location,'yo',markersize=15)
    
    import numpy as np
    from numpy.lib.stride_tricks import as_strided
    
    def sliding_window(arr, window_size):
        """ Construct a sliding window view of the array"""
        arr = np.asarray(arr)
        window_size = int(window_size)
        if arr.ndim != 2:
            raise ValueError("need 2-D input")
        if not (window_size > 0):
            raise ValueError("need a positive window size")
        shape = (arr.shape[0] - window_size + 1,
                 arr.shape[1] - window_size + 1,
                 window_size, window_size)
        if shape[0] <= 0:
            shape = (1, shape[1], arr.shape[0], shape[3])
        if shape[1] <= 0:
            shape = (shape[0], 1, shape[2], arr.shape[1])
        strides = (arr.shape[1]*arr.itemsize, arr.itemsize,
                   arr.shape[1]*arr.itemsize, arr.itemsize)
        return as_strided(arr, shape=shape, strides=strides)
    
    def cell_neighbours(arr, i, j, d):
        """Return d-th neighbors of cell (i, j)"""
        w = sliding_window(arr, 2*d+1)
    
        ix = np.clip(i - d, 0, w.shape[0]-1)
        jx = np.clip(j - d, 0, w.shape[1]-1)
    
        i0 = max(0, i - d - ix)
        j0 = max(0, j - d - jx)
        i1 = w.shape[2] - max(0, d - i + ix)
        j1 = w.shape[3] - max(0, d - j + jx)
    
        return w[ix, jx][i0:i1,j0:j1].ravel()
    
    from dataclasses import dataclass
    
    @dataclass
    class descent_step:
        """Class for storing each step taken in gradient descent"""
        value: float
        x_index: float
        y_index: float
    
    def gradient_descent_3d(array,x_start,y_start,steps=50,step_size=1,plot=False):
        # Initial point to start gradient descent at
        step = descent_step(array[y_start][x_start],x_start,y_start)
        
        # Store each step taken in gradient descent in a list
        step_history = []
        step_history.append(step)
        
        # Plot 2D representation of array with startng point as a red marker
        if plot:
            matplotlib.pyplot.imshow(array,origin='lower',cmap='terrain')
            matplotlib.pyplot.plot(x_start,y_start,'ro')
        current_x = x_start
        current_y = y_start
    
        # Loop through specified number of steps of gradient descent to take
        for i in range(steps):
            prev_x = current_x
            prev_y = current_y
            
            # Extract array of neighbouring cells around current step location with size nominated
            neighbours=cell_neighbours(array,current_y,current_x,step_size)
            
            # Locate minimum in array (steepest slope from current point)
            next_step = neighbours.min()
            indices = np.where(array == next_step)
            
            # Update current point to now be the next point after stepping
            current_x, current_y = (indices[1][0],indices[0][0])
            step = descent_step(array[current_y][current_x],current_x,current_y)
            
            step_history.append(step)
            
            # Plot each step taken as a black line to the current point nominated by a red marker
            if plot:
                matplotlib.pyplot.plot([prev_x,current_x],[prev_y,current_y],'k-')
                matplotlib.pyplot.plot(current_x,current_y,'ro')
                
            # If step is to the same location as previously, this infers convergence and end loop
            if prev_y == current_y and prev_x == current_x:
                print(f"Converged in {i} steps")
                break
        return next_step,step_history
    
    np.random.seed(42)
    global_minimum = z.min()
    indices = np.where(z == global_minimum)
    print(f"Target: {global_minimum} @ {indices}")
    
    step_size = 0
    found_minimum = 99999
    
    # Random starting point
    start_x = np.random.randint(0,50)
    start_y = np.random.randint(0,50)
    
    # Increase step size until convergence on global minimum
    print('==========================')
    print(found_minimum)
    print(global_minimum)
    print('==========================')
    
    while found_minimum != global_minimum:
        step_size += 1
        try:
            found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=True)
        except ValueError:
            pass
    
    print(f"Optimal step size {step_size}")
    found_minimum,steps = gradient_descent_3d(z,start_x,start_y,step_size=step_size,plot=True)
    print(f"Steps: {steps}")
    
    def multiDimenDist(point1,point2):
       #find the difference between the two points, its really the same as below
       deltaVals = [point2[dimension]-point1[dimension] for dimension in range(len(point1))]
       runningSquared = 0
       #because the pythagarom theorm works for any dimension we can just use that
       for coOrd in deltaVals:
           runningSquared += coOrd**2
       return runningSquared**(1/2)
    def findVec(point1,point2,unitSphere = False):
      #setting unitSphere to True will make the vector scaled down to a sphere with a radius one, instead of it's orginal length
      finalVector = [0 for coOrd in point1]
      for dimension, coOrd in enumerate(point1):
          #finding total differnce for that co-ordinate(x,y,z...)
          deltaCoOrd = point2[dimension]-coOrd
          #adding total difference
          finalVector[dimension] = deltaCoOrd
      if unitSphere:
          totalDist = multiDimenDist(point1,point2)
          unitVector =[]
          for dimen in finalVector:
              unitVector.append( dimen/totalDist)
          return unitVector
      else:
          return finalVector
    
    def generate_3d_plot(step_history):
        # Initialise empty lists for markers
        step_markers_x = []
        step_markers_y = []
        step_markers_z = []
        step_markers_u = []
        step_markers_v = []
        step_markers_w = []
        
        for index, step in enumerate(step_history):
            step_markers_x.append(step.x_index)
            step_markers_y.append(step.y_index)
            step_markers_z.append(step.value)
            
            # If we haven't reached the final step, calculate the vector between the current step and the next step
            if index < len(steps)-1:
                vec1 = [step.x_index,step.y_index,step.value]
                vec2 = [steps[index+1].x_index,steps[index+1].y_index,steps[index+1].value]
    
                result_vector = findVec(vec1,vec2)
                step_markers_u.append(result_vector[0])
                step_markers_v.append(result_vector[1])
                step_markers_w.append(result_vector[2])
            else:
                step_markers_u.append(0.1)
                step_markers_v.append(0.1)
                step_markers_w.append(0.1)
        
        # Include cones at each marker to show direction of step, scatter3d is to show the red line between points and surface for the terrain
        fig = go.Figure(data=[
            go.Cone(
            x=step_markers_x,
            y=step_markers_y,
            z=step_markers_z,
            u=step_markers_u,
            v=step_markers_v,
            w=step_markers_w,
            sizemode="absolute",
            sizeref=2,
            anchor='tail'),
    
            go.Scatter3d(
            x=step_markers_x,
            y=step_markers_y,
            z=step_markers_z,
            mode='lines',
            line=dict(
                color='red',
                width=2
            )),
    
            go.Surface(colorscale='Earth', z=quad_arr,opacity=0.5)])
    
    
        # Z axis is limited to the extent of the terrain array
        fig.update_layout(
            title='Gradient Descent Steps',
            scene = dict(zaxis = dict(range=[quad_arr.min(),quad_arr.max()],),),)
        return fig
        
    # Generate 3D plot from previous random starting location
    fig = generate_3d_plot(steps)
    HTML(plotly.offline.plot(fig, filename='random_starting_point_3d_gradient_descent.html',include_plotlyjs='cdn'))