Search code examples
pythonoptimizationgekko

Why does Gekko_NN_TF add so many degrees of freedom?


I am using Gekko for trajectory optimisation. The dynamics of my system are being modelled by a NN and wrapped for Gekko. In the minimal example below I am approximating a sinusoid with the NN. Using the NN I have 1700 degrees of freedom whilst replacing it with m.sin(ctrl) leaves me with 0 DOF.

I would have assumed that the NN is immutable throughout the optimisation, why are so many degrees of freedom added?

Here is a minimal example:

import tensorflow as tf
import numpy as np
import pandas as pd

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from gekko import GEKKO
from gekko.ML import Gekko_NN_TF, CustomMinMaxGekkoScaler



# generate some random sinuisodal data
X = np.linspace(0, 2*np.pi, 1000)
y = np.sin(X) + np.random.normal(0, 0.1, 1000)


# model the data with a neural network
model = Sequential([
    Dense(16, activation='relu', input_shape=(1,)),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mae'])


# normalise the data
X = pd.DataFrame(np.hstack([X.reshape(-1, 1), y.reshape(-1, 1)]), columns=['x', 'y'])

scaler = CustomMinMaxGekkoScaler(X, ['x'], ['y'])


history = model.fit(scaler.newdataScaled['x'].values, scaler.newdataScaled['y'].values, epochs=100, batch_size=32, verbose=0)

m = GEKKO(remote=True)

# options
m.options.NODES = 2
m.options.SOLVER = 3
m.options.IMODE = 6
m.options.MAX_ITER = 1000

m.time = np.linspace(0, 1, 101)

tf = m.FV(value=1, lb=0.1, ub=10)
tf.STATUS = 1

# wrap the model in a Gekko_NN_TF object
wrapped_model = Gekko_NN_TF(model, scaler.minMaxValues(), m, n_output=1, activationFxn='relu')

# set up state variables
x = m.CV(value=0)
u = m.CV(value=0)

# set up control variable
ctrl = m.MV(value=0, lb=0, ub=2*np.pi)
ctrl.STATUS = 1

# add equations of motion
m.Equation(x.dt() == tf * u)
m.Equation(u.dt() == tf * wrapped_model.predict([ctrl]))

# add endpoint constraint
waypoint = m.CV(value=0)
# if we've reached the waypoint, change it to 1
m.Equation(waypoint == m.if3(x - 1, 0, 1))

# fix the final value
m.fix_final(waypoint, val = 1)

# minimise final time
m.Obj(tf)

m.solve(GUI=True)

Thank you for your help.


Solution

  • The neural network adds degrees of freedom (1700) because of the time horizon (100 time steps) x 16 ReLU (Rectified Linear Unit) functions that each require an additional variable for the switching condition for the abs statement (not sure why the extra 100 variables though). Without the switching condition, the function would not be continuously differentiable for the gradient-based optimizer. If degrees of freedom are a problem, try using function that doesn't require additional degrees of freedom such as tanh activation functions. Most of the extra parts of the model are removed to highlight zero degrees of freedom when using tanh.

    import tensorflow as tf
    import numpy as np
    import pandas as pd
    
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense
    
    from gekko import GEKKO
    from gekko.ML import Gekko_NN_TF, CustomMinMaxGekkoScaler
    
    # generate some random sinuisodal data
    X = np.linspace(0, 2*np.pi, 1000)
    y = np.sin(X) + np.random.normal(0, 0.1, 1000)
    
    # model the data with a neural network
    model = Sequential([
        Dense(16, activation='tanh', input_shape=(1,)),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse', 'mae'])
    
    # normalise the data
    X = pd.DataFrame(np.hstack([X.reshape(-1, 1), y.reshape(-1, 1)]), columns=['x', 'y'])
    
    scaler = CustomMinMaxGekkoScaler(X, ['x'], ['y'])
    
    history = model.fit(scaler.newdataScaled['x'].values, scaler.newdataScaled['y'].values, epochs=100, batch_size=32, verbose=0)
    
    m = GEKKO(remote=True)
    
    # options
    m.options.NODES = 2
    m.options.SOLVER = 3
    m.options.IMODE = 6
    m.options.MAX_ITER = 1000
    
    m.time = np.linspace(0, 1, 101)
    
    tf = m.FV(value=1, lb=0.1, ub=10)
    tf.STATUS = 0
    
    # wrap the model in a Gekko_NN_TF object
    wrapped_model = Gekko_NN_TF(model, scaler.minMaxValues(), m, n_output=1, activationFxn='tanh')
    
    # set up state variables
    #x = m.Var(value=0)
    u = m.Var(value=0)
    
    # set up control variable
    ctrl = m.MV(value=0, lb=0, ub=2*np.pi)
    ctrl.STATUS = 0
    
    # add equations of motion
    #m.Equation(x.dt() == tf * u)
    m.Equation(u.dt() == tf * wrapped_model.predict([ctrl]))
    
    # add endpoint constraint
    #waypoint = m.CV(value=0)
    # if we've reached the waypoint, change it to 1
    #m.Equation(waypoint == m.if3(x - 1, 0, 1))
    
    # fix the final value
    #m.fix_final(waypoint, val = 1)
    
    # minimise final time
    #m.Obj(tf)
    
    m.solve()
    

    To reduce the degrees of freedom for the full model, use m.Var() instead of m.CV(). Another option is to use m.CV() with m.options.CV_TYPE=2 for a squared error instead of an l1-norm error that also adds additional degrees of freedom.

    The number of variables and equations in the implementation of the neural network can also be reduced by switching to Intermediate() declarations, similar to this example:

    sine

    from gekko import GEKKO
    import numpy as np
    import matplotlib.pyplot as plt  
    
    # generate training data
    x = np.linspace(0.0,2*np.pi,20)
    y = np.sin(x)
    
    # option for fitting function
    select = True # True / False
    if select:
        # Size with cosine function
        nin = 1  # inputs
        n1 = 1   # hidden layer 1 (linear)
        n2 = 1   # hidden layer 2 (nonlinear)
        n3 = 1   # hidden layer 3 (linear)
        nout = 1 # outputs
    else:
        # Size with hyperbolic tangent function
        nin = 1  # inputs
        n1 = 2   # hidden layer 1 (linear)
        n2 = 2   # hidden layer 2 (nonlinear)
        n3 = 2   # hidden layer 3 (linear)
        nout = 1 # outputs
    
    # Initialize gekko
    train = GEKKO() 
    test = GEKKO()
    
    model = [train,test]
    
    for m in model:
        # input(s)
        m.inpt = m.Param()
    
        # layer 1
        m.w1 = m.Array(m.FV, (nin,n1))
        m.l1 = [m.Intermediate(m.w1[0,i]*m.inpt) for i in range(n1)]
    
        # layer 2
        m.w2a = m.Array(m.FV, (n1,n2))
        m.w2b = m.Array(m.FV, (n1,n2))
        if select:
            m.l2 = [m.Intermediate(sum([m.cos(m.w2a[j,i]+m.w2b[j,i]*m.l1[j]) \
                                    for j in range(n1)])) for i in range(n2)]
        else:
            m.l2 = [m.Intermediate(sum([m.tanh(m.w2a[j,i]+m.w2b[j,i]*m.l1[j]) \
                                    for j in range(n1)])) for i in range(n2)]
    
        # layer 3
        m.w3 = m.Array(m.FV, (n2,n3))
        m.l3 = [m.Intermediate(sum([m.w3[j,i]*m.l2[j] \
                for j in range(n2)])) for i in range(n3)]
    
        # output(s)
        m.outpt = m.CV()
        m.Equation(m.outpt==sum([m.l3[i] for i in range(n3)]))
    
        # flatten matrices
        m.w1 = m.w1.flatten()
        m.w2a = m.w2a.flatten()
        m.w2b = m.w2b.flatten()
        m.w3 = m.w3.flatten()
    
    # Fit parameter weights
    m = train
    m.inpt.value=x
    m.outpt.value=y
    m.outpt.FSTATUS = 1
    for i in range(len(m.w1)):
        m.w1[i].FSTATUS=1
        m.w1[i].STATUS=1
        m.w1[i].MEAS=1.0
    for i in range(len(m.w2a)):
        m.w2a[i].STATUS=1
        m.w2b[i].STATUS=1
        m.w2a[i].FSTATUS=1
        m.w2b[i].FSTATUS=1
        m.w2a[i].MEAS=1.0
        m.w2b[i].MEAS=0.5
    for i in range(len(m.w3)):
        m.w3[i].FSTATUS=1
        m.w3[i].STATUS=1
        m.w3[i].MEAS=1.0
    m.options.IMODE = 2
    m.options.SOLVER = 3
    m.options.EV_TYPE = 2
    m.solve(disp=False)
    
    # Test sample points
    m = test
    for i in range(len(m.w1)):
        m.w1[i].MEAS=train.w1[i].NEWVAL
        m.w1[i].FSTATUS = 1
        print('w1['+str(i)+']: '+str(m.w1[i].MEAS))
    for i in range(len(m.w2a)):
        m.w2a[i].MEAS=train.w2a[i].NEWVAL
        m.w2b[i].MEAS=train.w2b[i].NEWVAL
        m.w2a[i].FSTATUS = 1
        m.w2b[i].FSTATUS = 1
        print('w2a['+str(i)+']: '+str(m.w2a[i].MEAS))
        print('w2b['+str(i)+']: '+str(m.w2b[i].MEAS))
    for i in range(len(m.w3)):
        m.w3[i].MEAS=train.w3[i].NEWVAL
        m.w3[i].FSTATUS = 1
        print('w3['+str(i)+']: '+str(m.w3[i].MEAS))
    m.inpt.value=np.linspace(-2*np.pi,4*np.pi,100)
    m.options.IMODE = 2
    m.options.SOLVER = 3
    m.solve(disp=False)
    
    plt.figure()
    plt.plot(x,y,'bo',label='data')
    plt.plot(test.inpt.value,test.outpt.value,'r-',label='predict')
    plt.legend(loc='best')
    plt.ylabel('y')
    plt.xlabel('x')
    plt.show()