I am new to TensorFlow. I built the following neural network in TensorFlow 1.x:
import tensorflow as tf
import numpy as np
import tflearn

class ActorNetwork(object):
    """
    Input to the network is the state, output is the action
    under a deterministic policy.
    The output layer activation is a tanh to keep the action
    between -action_bound and action_bound
    """

    def __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size

        # Actor Network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()
        self.network_params = tf.trainable_variables()

        # Target Network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        self.target_network_params = tf.trainable_variables()[
            len(self.network_params):]

        # Op for periodically updating target network with online network
        # weights
        self.update_target_network_params = \
            [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) +
                                                  tf.multiply(self.target_network_params[i], 1. - self.tau))
             for i in range(len(self.target_network_params))]

        # This gradient will be provided by the critic network
        self.action_gradient = tf.placeholder(tf.float32, [None, self.a_dim])

        # Combine the gradients here
        self.unnormalized_actor_gradients = tf.gradients(
            self.scaled_out, self.network_params, -self.action_gradient)
        self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))

        # Optimization Op
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).\
            apply_gradients(zip(self.actor_gradients, self.network_params))

        self.num_trainable_vars = len(
            self.network_params) + len(self.target_network_params)

    def create_actor_network(self):
        inputs = tflearn.input_data(shape=[None, self.s_dim])
        net = tflearn.fully_connected(inputs, 400)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        net = tflearn.fully_connected(net, 300)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        # Final layer weights are init to Uniform[-3e-3, 3e-3]
        w_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003)
        out = tflearn.fully_connected(
            net, self.a_dim, activation='tanh', weights_init=w_init)
        # Scale output to -action_bound to action_bound
        scaled_out = tf.multiply(out, self.action_bound)
        return inputs, out, scaled_out

    def train(self, inputs, a_gradient):
        self.sess.run(self.optimize, feed_dict={
            self.inputs: inputs,
            self.action_gradient: a_gradient
        })

    def predict(self, inputs):
        return self.sess.run(self.scaled_out, feed_dict={
            self.inputs: inputs
        })

    def predict_target(self, inputs):
        return self.sess.run(self.target_scaled_out, feed_dict={
            self.target_inputs: inputs
        })

    def update_target_network(self):
        self.sess.run(self.update_target_network_params)

    def get_num_trainable_vars(self):
        return self.num_trainable_vars
Instantiating it once does not give any error, but the second time it does. For example:
with tf.Session() as sess:
    actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
    actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
I get the following error, but only for actor2:
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
It has something to do with a None value in the lambda function. But why does it not raise an error the first time?
Edit: Stack trace:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-2323bc1d5028> in <module>()
1 with tf.Session() as sess:
2 actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
----> 3 actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
<ipython-input-1-895268594a81> in __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau, batch_size)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
<ipython-input-1-895268594a81> in <lambda>(x)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
/tensorflow-1.15.2/python3.6/tensorflow_core/python/util/dispatch.py in wrapper(*args, **kwargs)
178 """Call target, and fall back on dispatchers if there is a TypeError."""
179 try:
--> 180 return target(*args, **kwargs)
181 except (TypeError, ValueError):
182 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/math_ops.py in divide(x, y, name)
323 return DivideDelegateWithName(x, name) / y
324 else:
--> 325 return x / y
326
327
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
EDIT 2: Following the suggestion, I rewrote the network in TF 2.x. This eliminated the error. But are these two networks the same?
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input

class ActorNetwork(object):
    def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)

        # actor network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()
        self.actor_model = keras.Model(inputs=self.inputs, outputs=self.scaled_out, name='actor_network')
        self.network_params = self.actor_model.trainable_variables

        # target actor network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        self.target_actor_model = keras.Model(inputs=self.target_inputs, outputs=self.target_scaled_out, name='target_actor_network')
        self.target_network_params = self.target_actor_model.trainable_variables

    def create_actor_network(self):
        inputs = Input(shape=(self.state_dim,), batch_size=None, name="actor_input_state")
        net = layers.Dense(400, name='actor_dense_1a')(inputs)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        net = layers.Dense(300, name='actor_dense_1b')(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(20, name='actor_dense_1c')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(10, name='actor_dense_1d')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        w_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003, seed=None)
        out = layers.Dense(self.action_dim, activation='tanh', name='actor_dense_2', kernel_initializer=w_init)(net)
        scaled_out = tf.multiply(out, self.action_bound, name="actions_scaling")
        return inputs, out, scaled_out

    def update_target_network(self):
        self.update_target_network_params = [
            self.target_network_params[i].assign(
                tf.multiply(self.network_params[i], self.tau)
                + tf.multiply(self.target_network_params[i], 1 - self.tau))
            for i in range(len(self.target_network_params))]

    def train(self, inputs, a_gradient):
        with tf.GradientTape() as self.tape:
            self.prediction = self.actor_model(inputs)
        self.unnormalized_actor_gradients = self.tape.gradient(
            self.prediction, self.network_params, output_gradients=-a_gradient)
        self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
        self.optimizer.apply_gradients(zip(self.actor_gradients, self.network_params))

    def predict(self, inputs):
        return self.actor_model(inputs)

    def predict_target(self, inputs):
        return self.target_actor_model(inputs)
Your problem is here:
self.network_params = tf.trainable_variables()
And this is also most definitely going to cause you a problem:
self.target_network_params = tf.trainable_variables()[
    len(self.network_params):]
The problem is that you are creating both models in the same TensorFlow graph. When you call tf.trainable_variables(), you get every trainable variable in the graph. The first time around, those are just the variables of the model you just created with self.create_actor_network(). But the second time, the list includes both the variables of the second ActorNetwork and the variables from the first one. Obviously, there are no gradients between the variables of the first network and the output of the second network, so tf.gradients produces some None results that then cause the error.
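You can see this behavior in isolation: tf.gradients returns None for any variable that the requested output does not depend on, and dividing that None by batch_size is exactly what raises the TypeError. A minimal sketch (the variable names here are illustrative, not from your code):

import tensorflow as tf

a = tf.Variable(1.0)
b = tf.Variable(2.0)  # unrelated to y
y = 3.0 * a           # y depends only on a

grads = tf.gradients(y, [a, b])
print(grads)  # [<Tensor 'gradients/...'>, None] -- no path from b to y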
The simplest solution would be to put each network in a different graph, for example:
with tf.Graph().as_default() as graph1, tf.Session() as sess1:
    actor1 = ActorNetwork(sess1, 1, 2, 1, 0.01, 0.003, 200)
with tf.Graph().as_default() as graph2, tf.Session() as sess2:
    actor2 = ActorNetwork(sess2, 1, 2, 1, 0.01, 0.003, 200)
You could also do this from inside the class, which would be safer, although you would not be able to create the session beforehand:
def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
    self.graph = tf.Graph()
    with self.graph.as_default():  # Add at the beginning of all methods
        self.sess = tf.Session()
        # ...
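With that change each instance owns its own graph and session, so consecutive constructions no longer interfere. Usage would then look like this (note that sess is no longer a constructor argument):

actor1 = ActorNetwork(1, 2, 1, 0.01, 0.003, 200)  # builds its own graph and session
actor2 = ActorNetwork(1, 2, 1, 0.01, 0.003, 200)  # independent graph, no shared variables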
However, if you want to have both models within the same graph, you will need to change your code further to avoid using tf.trainable_variables(). You can, for example, keep track of the variables yourself:
def create_actor_network(self):
    all_vars = []  # list of model variables
    inputs = tflearn.input_data(shape=[None, self.s_dim])
    net = tflearn.fully_connected(inputs, 400)
    # Save layer variables
    all_vars.append(net.W)
    all_vars.append(net.b)
    # ...
    return inputs, out, scaled_out, all_vars  # Return variable list
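Another same-graph option, which the manual tracking above avoids needing but which saves some bookkeeping, is to build each network inside a uniquely named variable scope and collect only that scope's variables. This assumes tflearn respects an enclosing variable scope (it creates its variables through TensorFlow's regular mechanisms, so it should):

def create_actor_network(self):
    # default_name lets TensorFlow uniquify the scope: 'actor', 'actor_1', ...
    with tf.variable_scope(None, default_name='actor') as scope:
        inputs = tflearn.input_data(shape=[None, self.s_dim])
        # ... build the rest of the network as before ...
    # The trailing '/' avoids also matching 'actor_1' when the scope is 'actor'
    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name + '/')
    return inputs, out, scaled_out, params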
The module tflearn.variables offers some helpers to simplify this, although nothing too sophisticated. In any case, I would advise against using TFLearn if you can avoid it: it is unmaintained and has been superseded by Keras, where each model tracks its own variables and you would just need to use .weights or .trainable_weights.
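That per-model tracking is exactly what makes your TF 2.x rewrite safe. A minimal sketch (the layer sizes and input shape here are placeholders):

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(400, activation='relu', input_shape=(3,)),
    tf.keras.layers.Dense(2, activation='tanh'),
])
params = model.trainable_weights  # only this model's variables, never another model's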