I am new to TensorFlow. I built the following neural network in TensorFlow 1.x:
import tensorflow as tf
import numpy as np
import tflearn

class ActorNetwork(object):
    """
    Input to the network is the state, output is the action
    under a deterministic policy.
    The output layer activation is a tanh to keep the action
    between -action_bound and action_bound
    """

    def __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
        self.sess = sess
        self.s_dim = state_dim
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size

        # Actor Network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()
        self.network_params = tf.trainable_variables()

        # Target Network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        self.target_network_params = tf.trainable_variables()[
            len(self.network_params):]

        # Op for periodically updating target network with online network
        # weights
        self.update_target_network_params = \
            [self.target_network_params[i].assign(tf.multiply(self.network_params[i], self.tau) +
                                                  tf.multiply(self.target_network_params[i], 1. - self.tau))
             for i in range(len(self.target_network_params))]

        # This gradient will be provided by the critic network
        self.action_gradient = tf.placeholder(tf.float32, [None, self.a_dim])

        # Combine the gradients here
        self.unnormalized_actor_gradients = tf.gradients(
            self.scaled_out, self.network_params, -self.action_gradient)
        self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))

        # Optimization Op
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).\
            apply_gradients(zip(self.actor_gradients, self.network_params))

        self.num_trainable_vars = len(
            self.network_params) + len(self.target_network_params)

    def create_actor_network(self):
        inputs = tflearn.input_data(shape=[None, self.s_dim])
        net = tflearn.fully_connected(inputs, 400)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        net = tflearn.fully_connected(net, 300)
        net = tflearn.layers.normalization.batch_normalization(net)
        net = tflearn.activations.relu(net)
        # Final layer weights are init to Uniform[-3e-3, 3e-3]
        w_init = tflearn.initializations.uniform(minval=-0.003, maxval=0.003)
        out = tflearn.fully_connected(
            net, self.a_dim, activation='tanh', weights_init=w_init)
        # Scale output to -action_bound to action_bound
        scaled_out = tf.multiply(out, self.action_bound)
        return inputs, out, scaled_out

    def train(self, inputs, a_gradient):
        self.sess.run(self.optimize, feed_dict={
            self.inputs: inputs,
            self.action_gradient: a_gradient
        })

    def predict(self, inputs):
        return self.sess.run(self.scaled_out, feed_dict={
            self.inputs: inputs
        })

    def predict_target(self, inputs):
        return self.sess.run(self.target_scaled_out, feed_dict={
            self.target_inputs: inputs
        })

    def update_target_network(self):
        self.sess.run(self.update_target_network_params)

    def get_num_trainable_vars(self):
        return self.num_trainable_vars
Instantiating it once does not give any error, but the second time it does. For example:
with tf.Session() as sess:
    actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
    actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
I get the following error, but only for actor2:
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
It has something to do with a None value in the lambda function. But why does it not raise an error the first time?
Edit: Stack trace:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-2323bc1d5028> in <module>()
1 with tf.Session() as sess:
2 actor1 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
----> 3 actor2 = ActorNetwork(sess, 1, 2, 1, 0.01, 0.003, 200)
<ipython-input-1-895268594a81> in __init__(self, sess, state_dim, action_dim, action_bound, learning_rate, tau, batch_size)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
<ipython-input-1-895268594a81> in <lambda>(x)
48 self.unnormalized_actor_gradients = tf.gradients(
49 self.scaled_out, self.network_params, -self.action_gradient)
---> 50 self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
51
52 # Optimization Op
/tensorflow-1.15.2/python3.6/tensorflow_core/python/util/dispatch.py in wrapper(*args, **kwargs)
178 """Call target, and fall back on dispatchers if there is a TypeError."""
179 try:
--> 180 return target(*args, **kwargs)
181 except (TypeError, ValueError):
182 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/tensorflow-1.15.2/python3.6/tensorflow_core/python/ops/math_ops.py in divide(x, y, name)
323 return DivideDelegateWithName(x, name) / y
324 else:
--> 325 return x / y
326
327
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
EDIT 2: Following the suggestion, I rewrote the network in TF 2.x. This eliminated the error. But are these two networks the same?
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input

class ActorNetwork(object):
    def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size
        self.optimizer = tf.keras.optimizers.Adam(self.learning_rate)

        # actor network
        self.inputs, self.out, self.scaled_out = self.create_actor_network()
        self.actor_model = keras.Model(inputs=self.inputs, outputs=self.scaled_out, name='actor_network')
        self.network_params = self.actor_model.trainable_variables

        # target actor network
        self.target_inputs, self.target_out, self.target_scaled_out = self.create_actor_network()
        self.target_actor_model = keras.Model(inputs=self.target_inputs, outputs=self.target_scaled_out, name='target_actor_network')
        self.target_network_params = self.target_actor_model.trainable_variables

    def create_actor_network(self):
        inputs = Input(shape=(self.state_dim,), batch_size=None, name="actor_input_state")
        net = layers.Dense(400, name='actor_dense_1a')(inputs)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        net = layers.Dense(300, name='actor_dense_1b')(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(20, name='actor_dense_1c')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        # net = layers.Dense(10, name='actor_dense_1d')(net)
        # net = layers.BatchNormalization()(net)
        # net = layers.Activation(activation=tf.nn.relu)(net)
        w_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003, seed=None)
        out = layers.Dense(self.action_dim, activation='tanh', name='actor_dense_2', kernel_initializer=w_init)(net)
        scaled_out = tf.multiply(out, self.action_bound, name="actions_scaling")
        return inputs, out, scaled_out

    def update_target_network(self):
        self.update_target_network_params = [
            self.target_network_params[i].assign(
                tf.multiply(self.network_params[i], self.tau)
                + tf.multiply(self.target_network_params[i], 1 - self.tau))
            for i in range(len(self.target_network_params))]

    def train(self, inputs, a_gradient):
        with tf.GradientTape() as self.tape:
            self.prediction = self.actor_model(inputs)
        self.unnormalized_actor_gradients = self.tape.gradient(
            self.prediction, self.network_params, output_gradients=-a_gradient)
        self.actor_gradients = list(map(lambda x: tf.math.divide(x, self.batch_size), self.unnormalized_actor_gradients))
        self.optimizer.apply_gradients(zip(self.actor_gradients, self.network_params))

    def predict(self, inputs):
        return self.actor_model(inputs)

    def predict_target(self, inputs):
        return self.target_actor_model(inputs)
Your problem is here:
self.network_params = tf.trainable_variables()
And this is also most definitely going to cause you a problem:
self.target_network_params = tf.trainable_variables()[
    len(self.network_params):]
The problem is that you are creating both models in the same TensorFlow graph. When you call tf.trainable_variables(), you get every trainable variable in the graph. The first time around, those are just the variables of the model you just created with self.create_actor_network(). But the second time, the list includes both the variables of the second ActorNetwork and the variables from the first one. Obviously, there are no gradients between the variables of the first network and the output of the second network, so tf.gradients produces some None results that then cause the error.
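You can see this behavior in isolation: tf.gradients returns None for any variable that the requested output does not depend on, and dividing that None by batch_size is exactly what raises the TypeError. A minimal sketch (the variable names here are illustrative, not from your code):

import tensorflow as tf

a = tf.Variable(1.0)
b = tf.Variable(2.0)  # unrelated to y
y = 3.0 * a           # y depends only on a

grads = tf.gradients(y, [a, b])
print(grads)  # [<Tensor 'gradients/...'>, None] -- no path from b to y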
The simplest solution would be to put each network in a different graph, for example:
with tf.Graph().as_default() as graph1, tf.Session() as sess1:
    actor1 = ActorNetwork(sess1, 1, 2, 1, 0.01, 0.003, 200)
with tf.Graph().as_default() as graph2, tf.Session() as sess2:
    actor2 = ActorNetwork(sess2, 1, 2, 1, 0.01, 0.003, 200)
You could also do this from inside the class, which would be safer, although you would not be able to create the session beforehand:
def __init__(self, state_dim, action_dim, action_bound, learning_rate, tau, batch_size):
    self.graph = tf.Graph()
    with self.graph.as_default():  # Add at the beginning of all methods
        self.sess = tf.Session()
        # ...
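With that change each instance owns its own graph and session, so consecutive constructions no longer interfere. Usage would then look like this (note that sess is no longer a constructor argument):

actor1 = ActorNetwork(1, 2, 1, 0.01, 0.003, 200)  # builds its own graph and session
actor2 = ActorNetwork(1, 2, 1, 0.01, 0.003, 200)  # independent graph, no shared variables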
However, if you want to have both models within the same graph, you will need to change your code further to avoid using tf.trainable_variables(). You can, for example, keep track of the variables yourself:
def create_actor_network(self):
    all_vars = []  # list of model variables
    inputs = tflearn.input_data(shape=[None, self.s_dim])
    net = tflearn.fully_connected(inputs, 400)
    # Save layer variables
    all_vars.append(net.W)
    all_vars.append(net.b)
    # ...
    return inputs, out, scaled_out, all_vars  # Return variable list
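Another same-graph option, which the manual tracking above avoids needing but which saves some bookkeeping, is to build each network inside a uniquely named variable scope and collect only that scope's variables. This assumes tflearn respects an enclosing variable scope (it creates its variables through TensorFlow's regular mechanisms, so it should):

def create_actor_network(self):
    # default_name lets TensorFlow uniquify the scope: 'actor', 'actor_1', ...
    with tf.variable_scope(None, default_name='actor') as scope:
        inputs = tflearn.input_data(shape=[None, self.s_dim])
        # ... build the rest of the network as before ...
    # The trailing '/' avoids also matching 'actor_1' when the scope is 'actor'
    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name + '/')
    return inputs, out, scaled_out, params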
The module tflearn.variables offers some helpers to simplify this, although nothing too sophisticated. In any case, I would advise against using TFLearn if you can avoid it: it is unmaintained and has been superseded by Keras, where each model tracks its own variables and you would just need to use .weights or .trainable_weights.
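That per-model tracking is exactly what makes your TF 2.x rewrite safe. A minimal sketch (the layer sizes and input shape here are placeholders):

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(400, activation='relu', input_shape=(3,)),
    tf.keras.layers.Dense(2, activation='tanh'),
])
params = model.trainable_weights  # only this model's variables, never another model's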