I'm trying to run a custom-built model with TF in Eager Execution, but I receive the error below, which I really don't know how to interpret:
AttributeError Traceback (most recent call last)
<ipython-input-24-113069c95a32> in <module>()
17 answer=tf.convert_to_tensor(answers_train[i], dtype=tf.float32)
18
---> 19 grads = grad(model, sent, quest, answer)
20 optimizer.apply_gradients(zip(grads, model.variables),
21 global_step=tf.train.get_or_create_global_step())
<ipython-input-20-21f32f7e2b32> in grad(model, sent, quest, targets)
2 with tfe.GradientTape() as tape:
3 loss_value = loss(model, sent, quest, targets)
----> 4 return tape.gradient(loss_value, model.variables)
/Users/sdoneva/anaconda/lib/python3.6/site-packages/tensorflow/python/eager/backprop.py in __exit__(self, typ, value, traceback)
715
716 def __exit__(self, typ, value, traceback):
--> 717 tape.pop_tape(self._tape)
718
719 def watch(self, tensor):
/Users/sdoneva/anaconda/lib/python3.6/site-packages/tensorflow/python/eager/tape.py in pop_tape(tape)
60 def pop_tape(tape):
61 """Pops the top tape in the stack, if any."""
---> 62 pywrap_tensorflow.TFE_Py_TapeSetRemove(tape._tape) # pylint: disable=protected-access
63
64
AttributeError: 'NoneType' object has no attribute '_tape'
My inputs are arrays converted to Tensors:
sent=tf.convert_to_tensor(contexts_train[i], dtype=tf.float32)
quest=tf.convert_to_tensor(questions_train[i], dtype=tf.float32)
answer=tf.convert_to_tensor(answers_train[i], dtype=tf.float32)
And this is how I defined my grad and loss function:
def grad(model, sent, quest, targets):
    """Compute gradients of the loss w.r.t. the model's variables."""
    with tfe.GradientTape() as tape:
        loss_value = loss(model, sent, quest, targets)
        # BUG (as posted): tape.gradient is called while the tape's `with`
        # block is still open. In this TF version the gradient call consumes
        # and pops the tape, so __exit__ then pops None and raises
        # AttributeError: 'NoneType' object has no attribute '_tape'.
        return tape.gradient(loss_value, model.variables)
def loss(model, sent, quest, y):
    """Categorical cross-entropy between the targets ``y`` and the
    model's prediction for the (sent, quest) pair."""
    return tf.keras.losses.categorical_crossentropy(
        y, model.predict(sent, quest)
    )
And how I call the training:
grads = grad(model, sent, quest, answer)
optimizer.apply_gradients(zip(grads, model.variables),
global_step=tf.train.get_or_create_global_step())
For the sake of completeness, here the Model:
class Model(tf.keras.Model):
    """QA model: GRU-encodes a sentence and a question separately (with a
    shared RNN), concatenates the two encodings, and maps them through a
    42-way softmax over the answer vocabulary."""

    def __init__(self):
        super(Model, self).__init__()
        # 42-token vocabulary embedded into 50-dimensional vectors.
        self.embed=tf.keras.layers.Embedding(42,50)
        self.grucell=tf.keras.layers.GRUCell(50)
        # One RNN instance shared by both the sentence and question paths.
        self.rnn=tf.keras.layers.RNN(self.grucell)
        self.dense=tf.keras.layers.Dense(42,activation=tf.nn.softmax)
        self.dropout=tf.keras.layers.Dropout(0.3)

    def predict(self, sentence, question):
        """Return softmax answer probabilities for (sentence, question).

        NOTE(review): this shadows tf.keras.Model.predict with different
        semantics; in eager/keras code the forward pass is conventionally
        named `call` — confirm this is intentional.
        """
        encoded_sentence=self.embed(sentence)
        # expand_dims(axis=-1) makes the embedded tensor rank-3 so the RNN
        # accepts it, treating each embedding dimension as a timestep of
        # size 1 — presumably intentional, but verify the sequence axis.
        encoded_sentence=tf.keras.backend.expand_dims(encoded_sentence, axis=-1)
        encoded_sentence=self.rnn(encoded_sentence)
        encoded_sentence=self.dropout(encoded_sentence)
        encoded_question=self.embed(question)
        encoded_question=tf.keras.backend.expand_dims(encoded_question, axis=-1)
        encoded_question=self.rnn(encoded_question)
        encoded_question=self.dropout(encoded_question)
        # Fuse the two encodings, classify, then restore a length-1 axis.
        merged= tf.keras.layers.concatenate([encoded_sentence, encoded_question])
        pred= self.dense(merged)
        pred= tf.keras.backend.expand_dims(pred, axis=1)
        return pred
It would be great to receive some help to understand what is happening in the background.
You need to call `tape.gradient` outside the scope of the `with` block. In particular, change your `grad` function to the following:
def grad(model, sent, quest, targets):
    """Record the forward pass on the tape, then take gradients."""
    with tfe.GradientTape() as tape:
        loss_value = loss(model, sent, quest, targets)
    # The gradient call sits OUTSIDE the `with` block: __exit__ closes the
    # tape cleanly first, and only then is the tape consumed.
    return tape.gradient(loss_value, model.variables)