Following this paper on domain adaptation, I am trying to implement the following gradient reversal layer (written for Keras with the Theano backend, as found in this Keras issue) in TensorFlow, because my model does not run well with Theano.
class GradientReversalLayer(Layer):
    """Keras layer that reverses the gradient flowing through it.

    <feedforward> return input x
    <backward>    return -lambda * delta

    The actual work is delegated to a ``ReverseGradient`` op that is created
    once in ``__init__`` from ``hp_lambda``.

    Args:
        hp_lambda: scaling factor applied to the reversed gradient.
        **kwargs: forwarded unchanged to the base ``Layer`` constructor.
    """

    def __init__(self, hp_lambda, **kwargs):
        super(GradientReversalLayer, self).__init__(**kwargs)
        self.hp_lambda = hp_lambda
        self.gr_op = ReverseGradient(self.hp_lambda)

    def build(self, input_shape):
        # The layer owns no parameters.
        self.trainable_weights = []

    def call(self, x, mask=None):
        """Apply the gradient reversal op to ``x``; ``mask`` is ignored."""
        return self.gr_op(x)

    def get_output_shape_for(self, input_shape):
        """Keras 1 shape hook: the output shape equals the input shape."""
        return input_shape

    def compute_output_shape(self, input_shape):
        """Keras 2 shape hook; same result as ``get_output_shape_for``."""
        return self.get_output_shape_for(input_shape)

    def get_config(self):
        """Return the layer config, including the constructor argument.

        The key must be ``hp_lambda`` (the ``__init__`` parameter name) so the
        config can be fed back to the constructor. The ``name`` entry is left
        to the base config rather than being overwritten with the class name.
        """
        config = {"hp_lambda": self.hp_lambda}
        base_config = super(GradientReversalLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
The layer performs this operation:
import theano
from keras.engine import Layer
class ReverseGradient(theano.Op):
    """Theano op: pass the input through unchanged, flip the gradient.

    The gradient returned by ``grad`` is the incoming output gradient
    multiplied by ``-hp_lambda``.

    Introduced in http://arxiv.org/pdf/1409.7495.pdf
    """

    # perform() stores the input object itself as the output, hence output 0
    # is declared as a view of input 0.
    view_map = {0: [0]}

    # Attributes listed here are expected to make theano provide `_props`
    # (checked in make_node).
    __props__ = ('hp_lambda',)

    def __init__(self, hp_lambda):
        super(ReverseGradient, self).__init__()
        self.hp_lambda = hp_lambda

    def make_node(self, x):
        """Build the Apply node: one input, one output of the same type."""
        assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."
        x_var = theano.tensor.as_tensor_variable(x)
        outputs = [x_var.type()]
        return theano.Apply(self, [x_var], outputs)

    def perform(self, node, inputs, output_storage):
        """Forward pass: store the single input as the single output."""
        (value,) = inputs
        (out_cell,) = output_storage
        out_cell[0] = value

    def grad(self, input, output_gradients):
        """Backward pass: negate the first output gradient, scaled by hp_lambda."""
        upstream = output_gradients[0]
        return [-self.hp_lambda * upstream]

    def infer_shape(self, node, i0_shapes):
        """Output shapes are exactly the input shapes."""
        return i0_shapes
If I run my model with the tf backend and with this function written in Theano I get the following error:
theano.tensor.var.AsTensorError: ('Cannot convert Tensor("concatenate_1/concat:0", shape=(?, ?, 128), dtype=float32) to TensorType', <class 'tensorflow.python.framework.ops.Tensor'>)
After calling it like this:
# Call site that produces the error shown above: the two hidden outputs are
# concatenated and the result is passed through the gradient reversal layer,
# constructed with 0.31 as its first argument (hp_lambda).
# NOTE(review): FlipGradientKeras is presumably the module holding the layer.
lstm_concat = concatenate([hidden_out_1, hidden_out_2])
lstm_concat = FlipGradientKeras.GradientReversalLayer(0.31)(lstm_concat)
The documentation about adding a new operation only suggests implementing it in C++.
The ops code shows the general framework, but I'd like to be sure that I'm implementing everything that the Theano op does.
I would assume it would be something along the lines of:
def ReverseGradient(input_tensor, hp_lambda, name=None):
    """Unfinished sketch of a TensorFlow gradient reversal op.

    Only opens a name scope and converts the input to a tensor. The gradient
    reversal itself is not implemented and nothing is returned.

    Args:
        input_tensor: value to convert to a tensor.
        hp_lambda: gradient scaling factor; so far only passed to the name
            scope as one of its values.
        name: optional scope name. Added because the original read ``name``
            before it was bound, which raises UnboundLocalError. When left as
            None the scope falls back to "ReverseGradient".
    """
    # NOTE(review): `ops` is presumably tensorflow.python.framework.ops; it is
    # not imported in the visible part of the file.
    with ops.name_scope(name, "ReverseGradient", [input_tensor, hp_lambda]) as name:
        input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
But I'm really not sure about the rest.
Thanks in advance!
I solved the problem by expanding on the work done here.
Here's the working code:
import tensorflow as tf
from keras.engine import Layer
import keras.backend as K
def reverse_gradient(X, hp_lambda):
    """Return an identity copy of ``X`` whose gradient is negated and scaled.

    Each call registers a new gradient function under a unique name and maps
    the ``Identity`` op to it while creating the identity of ``X`` in the
    graph of the current Keras session.

    Args:
        X: tensor to pass through.
        hp_lambda: factor the negated gradient is multiplied by.

    Returns:
        The result of ``tf.identity(X)`` created under the gradient override.
    """
    # Call counter kept on the function object; it makes every registered
    # gradient name distinct.
    call_index = getattr(reverse_gradient, "num_calls", 0) + 1
    reverse_gradient.num_calls = call_index
    override_name = "GradientReversal%d" % call_index

    def _negate_and_scale(op, grad):
        return [tf.negative(grad) * hp_lambda]

    tf.RegisterGradient(override_name)(_negate_and_scale)

    graph = K.get_session().graph
    with graph.gradient_override_map({'Identity': override_name}):
        return tf.identity(X)
class GradientReversal(Layer):
    """Flip the sign of the gradient during training.

    The forward pass is delegated to ``reverse_gradient`` with this layer's
    ``hp_lambda``.

    Args:
        hp_lambda: factor passed to ``reverse_gradient``.
        **kwargs: forwarded unchanged to the base ``Layer`` constructor.
    """

    def __init__(self, hp_lambda, **kwargs):
        super(GradientReversal, self).__init__(**kwargs)
        self.supports_masking = False
        self.hp_lambda = hp_lambda

    def build(self, input_shape):
        # The layer owns no parameters.
        self.trainable_weights = []

    def call(self, x, mask=None):
        """Apply ``reverse_gradient`` to ``x``; ``mask`` is ignored."""
        return reverse_gradient(x, self.hp_lambda)

    def get_output_shape_for(self, input_shape):
        """Keras 1 shape hook: the output shape equals the input shape."""
        return input_shape

    def compute_output_shape(self, input_shape):
        """Keras 2 shape hook; same result as ``get_output_shape_for``."""
        return self.get_output_shape_for(input_shape)

    def get_config(self):
        """Return the layer config, including ``hp_lambda``.

        ``hp_lambda`` is a required constructor argument, so it has to be in
        the config for the layer to be re-created from it.
        """
        config = {'hp_lambda': self.hp_lambda}
        base_config = super(GradientReversal, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))