I have a Keras model defined in the following manner (I tried to keep only the necessary parts):
# Softening temperature applied to the teacher logits inside the loss.
temperature = 5.0


def knowledge_distillation_loss(y_true, y_pred, lambda_const):
    """Combine a hard-label loss with a temperature-softened distillation loss.

    Both tensors are split at column 10. The first 10 columns of ``y_true``
    are treated as the true labels and the remaining columns as logits,
    which are divided by the module-level ``temperature`` and passed through
    a softmax. ``y_pred`` is split the same way into the plain predictions
    and the softened predictions.

    Args:
        y_true: 2-D tensor; columns ``[:10]`` labels, columns ``[10:]`` logits.
        y_pred: 2-D tensor; columns ``[:10]`` predictions, columns ``[10:]``
            softened predictions.
        lambda_const: Weight applied to the hard-label term.

    Returns:
        ``lambda_const * logloss(labels, predictions)
        + logloss(soft_targets, soft_predictions)``.
    """
    # NOTE(review): `K` and `logloss` are not defined in this snippet; they
    # are presumably the Keras backend and a cross-entropy helper -- confirm.
    hard_targets = y_true[:, :10]
    teacher_logits = y_true[:, 10:]
    soft_targets = K.softmax(teacher_logits / temperature)

    hard_predictions = y_pred[:, :10]
    soft_predictions = y_pred[:, 10:]

    hard_term = logloss(hard_targets, hard_predictions)
    soft_term = logloss(soft_targets, soft_predictions)
    return lambda_const * hard_term + soft_term
def get_model(num_labels):
    """Build and compile a model that emits plain and softened probabilities.

    The final ``Dense`` layer's output is used as logits. Two softmax heads
    are attached to it -- one on the raw logits and one on the logits divided
    by the module-level ``temperature`` -- and their outputs are concatenated
    into a single output tensor. The model is compiled with Nesterov SGD and
    ``knowledge_distillation_loss`` weighted by ``lambda_const = 0.07``.

    Args:
        num_labels: Number of units in the final ``Dense`` layer.

    Returns:
        The compiled model.
    """
    #Some layers for model
    # (elided in the original: the code that creates `model` goes here)
    model.add(Dense(num_labels))
    raw_logits = model.layers[-1].output
    hard_probabilities = Activation('softmax')(raw_logits)

    # Softened branch: scale the logits by the temperature before the softmax.
    scaled_logits = Lambda(lambda x: x / temperature)(raw_logits)
    soft_probabilities = Activation('softmax')(scaled_logits)

    model = Model(
        model.input,
        concatenate([hard_probabilities, soft_probabilities]),
    )

    lambda_const = 0.07
    sgd = optimizers.SGD(lr=1e-1, momentum=0.9, nesterov=True)
    model.compile(
        optimizer=sgd,
        # The lambda binds lambda_const so the loss has Keras' two-argument form.
        loss=lambda y_true, y_pred: knowledge_distillation_loss(
            y_true, y_pred, lambda_const
        ),
        metrics=[accuracy],
    )
    return model
I am following this reference.
This is implemented using fit_generator()
on Keras with the TensorFlow backend. Obviously, I will have trouble when loading the model, since temperature
is hard-coded.
Also, I wish to update the temperature parameter as a function of the epoch number, in both the loss function and the model.
How do I define such a control signal?
I've turned this into a complete example of one way to do this.
You could make a class for the loss function.
class TemperatureLossFunction:
    """Loss wrapper whose scale factor (the temperature) can be changed at run time.

    The loss is the Keras mean squared error multiplied by ``temperature``.
    """

    def __init__(self, temperature):
        """Store the temperature used to scale the loss.

        Args:
            temperature: Value multiplied into the loss. ``setTemperature``
                additionally requires it to provide ``assign(value)``
                (e.g. a ``tensorflow.Variable``).
        """
        self.temperature = temperature

    def loss_fun(self, y_truth, y_pred):
        """Return ``temperature * keras.losses.mse(y_truth, y_pred)``."""
        return self.temperature * keras.losses.mse(y_truth, y_pred)

    def setTemperature(self, t, session=None):
        """Assign ``t`` to the temperature by running its ``assign`` op.

        Args:
            t: New temperature value.
            session: Session used to run the assignment. When omitted (or
                falsy), the current tensorflow default session is used.

        If no session is available the call does nothing.
        """
        # The explicit session wins; the default session is only looked up
        # when none was supplied.
        active_session = session or tensorflow.get_default_session()
        if active_session:
            active_session.run(self.temperature.assign(t))
class TemperatureLossCallback(keras.callbacks.Callback):
    """Keras callback that updates the loss temperature after every epoch."""

    def __init__(self, temp_lf):
        """Remember the object whose temperature is updated.

        Args:
            temp_lf: Object exposing ``setTemperature(value)``.
        """
        # Let the Keras base class set up its own state first.
        super().__init__()
        self.temp_lf = temp_lf

    def on_epoch_end(self, epoch, params=None):
        """Set the temperature to the index of the epoch that just finished.

        Args:
            epoch: Epoch index supplied by Keras.
            params: Second positional argument supplied by Keras (its
                per-epoch logs); unused here. Defaults to ``None`` so the
                hook matches the base-class signature.
        """
        # NOTE(review): Keras epoch indices start at 0, so the first value
        # assigned is 0 -- confirm that is the intended schedule.
        self.temp_lf.setTemperature(epoch)
I've created two methods for working with this, the first method creates and saves the model.
def init(session):
    """Build and compile a small model that shares one temperature variable.

    The same ``tensorflow.Variable`` (initial value 5.0) is used by the
    ``Lambda`` layer, which multiplies its input by it, and by the
    ``TemperatureLossFunction`` used as the model's loss.

    Args:
        session: Session in which the temperature variable is initialized.
    """
    # Kept as a module-level global for serialization issues (the Lambda
    # layer's function refers to it by name).
    global temperature
    temperature = tensorflow.Variable(5.0)
    tlo = TemperatureLossFunction(temperature)

    model_input = keras.layers.Input((4, 4))
    scaled = keras.layers.Lambda(lambda x: temperature * x)(model_input)

    m = keras.models.Model(inputs=[model_input], outputs=[scaled])
    m.compile(optimizer=keras.optimizers.SGD(0.01), loss=tlo.loss_fun)

    # Make sure this is the session you are actually using!
    session.run(temperature.initializer)
The first test I run makes sure we are changing the value.
# Continuation of the example: `m`, `session` and `temperature` are the ones
# created in init(). Evaluate, set the temperature to 1, then evaluate again.
check_inputs = numpy.ones((1, 4, 4))
check_targets = numpy.zeros((1, 4, 4))
m.evaluate(check_inputs, check_targets)
session.run(temperature.assign(1))
m.evaluate(check_inputs, check_targets)
The second test I run makes sure we can change the values with a callback.
# Continuation of the example: `m` and `tlo` are the ones created in init().
cb = TemperatureLossCallback(tlo)


def gen():
    """Yield ten (input, target) batches: all-ones inputs, all-zeros targets."""
    for _ in range(10):
        batch_inputs = numpy.ones((1, 4, 4))
        batch_targets = numpy.zeros((1, 4, 4))
        yield batch_inputs, batch_targets


# One step per epoch for ten epochs, so the callback fires after each batch.
m.fit_generator(gen(), steps_per_epoch=1, epochs=10, callbacks=[cb])
m.save("junk.h5")
Finally, to demonstrate reloading the file.
def restart(session):
    """Reload the model saved as ``junk.h5`` and evaluate it at two temperatures.

    A fresh temperature variable (initial value 5.0) and loss object are
    created first, and the loss is handed to ``load_model`` under the name
    ``"loss_fun"`` so the saved model can resolve its custom loss.

    Args:
        session: Session used to initialize and later reassign the
            temperature variable.
    """
    # Module-level global, mirroring how the model was created.
    global temperature
    temperature = tensorflow.Variable(5.0)
    tlo = TemperatureLossFunction(temperature)
    loss_fun = tlo.loss_fun

    m = keras.models.load_model("junk.h5", custom_objects={"loss_fun": loss_fun})
    session.run(temperature.initializer)

    # Same check as after creation: evaluate, set temperature to 1, evaluate.
    batch_shape = (1, 4, 4)
    m.evaluate(numpy.ones(batch_shape), numpy.zeros(batch_shape))
    session.run(temperature.assign(1))
    m.evaluate(numpy.ones(batch_shape), numpy.zeros(batch_shape))
This is just the code I use to start the program for completeness
import sys
if __name__ == "__main__":
    # Using the session itself as the context manager installs it as the
    # default session (as `sess.as_default()` did) and also closes it on
    # exit, releasing the resources it owns.
    with tensorflow.Session() as sess:
        # Pass "restart" on the command line to reload the saved model;
        # otherwise build, train and save a new one.
        if "restart" in sys.argv:
            restart(sess)
        else:
            init(sess)
One downside of this method: if you run this, you will see that the temperature variable does not get loaded from the model file. It takes on the value assigned in the code.
On the plus side, both the loss function and the layer are referencing the same Variable
One way I found to save the variable value is to create a new layer and use the variable as the weight for the new layer.
class VLayer(keras.layers.Layer):
    """Layer that multiplies its input by one non-trainable scalar weight.

    The scalar is registered as a layer weight, so it is part of the layer's
    weights rather than a free-standing variable.
    """

    def __init__(self, *args, **kwargs):
        """Create the layer; keyword arguments are forwarded to ``Layer``.

        Positional arguments are accepted but not forwarded.
        """
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scalar weight ``v1`` (float32, shape ``()``, zeros)."""
        self.v1 = self.add_weight(
            # An explicit name gives the weight a stable identifier.
            name="v1",
            shape=(),
            dtype="float32",
            initializer="zeros",
            trainable=False,
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``x * v1``; this is all zeros until ``setValue`` is called."""
        return x * self.v1

    def setValue(self, val):
        """Set the scalar weight to ``val``."""
        # set_weights expects a list with one array per weight, each matching
        # the weight's shape -- here a single 0-d array.
        self.set_weights([numpy.array(val, dtype="float32")])
Now when you load the model, the weight will be loaded. Unfortunately, I could not find a way to link the weight to a Variable on load. So there will be two variables, one for the loss function and one for the layer. Both of them can be set from a callback though. So I feel this method is on a more robust path.