I am trying to write a model with three outputs: the last two are to be trained against targets present in the dataset, while the first should just be a non-trainable output.
First, defining a dataset:
from typing import Tuple
import numpy as np
import tensorflow as tf
def ds_fn(in_shape: Tuple[int],
          out_shape: Tuple[int],
          dtype: tf.DType = tf.float32) -> tf.data.Dataset:
    """Create a dataset of 1000 identical ``(inputs, targets)`` examples.

    Every element is ``((x0, x1), (y1, y2))``: ``x0`` is all ones and ``x1``
    all twos (both of shape ``in_shape``); ``y1`` is all twos and ``y2`` all
    threes (both of shape ``out_shape``).

    Args:
        in_shape: Shape of each of the two input arrays.
        out_shape: Shape of each of the two target arrays.
        dtype: Element type declared to ``from_generator`` for every
            component. The generated NumPy arrays are always ``float32``.

    Returns:
        A ``tf.data.Dataset`` built from the generator.
    """

    def gen() -> Tuple[Tuple[np.array], Tuple[np.array]]:
        """Yield the constant example 1000 times."""
        for _ in range(1000):
            unit_in = np.ones(in_shape, dtype=np.float32)
            unit_out = np.ones(out_shape, dtype=np.float32)

            inputs = (unit_in, 2 * unit_in)

            # Only two targets are produced: they belong to outputs 1 and 2
            # of the network. Output 0 has no target.
            doubled = 2 * np.ones_like(unit_out)
            tripled = 3 * np.ones_like(doubled)

            yield inputs, (doubled, tripled)

    # Both the input pair and the target pair share one dtype and one shape.
    pair_types = (dtype, dtype)
    return tf.data.Dataset.from_generator(
        gen,
        output_types=(pair_types, pair_types),
        output_shapes=((in_shape, in_shape), (out_shape, out_shape)),
    )
In the above, the dataset has two targets, corresponding to the last two outputs of the model defined below.
from tensorflow import keras
class ExampleModel(keras.Model):
    """Two-input model returning three outputs as a tuple.

    Each input goes through its own ``Dense`` layer; the two results are
    averaged, and the average is returned scaled by 1, 2 and 3.
    """

    def __init__(self, out_dim: int):
        """Create one ``Dense(out_dim)`` layer per input."""
        super().__init__()
        self.dense_a = keras.layers.Dense(out_dim)
        self.dense_b = keras.layers.Dense(out_dim)

    def call(self, inputs, training=False):
        """Return ``(x, 2 * x, 3 * x)`` for the averaged projection ``x``.

        ``inputs`` is unpacked into exactly two tensors.
        """
        first, second = inputs
        mean = (self.dense_a(first) + self.dense_b(second)) / 2
        # Output 0 (the plain mean) should not be trained.
        return mean, 2 * mean, 3 * mean
From reading the Keras documentation, to handle this case where there are more model outputs than targets in the dataset (with the surplus outputs considered non-trainable), it appears that the `loss_weights` argument of `Model.compile` should handle the matching between targets and losses. More concretely, given the following.
def model_fn(out_dim: int) -> ExampleModel:
    """Build an ``ExampleModel`` and compile it with per-output losses.

    The loss and loss-weight lists have one entry per model output, in
    output order.

    Args:
        out_dim: Output width passed to ``ExampleModel``.

    Returns:
        The compiled model.
    """
    model = ExampleModel(out_dim)
    model.compile(
        # Output 0 should not be trainable: no loss and a zero weight.
        loss=[None, 'mse', 'mse'],
        loss_weights=[0, 1, 1],
        optimizer='sgd',
    )
    return model
I would expect Keras to disregard the first model output when computing the loss, given the `None` loss and the `0` loss weight provided; however, I am seeing the following error.
ValueError: The two structures don't have the same sequence length. Input structure has length 2, while shallow structure has length 3.
Which seems to indicate that this is not the case when run as follows.
if __name__ == "__main__":
    # Sizes for the toy run.
    bs, in_dim, out_dim, epochs = 16, 4, 8, 10

    # Each generated array already carries a leading dimension of size `bs`.
    ds = ds_fn((bs, 1, in_dim), (bs, 1, out_dim)).repeat(epochs)

    m = model_fn(out_dim)
    m.fit(ds, epochs=epochs, batch_size=bs)
If I provide an additional target in `ds_fn` and `gen`, combined with a dummy loss (`lambda x, y: 0.0`, for example), then training commences. However, this will not scale to a non-toy problem with potentially large outputs and targets (images, for example).
If I instead return a `dict` from the model's `call` method and provide a `dict` for `losses` and `loss_weights` (both with keys matching those returned from `call`), there is no change (I thought that the explicit output naming might allow Keras to match outputs, losses and targets).
Am I misunderstanding the intended purpose of lists as `losses` (in which `None` is allowed) and `loss_weights`?
One way is to have both the dataset and the model use dictionaries keyed by name, and then use those keys to control the loss, metrics, and other related parameters. Here is how it can be done.
Data
def ds_fn(
    in_shape: Tuple[int],
    out_shape: Tuple[int],
    dtype: tf.DType = tf.float32
) -> tf.data.Dataset:
    """Create a dataset of 1000 identical, name-keyed ``(inputs, targets)``.

    Inputs are keyed ``'input_a'`` (all ones) and ``'input_b'`` (all twos).
    Targets are keyed by the names of the model outputs they supervise:
    ``'output_b'`` (all twos) and ``'output_c'`` (all threes). There is
    deliberately no ``'output_a'`` target, because that output is not
    trained.

    Args:
        in_shape: Shape of each input array.
        out_shape: Shape of each target array.
        dtype: Element type declared to ``from_generator`` for every
            component. The generated NumPy arrays are always ``float32``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs_dict, targets_dict)``.
    """

    # Generator function.
    def gen() -> Tuple[Tuple[np.array], Tuple[np.array]]:
        for _ in range(1000):
            # Inputs.
            x0 = np.ones(in_shape, dtype=np.float32)
            x1 = 2 * x0

            # Outputs.
            y0 = np.ones(out_shape, dtype=np.float32)
            y1 = 2 * np.ones_like(y0)
            y2 = 3 * np.ones_like(y1)

            # Targets correspond to outputs 1 and 2 of the network
            # ('output_b' and 'output_c'). Output 0 ('output_a') has no
            # target, so the target keys must name the trained outputs.
            yield (
                {'input_a': x0, 'input_b': x1},
                {'output_b': y1, 'output_c': y2},
            )

    return tf.data.Dataset.from_generator(
        gen,
        output_types=(
            {'input_a': dtype, 'input_b': dtype},
            {'output_b': dtype, 'output_c': dtype}
        ),
        output_shapes=(
            {'input_a': in_shape, 'input_b': in_shape},
            {'output_b': out_shape, 'output_c': out_shape}
        )
    )
Model
class ExampleModel(keras.Model):
    """Two-input model returning three named outputs.

    Inputs are read from a mapping by the keys ``'input_a'`` and
    ``'input_b'``; outputs are returned under ``"output_a"``,
    ``"output_b"`` and ``"output_c"``.
    """

    def __init__(self, out_dim: int):
        """Create one ``Dense(out_dim)`` layer per input."""
        super().__init__()
        self.dense_a = keras.layers.Dense(out_dim)
        self.dense_b = keras.layers.Dense(out_dim)

    def call(self, inputs, training=False):
        """Average the two projections and return them scaled by 1, 2, 3."""
        branch_a = self.dense_a(inputs['input_a'])
        branch_b = self.dense_b(inputs['input_b'])
        mean = (branch_a + branch_b) / 2

        outputs = {"output_a": mean}
        outputs["output_b"] = 2 * mean
        outputs["output_c"] = 3 * mean
        return outputs
Compile
def model_fn(out_dim: int) -> ExampleModel:
    """Build an ``ExampleModel`` and compile it to train two named outputs.

    Losses and loss weights are dictionaries keyed by output name. Only
    ``"output_b"`` and ``"output_c"`` receive a loss; ``"output_a"`` is
    left out of both dictionaries so that it is not trained.

    Args:
        out_dim: Output width passed to ``ExampleModel``.

    Returns:
        The compiled model.
    """
    m = ExampleModel(out_dim)

    losses = {
        "output_b": 'mse',
        "output_c": 'mse'
    }

    # Weight exactly the outputs that have a loss; the keys must match
    # those of `losses`.
    loss_weights = {
        "output_b": 1,
        "output_c": 1
    }

    m.compile(
        loss=losses,
        loss_weights=loss_weights,
        optimizer='sgd'
    )
    return m
Run
# Build the compiled model, then train it on the dictionary-keyed dataset.
m = model_fn(out_dim)
m.fit(ds, batch_size=bs, epochs=epochs)
loss: 3.2685e-13 - output_b_loss: 3.2685e-13 - output_c_loss: 0.0000e+00
Additional Resource