I have the following codes and problem when trying to create OrderedDict for multiple feature inputs (i.e., features a-g) and one label h.
def preprocess(dataset):
def batch_format_fn(element):
return collections.OrderedDict(
x=collections.OrderedDict(
a=tf.TensorSpec(shape=[None,], dtype=tf.int32),
b=tf.TensorSpec(shape=[None,], dtype=tf.int32),
c=tf.TensorSpec(shape=[None,], dtype=tf.int32),
d=tf.TensorSpec(shape=[None,], dtype=tf.int32),
e=tf.TensorSpec(shape=[None,], dtype=tf.int32),
f=tf.TensorSpec(shape=[None,], dtype=tf.int32),
g=tf.TensorSpec(shape=[None,], dtype=tf.int32)),
y=tf.TensorSpec(shape=[None,], dtype=tf.int32))
return dataset.map(batch_format_fn).prefetch(PREFETCH_BUFFER)
preprocessed_sample_dataset = preprocess(example_dataset)
def create_keras_model():
model = Sequential([
feature_layer,
Dense(64, activation='relu'),
Dense(64, activation='relu'),
Dense(3, activation='softmax') #classification 3 outputs
])
return model
def model_fn():
keras_model = create_keras_model()
return tff.learning.from_keras_model(
keras_model,
input_spec=preprocessed_sample_dataset.element_spec,
loss=losses.SparseCategoricalCrossentropy(),
metrics=[metrics.SparseCategoricalAccuracy()])
It shows an error like this when executing input_spec=preprocessed_sample_dataset.element_spec
:
TypeError: Unsupported return value from function passed to Dataset.map(): OrderedDict([('x', OrderedDict([('a', TensorSpec(shape=(None,), dtype=tf.int32, name=None)), ('b', TensorSpec(shape=(None,), dtype=tf.int32, name=None)), ('c', TensorSpec(shape=(None,), dtype=tf.int32, name=None)), ('d', TensorSpec(shape=(None,), dtype=tf.int32, name=None)), ('e', TensorSpec(shape=(None,), dtype=tf.int32, name=None)), ('f', TensorSpec(shape=(None,), dtype=tf.int32, name=None)), ('g', TensorSpec(shape=(None,), dtype=tf.int32, name=None))])), ('y', TensorSpec(shape=(None,), dtype=tf.int32, name=None))]).
I have read this alternative solution, however it is not clear how to implement it in my case. Hence, how to correctly assign ordered dict for the multiple features in TFF?
The current example_dataset.element_spec is as follows:
OrderedDict([
('a', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('b', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('c', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('d', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('e', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('f', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('g', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('y', TensorSpec(shape=(None,), dtype=tf.int32, name=None))])
I want the element_spec becomes like this:
OrderedDict([('x', OrderedDict([
('a', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('b', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('c', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('d', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('e', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('f', TensorSpec(shape=(None,), dtype=tf.int32, name=None)),
('g', TensorSpec(shape=(None,), dtype=tf.int32, name=None))])),
('y', TensorSpec(shape=(None,), dtype=tf.int32, name=None))])
How to make the element_spec as the latter one using the batch_format_fn?
batch_format_fn
currently returns a structure of tensor types; tf.data.Dataset.map
expects to receive a structure of tensors as the return value of the function.
We should update batch_format_fn
to reformat its element
argument and return that instead. Let's try something like:
def batch_format_fn(element):
feature_dict = collections.OrderedDict(
a=element['a'],
b=element['b'],
c=element['c'],
d=element['d'],
e=element['e'],
f=element['f'],
g=element['g'],
)
return collections.OrderedDict(x=feature_dict, y=element['y'])
and keeping everything else the same.