I'm creating a TensorFlow Estimator from a Keras model. The estimator is created, and the model trains and evaluates without issues. However, after the last evaluation the model is exported (because I use the FinalExporter API), and the export fails with a dimension mismatch error:
INFO:tensorflow:Performing the final export in the end of training.
INFO:tensorflow:Calling model_fn.
Traceback (most recent call last):
File "/home/austinguo/.../lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1659, in _create_c_op
c_op = c_api.TF_FinishOperation(op_desc)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Shape must be rank 4 but is rank 3 for 'model/block5_conv1/Conv2D' (op: 'Conv2D') with input shapes: [14,14,512], [3,3,512,512].
It appears that the issue occurs when clone_model is called on the Keras model before export (the full traceback below shows this). However, the dimension mismatch error seems to imply that something is wrong with my input pipeline. That doesn't make sense to me: if the format of my input functions were incorrect, or my input pipeline were broken somewhere else, I don't see how the model could train and evaluate without issues.
The full traceback is below:
Traceback (most recent call last):
File "/usr/lib/python3.5/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/austinguo/tf-keras_siamese/template/trainer/task.py", line 277, in <module>
main()
File "/home/austinguo/tf-keras_siamese/template/trainer/task.py", line 262, in main
experiment.run(estimator, args)
File "/home/austinguo/tf-keras_siamese/template/trainer/experiment.py", line 102, in run
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/training.py", line 471, in train_and_evaluate
return executor.run()
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/training.py", line 611, in run
return self.run_local()
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/training.py", line 712, in run_local
saving_listeners=saving_listeners)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 358, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1124, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1158, in _train_model_default
saving_listeners)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1408, in _train_with_estimator_spec
any_step_done = True
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 788, in __exit__
self._close_internal(exception_type)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py", line 821, in _close_internal
h.end(self._coordinated_creator.tf_sess)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/training/basic_session_run_hooks.py", line 588, in end
self._save(session, last_step)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/training/basic_session_run_hooks.py", line 607, in _save
if l.after_save(session, step):
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/training.py", line 517, in after_save
self._evaluate(global_step_value) # updates self.eval_result
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/training.py", line 537, in _evaluate
self._evaluator.evaluate_and_export())
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/training.py", line 925, in evaluate_and_export
is_the_final_export)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/training.py", line 958, in _export_eval_result
is_the_final_export=is_the_final_export))
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/exporter.py", line 419, in export
is_the_final_export)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/exporter.py", line 126, in export
strip_default_attrs=self._strip_default_attrs)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1645, in export_savedmodel
experimental_mode=model_fn_lib.ModeKeys.PREDICT)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 723, in export_saved_model
checkpoint_path=checkpoint_path)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 827, in experimental_export_all_saved_models
save_variables, mode=model_fn_lib.ModeKeys.PREDICT)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 897, in _add_meta_graph_for_mode
config=self.config)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1112, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/keras.py", line 278, in model_fn
labels)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/keras.py", line 201, in _clone_and_build_model
optimizer_iterations=global_step)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/keras/models.py", line 466, in clone_and_build_model
clone = clone_model(model, input_tensors=input_tensors)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/keras/models.py", line 271, in clone_model
return _clone_functional_model(model, input_tensors=input_tensors)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/keras/models.py", line 161, in _clone_functional_model
**kwargs))
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/keras/engine/base_layer.py", line 554, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/keras/engine/network.py", line 815, in call
mask=masks)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/keras/engine/network.py", line 1002, in _run_internal_graph
output_tensors = layer.call(computed_tensor, **kwargs)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/keras/layers/convolutional.py", line 194, in call
outputs = self._convolution_op(inputs, self.kernel)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 966, in __call__
return self.conv_op(inp, filter)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 591, in __call__
return self.call(inp, filter)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 208, in __call__
name=self.name)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 1026, in conv2d
data_format=data_format, dilations=dilations, name=name)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
op_def=op_def)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1823, in __init__
control_input_ops)
File "/home/austinguo/census/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1662, in _create_c_op
raise ValueError(str(e))
ValueError: Shape must be rank 4 but is rank 3 for 'model/block5_conv1/Conv2D' (op: 'Conv2D') with input shapes: [14,14,512], [3,3,512,512].
Any help would be greatly appreciated. Thank you!
Solved the issue.
The cause of the dimension mismatch was my use of TensorFlow's experimental make_batched_features_dataset() Dataset API, which creates datasets that are already batched according to the specified batch size. For this to work, however, the input Tensors to the model need an additional leading batch dimension of -1, i.e. a shape of (-1, ...), where ... stands for the remaining shape dimensions.
Without this, the Tensors served by the ServingInputReceiver do not have the correct shape. Training can still work, though, if you accidentally batch the dataset a second time after make_batched_features_dataset() has already batched it.