I am trying to train a CNN in Google Colab (I also tried on a Tesla V100), with the Keras backend set to float16:

import tensorflow as tf

tf.keras.backend.set_floatx('float16')

but compiling a model that contains a Conv2D layer throws an error:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(300, 300, 3)),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(.5),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
Error message:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-9-c764c0cc3aa3> in <module>()
9 ])
10
---> 11 model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/checkpointable/base.py in _method_wrapper(self, *args, **kwargs)
440 self._setattr_tracking = False # pylint: disable=protected-access
441 try:
--> 442 method(self, *args, **kwargs)
443 finally:
444 self._setattr_tracking = previous_value # pylint: disable=protected-access
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in compile(self, optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, target_tensors, distribute, **kwargs)
447 else:
448 weighted_loss = training_utils.weighted_masked_objective(loss_fn)
--> 449 output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
450
451 if len(self.outputs) > 1:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training_utils.py in weighted(y_true, y_pred, weights, mask)
674 score_array = math_ops.reduce_sum(score_array)
675 weights = math_ops.reduce_sum(weights)
--> 676 score_array = math_ops.div_no_nan(score_array, weights)
677 return K.mean(score_array)
678
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
178 """Call target, and fall back on dispatchers if there is a TypeError."""
179 try:
--> 180 return target(*args, **kwargs)
181 except (TypeError, ValueError):
182 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py in div_no_nan(x, y, name)
1025 raise TypeError("x and y must have the same dtype, got %r != %r" %
1026 (x_dtype, y_dtype))
-> 1027 return gen_math_ops.div_no_nan(x, y, name=name)
1028
1029
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py in div_no_nan(x, y, name)
3020 # Add nodes to the TensorFlow graph.
3021 _, _, _op = _op_def_lib._apply_op_helper(
-> 3022 "DivNoNan", x=x, y=y, name=name)
3023 _result = _op.outputs[:]
3024 _inputs_flat = _op.inputs
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
608 _SatisfiesTypeConstraint(base_type,
609 _Attr(op_def, input_arg.type_attr),
--> 610 param_name=input_name)
611 attrs[input_arg.type_attr] = attr_value
612 inferred_from[input_arg.type_attr] = input_name
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py in _SatisfiesTypeConstraint(dtype, attr_def, param_name)
58 "allowed values: %s" %
59 (param_name, dtypes.as_dtype(dtype).name,
---> 60 ", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
61
62
TypeError: Value passed to parameter 'x' has DataType float16 not in list of allowed values: float32, float64
However, when I remove the convolution layers, the model compiles without any issue:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(.5),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
So the machine clearly supports float16. Is there anything special I need to do in Keras to make Conv2D work in float16?
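For what it's worth, the float16 setting itself does seem to be picked up by Keras; here is a minimal check I used (a sketch, assuming the stock Colab TensorFlow 1.x install):

import tensorflow as tf

tf.keras.backend.set_floatx('float16')
print(tf.keras.backend.floatx())   # prints: float16

# The Dense-only model really does end up with float16 weights.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print(model.layers[0].get_weights()[0].dtype)   # prints: float16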
This appears to be fixed in the latest tf-nightly build.
I was able to execute your code successfully using TensorFlow version '1.14.1-dev20190520'.
Install tf-nightly from the terminal:
pip install tf-nightly
Install tf-nightly in Google Colab:
!pip install tf-nightly
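After installing, restart the Colab runtime so the new version is picked up. A quick way to confirm the fix (a sketch re-using a trimmed version of your model, not an official test) is to re-check the version and recompile with float16:

import tensorflow as tf

print(tf.__version__)   # should report a 1.14 dev build or newer

tf.keras.backend.set_floatx('float16')

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(300, 300, 3)),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# On tf-nightly this no longer raises the DivNoNan float16 TypeError.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
print('compiled OK with floatx =', tf.keras.backend.floatx())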