Dimensions in TensorFlow / Keras with sparse_categorical_crossentropy

I cannot figure out how to use a TensorFlow dataset as input for my model. I have an X of shape (n_sample, max_sentence_size) and a y of shape (n_sample,), but I cannot get the dimensions to match, and I am not sure what TensorFlow does internally.

Below is a reproducible example using zero-filled matrices. My real data is not empty: it is an integer representation of text.

# Minimal reproduction of the shape-mismatch error raised by model.fit().
# Imports come first so the snippet runs top to bottom: `tf` and `np` are
# both used before the model is defined.
import numpy as np
import tensorflow as tf

# Zero-filled stand-ins for the real data:
# X is (n_sample, max_sentence_size), y is (n_sample,).
X_train = np.zeros((16, 6760))
y_train = np.zeros((16))

train = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# Prepare for tensorflow
BUFFER_SIZE = 10000
BATCH_SIZE = 64
VOCAB_SIZE = 5354

# NOTE: batching is left commented out on purpose; this is the
# configuration that raises the ValueError when fit() is called.
train = train.shuffle(BUFFER_SIZE)#.batch(BATCH_SIZE)

# Select index of interest in text
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64, mask_zero=False),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    # One softmax output per vocabulary entry.
    tf.keras.layers.Dense(VOCAB_SIZE, activation='softmax'),
])

model.compile(loss="sparse_categorical_crossentropy",
              # loss=tf.keras.losses.MeanAbsoluteError(),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['sparse_categorical_accuracy'])

# This call raises the ValueError.
history = model.fit(train, epochs=3)

    ValueError                                Traceback (most recent call last)
    <ipython-input-74-3a160a5713dd> in <module>
    ----> 1 history = model.fit(train, epochs=3,
          2                     # validation_data=test,
          3                     # validation_steps=30
          4                    )
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
        817         max_queue_size=max_queue_size,
        818         workers=workers,
    --> 819         use_multiprocessing=use_multiprocessing)
        820 
        821   def evaluate(self,
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
        340                 mode=ModeKeys.TRAIN,
        341                 training_context=training_context,
    --> 342                 total_epochs=epochs)
        343             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
        344 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
        126         step=step, mode=mode, size=current_batch_size) as batch_logs:
        127       try:
    --> 128         batch_outs = execution_function(iterator)
        129       except (StopIteration, errors.OutOfRangeError):
        130         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
         96     # `numpy` translates Tensors to values in Eager mode.
         97     return nest.map_structure(_non_none_constant_value,
    ---> 98                               distributed_function(input_fn))
         99 
        100   return execution_function
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
        566         xla_context.Exit()
        567     else:
    --> 568       result = self._call(*args, **kwds)
        569 
        570     if tracing_count == self._get_tracing_count():
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
        613       # This is the first call of __call__, so we have to initialize.
        614       initializers = []
    --> 615       self._initialize(args, kwds, add_initializers_to=initializers)
        616     finally:
        617       # At this point we know that the initialization is complete (or less
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
        495     self._concrete_stateful_fn = (
        496         self._stateful_fn._get_concrete_function_internal_garbage_collected( 
# pylint: disable=protected-access
    --> 497             *args, **kwds))
        498 
        499     def invalid_creator_scope(*unused_args, **unused_kwds):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args,
**kwargs)
       2387       args, kwargs = None, None
       2388     with self._lock:
    -> 2389       graph_function, _, _ = self._maybe_define_function(args, kwargs)
       2390     return graph_function
       2391 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
       2701 
       2702       self._function_cache.missed.add(call_context_key)
    -> 2703       graph_function = self._create_graph_function(args, kwargs)
       2704       self._function_cache.primary[cache_key] = graph_function
       2705       return graph_function, args, kwargs
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
       2591             arg_names=arg_names,
       2592             override_flat_arg_shapes=override_flat_arg_shapes,
    -> 2593             capture_by_value=self._capture_by_value),
       2594         self._function_attributes,
       2595         # Tell the ConcreteFunction to clean up its graph once it goes out of
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
        976                                           converted_func)
        977 
    --> 978       func_outputs = python_func(*func_args, **func_kwargs)
        979 
        980       # invariant: `func_outputs` contains only Tensors, CompositeTensors,
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
        437         # __wrapped__ allows AutoGraph to swap in a converted function. We give
        438         # the function a weak reference to itself to avoid a reference cycle.
    --> 439         return weak_wrapped_fn().__wrapped__(*args, **kwds)
        440     weak_wrapped_fn = weakref.ref(wrapped_fn)
        441 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in distributed_function(input_iterator)
         83     args = _prepare_feed_values(model, input_iterator, mode, strategy)
         84     outputs = strategy.experimental_run_v2(
    ---> 85         per_replica_function, args=args)
         86     # Out of PerReplica outputs reduce or pick values to return.
         87     all_outputs = dist_utils.unwrap_output_dict(
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
        761       fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
        762                                 convert_by_default=False)
    --> 763       return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
        764 
        765   def reduce(self, reduce_op, value, axis):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
       1817       kwargs = {}
       1818     with self._container_strategy().scope():
    -> 1819       return self._call_for_each_replica(fn, args, kwargs)
       1820 
       1821   def _call_for_each_replica(self, fn, args, kwargs):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/distribute/distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
       2162         self._container_strategy(),
       2163         replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
    -> 2164       return fn(*args, **kwargs)
       2165 
       2166   def _reduce_to(self, reduce_op, value, destinations):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
        290   def wrapper(*args, **kwargs):
        291     with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
    --> 292       return func(*args, **kwargs)
        293 
        294   if inspect.isfunction(func) or inspect.ismethod(func):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
        431       y,
        432       sample_weights=sample_weights,
    --> 433       output_loss_metrics=model._output_loss_metrics)
        434 
        435   if reset_metrics:
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
        310           sample_weights=sample_weights,
        311           training=True,
    --> 312           output_loss_metrics=output_loss_metrics))
        313   if not isinstance(outs, list):
        314     outs = [outs]
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
        251               output_loss_metrics=output_loss_metrics,
        252               sample_weights=sample_weights,
    --> 253               training=training))
        254       if total_loss is None:
        255         raise ValueError('The model cannot be run '
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
        165 
        166         if hasattr(loss_fn, 'reduction'):
    --> 167           per_sample_losses = loss_fn.call(targets[i], outs[i])
        168           weighted_losses = losses_utils.compute_weighted_loss(
        169               per_sample_losses,
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/losses.py in call(self, y_true, y_pred)
        219       y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
        220           y_pred, y_true)
    --> 221     return self.fn(y_true, y_pred, **self._fn_kwargs)
        222 
        223   def get_config(self):
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/losses.py in sparse_categorical_crossentropy(y_true, y_pred, from_logits, axis)
        976 def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1):
        977   return K.sparse_categorical_crossentropy(
    --> 978       y_true, y_pred, from_logits=from_logits, axis=axis)
        979 
        980 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/keras/backend.py in sparse_categorical_crossentropy(target, output, from_logits, axis)
       4571     with get_graph().as_default():
       4572       res = nn.sparse_softmax_cross_entropy_with_logits_v2(
    -> 4573           labels=target, logits=output)
       4574   else:
       4575     res = nn.sparse_softmax_cross_entropy_with_logits_v2(
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_ops.py in sparse_softmax_cross_entropy_with_logits_v2(labels, logits, name)
       3535   """
       3536   return sparse_softmax_cross_entropy_with_logits(
    -> 3537       labels=labels, logits=logits, name=name)
       3538 
       3539 
    
    /opt/conda/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_ops.py in sparse_softmax_cross_entropy_with_logits(_sentinel, labels, logits, name)
       3451                        "should equal the shape of logits except for the last "
       3452                        "dimension (received %s)." % (labels_static_shape,
    -> 3453                                                      logits.get_shape()))
       3454     # Check if no reshapes are required.
       3455     if logits.get_shape().ndims == 2:
    
    ValueError: Shape mismatch: The shape of labels (received (1,)) should equal the shape of logits except for the last dimension (received (6760, 5354)).

Solution

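This works for me in TensorFlow 2.0. Two things are changed: `y_train` gets an explicit trailing dimension, and the dataset is batched before being passed to `fit`.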

# Working version of the example: the labels get an explicit trailing
# dimension and the dataset is batched before being handed to fit().
import numpy as np
import tensorflow as tf

# Prepare for tensorflow
BUFFER_SIZE = 10000
BATCH_SIZE = 8  # 16 samples -> 2 batches per epoch
VOCAB_SIZE = 5354


X_train = np.zeros((16, 6760))
y_train = np.zeros((16, 1))  # This is changed: (16, 1) instead of (16,)
train = tf.data.Dataset.from_tensor_slices((X_train, y_train))

# This is changed: batch the dataset so every element passed to fit()
# carries a leading batch dimension.
train = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# Select index of interest in text

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=64,
                              input_length=6760, mask_zero=False),
    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    # One softmax output per vocabulary entry.
    tf.keras.layers.Dense(VOCAB_SIZE, activation='softmax'),
])

# summary() prints the table itself and returns None, so it is not
# wrapped in print().
model.summary()

model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['sparse_categorical_accuracy'])

history = model.fit(train, epochs=3)