RaggedTensor becomes Tensor in loss function

I have a sequence-to-sequence model in which I am attempting to predict the output sequence following a transformation. In doing so, I need to compute the MSE between elements in a ragged tensor:

def cpu_bce(y_value, y_pred):
    """Return the mean squared error of two ragged tensors, computed on CPU.

    Both arguments are densified with ``.to_tensor()`` before the loss is
    evaluated, so each one must provide that method. A plain ``tf.Tensor``
    does not, which is what produces the ``AttributeError`` shown below.

    NOTE(review): despite the ``bce`` in the name, the loss applied here is
    ``MeanSquaredError``.
    """
    with tf.device('/CPU:0'):
        # Fails on these calls whenever Keras passes dense tensors in.
        y_v = y_value.to_tensor()
        y_p = y_pred.to_tensor()
        
        return tf.keras.losses.MeanSquaredError()(y_v, y_p)

Yet, when executed, it raises the following error:

    AttributeError: 'Tensor' object has no attribute 'to_tensor'

What causes this issue? The GRU seems to return a RaggedTensor when called directly. Yet at runtime, the arguments to the loss functions are normal Tensors.

import tensorflow as tf
import numpy as np
import functools

def generate_example(n):
    """Yield ``n`` random (input, target) sequence pairs of varying length.

    For every pair, a length is drawn with ``np.random.randint(7, 11)``, the
    input is a 1-D vector of that many ``np.random.random`` samples, and the
    target is twice the running (cumulative) sum of the input. Both are
    wrapped with ``tf.constant`` before being yielded.
    """
    for _ in range(n):
        length = np.random.randint(7, 11)
        inputs = np.random.random((length,))
        running_total = inputs.cumsum()
        targets = 2 * running_total

        yield tf.constant(inputs), tf.constant(targets)

# Number of synthetic examples the generator is asked to produce.
N = 200

# Stream (x, y) pairs from generate_example; both components are declared as
# variable-length 1-D float32 tensors.
# NOTE(review): generate_example wraps NumPy arrays in tf.constant without an
# explicit dtype -- confirm they arrive as the float32 declared here.
ds = tf.data.Dataset.from_generator(
    functools.partial(generate_example, N),
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
    ),
)

def rag(x, y):
    """Convert one (x, y) pair into a pair of ragged tensors.

    Each argument gets a new leading axis and a new trailing axis, and the
    result is converted with ``tf.RaggedTensor.from_tensor``.

    Returns:
        A 2-tuple ``(ragged_x, ragged_y)``.
    """
    def _to_ragged(seq):
        # Add the leading axis first, then the trailing one.
        with_leading = tf.expand_dims(seq, 0)
        with_both = tf.expand_dims(with_leading, -1)
        return tf.RaggedTensor.from_tensor(with_both)

    return _to_ragged(x), _to_ragged(y)

def unexp(x, y):
    """Remove axis 1 from both elements of an (x, y) pair.

    Returns:
        A 2-tuple with ``tf.squeeze(..., axis=1)`` applied to each argument.
    """
    squeezed_x = tf.squeeze(x, axis=1)
    squeezed_y = tf.squeeze(y, axis=1)
    return squeezed_x, squeezed_y

# Convert each example to ragged form, group 32 examples per batch, then drop
# axis 1 from every batch.
ds = ds.map(rag).batch(32).map(unexp)

# One-unit GRU fed by a ragged float32 input of shape [None, None, 1]; it
# returns the full output sequence (return_sequences=True).
model = tf.keras.Sequential([
    tf.keras.Input(
        type_spec=tf.RaggedTensorSpec(shape=[None, None, 1],
                                      dtype=tf.float32)),
    tf.keras.layers.GRU(1, return_sequences=True),
])

def cpu_bce(y_value, y_pred):
    """Return the mean squared error of two ragged tensors, computed on CPU.

    Both arguments are densified with ``.to_tensor()`` before the loss is
    evaluated, so each one must provide that method. A plain ``tf.Tensor``
    does not, which is what produces the ``AttributeError`` reported in the
    question.

    NOTE(review): despite the ``bce`` in the name, the loss applied here is
    ``MeanSquaredError``.
    """
    with tf.device('/CPU:0'):
        # Fails on these calls whenever Keras passes dense tensors in.
        y_v = y_value.to_tensor()
        y_p = y_pred.to_tensor()
        
        return tf.keras.losses.MeanSquaredError()(y_v, y_p)

# cpu_bce is registered both as the loss and as a metric.
model.compile(loss=cpu_bce, optimizer="adam", metrics=[cpu_bce])

model.fit(ds, epochs=3)

Solution

You can rewrite your loss function in either of the following ways to make it work.

def cpu_bce(y_value, y_pred):
    """Return the mean squared error of target and prediction, computed on CPU.

    Either argument may be a ``tf.RaggedTensor`` or a dense tensor. Ragged
    arguments are densified with ``.to_tensor()``; anything else is passed
    through unchanged, so the function works whichever kind it receives.

    NOTE(review): ``to_tensor()`` pads ragged rows to a common length, so the
    padded positions presumably take part in the mean -- confirm that is
    acceptable for this loss.
    """
    def _densify(tensor):
        # Only ragged tensors offer (and need) to_tensor().
        return tensor.to_tensor() if isinstance(tensor, tf.RaggedTensor) else tensor

    with tf.device('/CPU:0'):
        dense_target = _densify(y_value)
        dense_prediction = _densify(y_pred)
        mse = tf.keras.losses.MeanSquaredError()
        return mse(dense_target, dense_prediction)

# The same function serves as both loss and metric; its isinstance guards let
# it accept ragged and dense tensors alike.
model.compile(loss=cpu_bce, optimizer="adam", metrics=[cpu_bce])
model.fit(ds, epochs=3) # loss & metrics will vary

Alternatively, you don't need to convert the ragged tensors at all; just pass them through as they are.

def cpu_bce(y_value, y_pred):
    """Return the mean squared error of target and prediction, computed on CPU.

    The arguments are handed to ``MeanSquaredError`` exactly as received; no
    ragged-to-dense conversion is performed here.
    """
    with tf.device('/CPU:0'):
        mse = tf.keras.losses.MeanSquaredError()
        return mse(y_value, y_pred)

model.compile(loss=cpu_bce, optimizer="adam", metrics=[cpu_bce])
model.fit(ds, epochs=3) # loss & metrics will be alike

The reason you got the AttributeError is that, for metrics=[cpu_bce], the target and prediction are converted to dense tensors internally. You can verify this by printing the target and prediction inside the function: when it is called as the loss they are ragged, but when it is called as the metric they are plain tensors. If this behavior is inconvenient, feel free to open an issue on GitHub.