Tags: tensorflow, loss-function, error-handling

ValueError: No gradients provided for any variable in my custom loss - Why?


Here is my code (you can copy and paste it to run it):

import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler

x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]]).astype(np.float32)
y = np.array([[-1], [3], [7], [-2]]).astype(np.float32)

# scale x and y
x_scaler = MinMaxScaler()
x_scaler.fit(x)
x_sc = x_scaler.transform(x)

y_scaler = MinMaxScaler()
y_scaler.fit(y)
y_sc = y_scaler.transform(y)

batch_size = 2
ds = tf.data.Dataset.from_tensor_slices((x_sc, y_sc)).batch(batch_size=batch_size)

# create the model
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(2,)),
        tf.keras.layers.Dense(units=3, activation='relu'),
        tf.keras.layers.Dense(units=1)
    ]
)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

def standard_loss(y_batch, y_pred, y_min_max):
    batches = y_pred.shape[0]
    loss = 0.0
    y_true_unsc = tf.convert_to_tensor(y_min_max.inverse_transform(y_batch), tf.float32)
    y_pred_unsc = tf.convert_to_tensor(y_min_max.inverse_transform(y_pred), tf.float32)

    for batch in range(batches):
        loss += tf.math.reduce_mean(tf.math.square(y_true_unsc[batch] - y_pred_unsc[batch]))

    return loss / batches

# training loop
epochs = 1
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch, ))
    for step, (x_batch, y_batch) in enumerate(ds):
        with tf.GradientTape() as tape:
            y_pred = model(x_batch, training=True)
            loss_value = standard_loss(y_batch, y_pred, y_scaler)

        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

The problem is located in my cost function (standard_loss). When I don't unscale my data, everything works fine, as below:

def standard_loss(y_batch, y_pred, y_min_max):
    batches = y_pred.shape[0]
    loss = 0.0

    for batch in range(batches):
        loss += tf.math.reduce_mean(tf.math.square(y_batch[batch] - y_pred[batch]))

    return loss / batches

But when I leave it as above, I get this error:

ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].

I need to unscale my data in order to use it for other computations.

Could someone help me understand why this happens?

EDIT 1:

The problem is due to the tape (in tf.GradientTape() as tape), which records all the operations; it is this series of operations that is walked back through in reverse when the gradient is computed. My goal now is to figure out how to unscale my y_pred variable without the tape recording it and going astray when computing the gradient. Ideas?
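To illustrate what I mean, here is a minimal sketch (toy values, not my actual model) of how converting to NumPy inside the tape breaks the chain from the loss back to the variables:

import tensorflow as tf

x = tf.constant([[1.0], [2.0]])
w = tf.Variable([[3.0]])

with tf.GradientTape() as tape:
    y = tf.matmul(x, w)            # recorded by the tape
    y_np = y.numpy() * 2.0         # NumPy from here on: the tape stops recording
    loss = tf.reduce_mean(tf.convert_to_tensor(y_np, tf.float32))

print(tape.gradient(loss, w))      # prints None: no recorded path from loss back to w

This None is exactly what makes optimizer.apply_gradients raise "No gradients provided for any variable".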

EDIT 2:

In my custom loss, the unscale operation is a NumPy operation (MinMaxScaler.inverse_transform), and NumPy operations are not recorded by the tape since they leave the TensorFlow graph. That is why the error appears. So I'm going to look for a way to scale my data with TensorFlow operations, so that I can also unscale it with TensorFlow operations.
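For example, a TensorFlow-only min-max scale/unscale pair could look like this (a sketch using the same formula as MinMaxScaler, so the tape can differentiate through it):

import tensorflow as tf

y = tf.constant([[-1.0], [3.0], [7.0], [-2.0]])

# same min-max formula as MinMaxScaler, but written with TensorFlow ops
ymin = tf.reduce_min(y, axis=0)
ymax = tf.reduce_max(y, axis=0)

def scale(t):
    return (t - ymin) / (ymax - ymin)

def unscale(t):
    return t * (ymax - ymin) + ymin

print(unscale(scale(y)))  # recovers the original y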

SOLUTION:

EDIT 2 is the solution. Now, everything works perfectly.


Solution

  • In my custom loss the unscale operation is a NumPy operation, and NumPy operations are not recorded by the tape since they leave the TensorFlow graph. That is why the error appears. One solution is to use TensorFlow operations to scale and unscale the data, so that the tape can record the whole path. See the code below:

    import tensorflow as tf
    import numpy as np
    
    x = tf.convert_to_tensor([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=tf.float32)
    y = tf.convert_to_tensor([[-1], [3], [7], [-2]], dtype=tf.float32)
    
    # retrieve x and y min max
    xmin, xmax = tf.reduce_min(x, axis=0), tf.reduce_max(x, axis=0)
    ymin, ymax = tf.reduce_min(y, axis=0), tf.reduce_max(y, axis=0)
    
    batch_size = 2
    ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    
    # create the model
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Input(shape=(2,)),
            tf.keras.layers.Dense(units=3, activation='relu'),
            tf.keras.layers.Dense(units=1)
        ]
    )
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
    
    def standard_loss(y_batch, y_pred):
        # unscale y_pred (note that y_batch has never been scaled)
        y_pred_unsc = y_pred * (ymax - ymin) + ymin
    
        return tf.reduce_mean(tf.square(y_batch - y_pred_unsc))
    
    # training loop
    epochs = 1
    for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch, ))
    for step, (x_batch, y_batch) in enumerate(ds):
        with tf.GradientTape() as tape:
            # scale data (we see that I do not quit tensorflow operations)
            x_scale = (x_batch - xmin)/(xmax - xmin)
            y_pred = model(x_scale, training=True)
            loss_value = standard_loss(y_batch, y_pred)
    
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
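
As a usage sketch after training (the sample values below are made up for illustration), the same tensors can be reused to scale a new input and unscale its prediction:

    x_new = tf.constant([[2.0, 3.0]])               # hypothetical new sample
    x_new_sc = (x_new - xmin) / (xmax - xmin)       # scale with the same tensors
    y_new = model(x_new_sc) * (ymax - ymin) + ymin  # prediction in original units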