Here is my code (you can copy and paste it to run it):
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Toy regression data: four samples, two features each, one target each.
x = np.array([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=np.float32)
y = np.array([[-1], [3], [7], [-2]], dtype=np.float32)

# Scale x and y with separate min-max scalers; the fitted y_scaler is kept
# so the loss can map values back to the original target range.
x_scaler = MinMaxScaler()
x_sc = x_scaler.fit_transform(x)
y_scaler = MinMaxScaler()
y_sc = y_scaler.fit_transform(y)

# Serve the scaled (features, target) pairs in mini-batches.
batch_size = 2
ds = tf.data.Dataset.from_tensor_slices((x_sc, y_sc)).batch(batch_size)

# Create the model: 2 inputs -> 3 ReLU units -> 1 linear output.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(2,)),
    tf.keras.layers.Dense(units=3, activation='relu'),
    tf.keras.layers.Dense(units=1),
])

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
def standard_loss(y_batch, y_pred, y_min_max):
    """Mean squared error computed after unscaling targets and predictions.

    NOTE: this is the version that fails. ``y_min_max.inverse_transform`` is
    a NumPy operation, so the gradient tape does not record it; the returned
    loss is no longer connected to the model variables and the training loop
    raises "No gradients provided for any variable".

    Args:
        y_batch: scaled targets of the current batch.
        y_pred: model output for the current batch.
        y_min_max: scaler object providing ``inverse_transform`` (the caller
            passes the scaler fitted on ``y``).

    Returns:
        The per-row mean squared errors, summed and divided by the number of
        rows in ``y_pred``.
    """
    # Number of rows in the batch.
    batches = y_pred.shape[0]
    loss = 0.0
    # Leaving TensorFlow here: inverse_transform is not recorded by the tape,
    # and convert_to_tensor only wraps the result in a fresh tensor.
    y_true_unsc = tf.convert_to_tensor(y_min_max.inverse_transform(y_batch), tf.float32)
    y_pred_unsc = tf.convert_to_tensor(y_min_max.inverse_transform(y_pred), tf.float32)
    # Accumulate the squared error row by row, then average over the rows.
    for batch in range(batches):
        loss += tf.math.reduce_mean(tf.math.square(y_true_unsc[batch] - y_pred_unsc[batch]))
    return loss / batches
# training loop
epochs = 1
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch, ))
    for step, (x_batch, y_batch) in enumerate(ds):
        # The forward pass and the loss are computed inside the tape context
        # so that the tape can record the operations it later differentiates.
        with tf.GradientTape() as tape:
            y_pred = model(x_batch, training=True)
            loss_value = standard_loss(y_batch, y_pred, y_scaler)
        # Differentiate the loss w.r.t. the model weights and apply one
        # optimizer update with the resulting gradients.
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
The problem is located in my cost function (standard_loss). When I don't unscale my data, everything works, as shown below:
def standard_loss(y_batch, y_pred, y_min_max):
    """Mean squared error between targets and predictions, without unscaling.

    Args:
        y_batch: targets of the current batch.
        y_pred: model output for the current batch.
        y_min_max: unused here; kept so the signature matches the call site.

    Returns:
        The per-row mean squared errors, summed and divided by the number of
        rows in ``y_pred``.
    """
    n_rows = y_pred.shape[0]
    # One mean squared error per row, using TensorFlow operations only.
    row_errors = (
        tf.math.reduce_mean(tf.math.square(y_batch[row] - y_pred[row]))
        for row in range(n_rows)
    )
    # Start from 0.0 and add the rows in order, then average over the rows.
    return sum(row_errors, 0.0) / n_rows
But when I leave it as in the first version above, I get this error:
ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
I need to unscale my data to use it for other computations.
Could someone help me understand why this happens?
EDIT 1:
The problem is due to the tape (in `with tf.GradientTape() as tape`), which records all the operations performed inside it; it is this recorded series of operations that is traversed in reverse when the gradient is calculated. My goal now is to figure out how to unscale my y_pred variable without the tape recording that step and going astray when calculating the gradient. Any ideas?
EDIT 2:
In my custom loss, the unscale operation is a NumPy operation, and it is not recorded by the tape because it leaves TensorFlow. This is the reason why the error appears. So I'm going to look for a way to scale my data with TensorFlow operations, so that I can also unscale it with TensorFlow operations.
SOLUTION :
EDIT 2 is the solution. Now, everything works perfectly.
In my custom loss, the unscale operation is a NumPy operation, and it is not recorded by the tape because it leaves TensorFlow. This is the reason why the error appears. One solution is to use TensorFlow operations to scale and unscale the data, which allows the tape to record the whole path. See the code below:
import tensorflow as tf
import numpy as np
# Keep the raw data as TensorFlow tensors so that every later scaling step
# can be expressed with TensorFlow operations.
x = tf.convert_to_tensor([[1, 2], [3, 4], [5, 6], [7, 8]], dtype=tf.float32)
y = tf.convert_to_tensor([[-1], [3], [7], [-2]], dtype=tf.float32)

# retrieve x and y min max (per column, reduced over the sample axis)
xmin, xmax = tf.reduce_min(x, axis=0), tf.reduce_max(x, axis=0)
ymin, ymax = tf.reduce_min(y, axis=0), tf.reduce_max(y, axis=0)

# The dataset holds the unscaled data; scaling happens in the training loop.
batch_size = 2
ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)

# create the model
model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(2,)),
        tf.keras.layers.Dense(units=3, activation='relu'),
        tf.keras.layers.Dense(units=1),
    ]
)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
def standard_loss(y_batch, y_pred):
    """Mean squared error between unscaled targets and unscaled predictions.

    The prediction is mapped back to the original target range with plain
    TensorFlow arithmetic, so the gradient tape records the whole path from
    the model output to the loss.

    Args:
        y_batch: targets of the current batch (never scaled).
        y_pred: model output for the current batch, treated as min-max
            scaled values.

    Returns:
        Scalar tensor: mean of the squared differences over the batch.
    """
    # unscale y_pred (note that y_batch has never been scaled); ymin and ymax
    # are the module-level target minimum and maximum.
    y_pred_unsc = y_pred * (ymax - ymin) + ymin
    return tf.reduce_mean(tf.square(y_batch - y_pred_unsc))
# training loop
epochs = 1
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch, ))
    for step, (x_batch, y_batch) in enumerate(ds):
        with tf.GradientTape() as tape:
            # Scale the inputs using TensorFlow operations only (the
            # computation never leaves TensorFlow).
            # NOTE(review): a feature column with xmax == xmin would divide
            # by zero here.
            x_scale = (x_batch - xmin)/(xmax - xmin)
            y_pred = model(x_scale, training=True)
            # The loss unscales y_pred itself and compares it with the raw
            # (unscaled) targets.
            loss_value = standard_loss(y_batch, y_pred)
        # Differentiate the loss w.r.t. the model weights and apply one
        # optimizer update with the resulting gradients.
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))