I have two datasets and a weight array (train_X, validation_X, train_Y, validation_Y and sampleW). The X sets are 3-dimensional and the Y sets are 2-dimensional NumPy arrays; sampleW is a 1-dimensional NumPy array.
How do I successfully migrate from fit_generator() to the fit() function? In terms of fit(x=None, y=None, ...): where do train_X and train_Y go, where do validation_X and validation_Y go, and can sampleW be passed to fit() the same way as before?
Here is a minimal reproducible example (I am currently struggling to find out why any batch size other than 1 gives an error, although a batch size > 1 should also be usable):
# -*- coding: utf-8 -*-
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

tensorboard_path = r"C:\Users\user\documents\session"  # <--- your path
checkpoint_path = tensorboard_path
BATCH_SIZE = 1
EPOCHS, Input_shape, labels = 3, (20, 4), 6

train_X, train_Y = np.asarray([np.random.random(Input_shape) for x in range(100)]), np.random.random((100, labels))
validation_X, validation_Y = np.asarray([np.random.random(Input_shape) for x in range(50)]), np.random.random((50, labels))
sampleW = np.random.random((100, 1))
class CustomGenerator_SampleW(tf.keras.utils.Sequence):
    """Sequence that yields (x, y, sample_weight) batches."""
    def __init__(self, list_x, labels, batch_size, sample_weights=None):
        self.labels = labels
        self.batch_size = batch_size
        self.list_x = list_x
        self.sample_weights = sample_weights

    def __len__(self):
        # Number of batches per epoch (the last batch may be smaller).
        return int(np.ceil(len(self.list_x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.list_x[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_weight = self.sample_weights[idx * self.batch_size : (idx + 1) * self.batch_size]
        return np.array(batch_x), np.array(batch_y), np.array(batch_weight)
class CustomGenerator(tf.keras.utils.Sequence):
    """Sequence that yields (x, y) batches."""
    def __init__(self, list_x, labels, batch_size):
        self.labels = labels
        self.batch_size = batch_size
        self.list_x = list_x

    def __len__(self):
        return int(np.ceil(len(self.list_x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.list_x[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size : (idx + 1) * self.batch_size]
        return np.array(batch_x), np.array(batch_y)
model = Sequential()
model.add(LSTM(242, input_shape=Input_shape, return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(LSTM(242, return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(Dense(labels, activation='tanh'))
model.add(Dropout(0.3))
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)
model.compile(loss='mean_absolute_error', optimizer=opt, metrics=['mse'])
if sampleW is not None:
    train_batch_gen = CustomGenerator_SampleW(train_X, train_Y, BATCH_SIZE, sample_weights=sampleW)
else:
    train_batch_gen = CustomGenerator(train_X, train_Y, BATCH_SIZE)
validation_batch_gen = CustomGenerator(validation_X, validation_Y, BATCH_SIZE)
tensorboard = TensorBoard(tensorboard_path)
checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
model.fit_generator(train_batch_gen, steps_per_epoch=None, epochs=EPOCHS,
                    validation_data=validation_batch_gen, callbacks=[tensorboard, checkpoint])
This is due to a shape mismatch between your model's output and the labels you provide.
Model architecture: (plot_model image; the output shape is (None, 20, 6))
As you can see, the output shape of your model is (batch_size, 20, 6), while the shape of your labels is (batch_size, 6); these are not compatible. The mismatch comes from the last LSTM layer: with return_sequences=True it emits all 20 timesteps, so the following Dense(labels) layer is applied to every timestep.
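You can check this yourself; a quick sketch, assuming the model and arrays from the question have been built:
print(model.output_shape)  # (None, 20, 6) -> (batch_size, timesteps, labels)
print(train_Y.shape)       # (100, 6)      -> (samples, labels)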
Why did this work for batch_size = 1? Because TensorFlow was using a technique called broadcasting. For example:
x = np.ones(shape = (1,20,6))
array([[[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1., 1.]]])
y = np.ones(shape = (1,6))
array([[1., 1., 1., 1., 1., 1.]])
y-x
array([[[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0.]]])
NumPy aligns shapes from the right and stretches any dimension of size 1, so (1, 6) is treated as (1, 1, 6) and broadcast against (1, 20, 6). See the NumPy broadcasting documentation (https://numpy.org/doc/stable/user/basics.broadcasting.html) for more information.
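The same broadcasting happens inside the loss computation; a small sketch (using the shapes above) showing that Keras' MAE silently accepts the mismatched shapes when the batch dimension allows broadcasting:
import numpy as np
import tensorflow as tf

y_true = np.ones((1, 6))      # labels for batch_size = 1
y_pred = np.ones((1, 20, 6))  # model output for batch_size = 1
# y_true broadcasts against y_pred, so no error is raised:
print(tf.keras.losses.mean_absolute_error(y_true, y_pred).shape)  # (1, 20)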
But broadcasting was no longer possible when you used batch_size = 10.
Code:
x = np.ones(shape = (10,20,6))
y = np.ones(shape = (10,6))
y-x
Output:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-102-4a65323a80fa> in <module>
1 x = np.ones(shape = (10,20,6))
2 y = np.ones(shape = (10,6))
----> 3 y-x
ValueError: operands could not be broadcast together with shapes (10,6) (10,20,6)
(Aligned from the right, the trailing 6s match, but 10 and 20 differ and neither is 1, so NumPy raises the error above.) The output shape of your model can be fixed by adding a Flatten layer after the last LSTM layer, which collapses the 2-D (timesteps, features) output into a 1-D vector before the final Dense layer. (An alternative, sketched after the code below, is to set return_sequences=False on the last LSTM so it returns only the final timestep.)
Code:
from tensorflow.keras.layers import Flatten

model = Sequential()
model.add(LSTM(242, input_shape=Input_shape, return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(LSTM(242, return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dropout(0.3))
model.add(Dense(labels, activation='tanh'))
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-6)
model.compile(loss='mean_absolute_error', optimizer=opt, metrics=['mse'])
tf.keras.utils.plot_model(model, 'my_first_model.png', show_shapes=True)
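As an aside, a sketch of the alternative fix mentioned above, assuming the same Input_shape and labels from the question: setting return_sequences=False on the last LSTM returns only the final timestep, so the Dense layer directly produces (batch_size, labels) and no Flatten is needed.
model = Sequential()
model.add(LSTM(242, input_shape=Input_shape, return_sequences=True))
model.add(Dropout(0.3))
model.add(BatchNormalization())
# return_sequences=False: only the last timestep is returned -> (batch_size, 242)
model.add(LSTM(242, return_sequences=False))
model.add(Dropout(0.3))
model.add(BatchNormalization())
model.add(Dense(labels, activation='tanh'))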
Model architecture: (plot_model image; the output shape is now (None, 6))
Finally, using model.fit(). In TF 2.x, fit() accepts Python generators and tf.keras.utils.Sequence objects directly, and a Sequence may yield (x, y, sample_weight) triples, so your CustomGenerator_SampleW keeps handling sampleW unchanged; the deprecated fit_generator() call can simply be replaced:
model.fit(train_batch_gen, epochs=EPOCHS, validation_data=validation_batch_gen)
Output:
Epoch 1/3
2/2 [==============================] - 1s 708ms/step - loss: 0.2891 - mse: 0.5739 - val_loss: 0.4078 - val_mse: 0.2461
Epoch 2/3
2/2 [==============================] - 0s 46ms/step - loss: 0.2229 - mse: 0.3151 - val_loss: 0.3867 - val_mse: 0.2225
Epoch 3/3
2/2 [==============================] - 0s 49ms/step - loss: 0.2315 - mse: 0.3341 - val_loss: 0.3813 - val_mse: 0.2161
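If you would rather drop the Sequence classes entirely, fit() also accepts the NumPy arrays directly, which answers the migration question in terms of fit(x=None, y=None, ...). A minimal sketch, assuming the arrays and callbacks from the question; note that sample_weight is expected to be a 1-D array with one weight per sample, so the (100, 1) sampleW is flattened first:
model.fit(
    x=train_X,                      # (100, 20, 4)
    y=train_Y,                      # (100, 6)
    sample_weight=sampleW.ravel(),  # (100,) - one weight per sample
    validation_data=(validation_X, validation_Y),
    batch_size=10,                  # any batch size works once the shapes match
    epochs=EPOCHS,
    callbacks=[tensorboard, checkpoint],
)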