I've been struggling for while coding the network in the picture below.
The use case is dedicated for time series:
I'm struggling with building such a network, keeping two loss functions:
From a dataset of 1677 time series, with 61440 time status each. I've downsampled all the timeseries with a rolling mean (to mitigate the 61440 features), and reshaped them with a sliding windows of 200 length, which gave me an InputShape of (989300, 1, 200), which is the shape of the samples entering the VAE (989300 sequences). The output of the network is the next time status of my sequence. For example, given a sequence of 200 length, the LSTM regressor part predicts the 201st status, ie the value coming right after this sequence.
My shapes (Xtrain, Xtest, ytrain, ytest):
((989300, 1, 200), (286897, 1, 200), (989300,), (286897,))
Here is my code. I know that it might be not that clean, I'm trying to make it work first.
My imports
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Lambda, TimeDistributed, Input, RepeatVector, LSTM
My loss function, combining VAE loss and LSTM loss, with a lambda parameter:
def vae_loss2_(input_x, decoder1, y_pred, z_log_sigma, z_mean, lambd):
""" Calculate loss = reconstruction loss + KL loss for each data in minibatch """
recon = K.sum(K.binary_crossentropy(input_x, decoder1))
# D_KL(Q(z|X) || P(z|X)); calculate in closed form as both dist. are Gaussian
kl = 0.5 * K.sum(K.exp(z_log_sigma) + K.square(z_mean) - 1. - z_log_sigma)
lstm = tf.keras.losses.MSE(decoder1, y_pred)
return (recon + kl) + lambd*lstm
My sampling function, with which the VAE samples from the latent space:
def sampling(args):
z_mean, z_log_sigma = args
latent_dim = 1
batch_size = K.shape(z_mean)[0]
epsilon = K.random_normal(shape=(batch_size, K.shape(z_mean)[1], latent_dim), mean=0., stddev=1.)
return z_mean + z_log_sigma * epsilon
And finally, here all my code, with both networks:
latent_dim = 1
timesteps, features = 1, 200
# timesteps, features
input_x = Input(shape= (timesteps, features))
#Encoder
h1 = Dense(150, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(input_x)
h1 = Dense(100, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(h1)
h1 = Dense(50, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(h1)
h1 = Dense(20, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(h1)
#z_layer
z_mean = Dense(latent_dim)(h1)
z_log_sigma = Dense(latent_dim)(h1)
z = Lambda(sampling)([z_mean, z_log_sigma])
#Decoder
decoder1 = Dense(20, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(z)
decoder1 = Dense(50, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(decoder1)
decoder1 = Dense(100, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(decoder1)
decoder1 = Dense(150, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal')(decoder1)
decoder1 = TimeDistributed(Dense(features))(decoder1)
# LSTM network
lstm1 = LSTM(150, activation='relu', kernel_initializer='random_normal', bias_initializer='random_normal', return_sequences=True)(decoder1)
lstm1 = Dense(1)(lstm1)
finalModel = Model(input_x, lstm1)
finalModel.add_loss(vae_loss2_(input_x, decoder1, lstm1, z_log_sigma, z_mean, 0.2))
finalModel.compile(loss=None, optimizer='adam')
history = finalModel.fit(Xtrain_, ytrain_, epochs=70, batch_size = 2500, validation_data = (Xtest_,ytest_))
Executing, this code, raises the following error, as the fit step doesn't expect ytrain and ytest, for the next timestamp prediction:
WARNING:tensorflow:Output dense_593 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to dense_593.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-207-47c2c28976f0> in <module>
43 #a = np.load('atrain.npy')
44
---> 45 history = finalModel.fit(Xtrain_, ytrain_, epochs=70, batch_size = 2500, validation_data = (Xtest_,ytest_))
/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
222 validation_data=validation_data,
223 validation_steps=validation_steps,
--> 224 distribution_strategy=strategy)
225
226 total_samples = _get_total_number_of_samples(training_data_adapter)
/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
545 max_queue_size=max_queue_size,
546 workers=workers,
--> 547 use_multiprocessing=use_multiprocessing)
548 val_adapter = None
549 if validation_data:
/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
592 batch_size=batch_size,
593 check_steps=False,
--> 594 steps=steps)
595 adapter = adapter_cls(
596 x,
/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2517 shapes=None,
2518 check_batch_axis=False, # Don't enforce the batch size.
-> 2519 exception_prefix='target')
2520
2521 # Generate sample-wise weight values given the `sample_weight` and
/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
487 raise ValueError(
488 'Error when checking model ' + exception_prefix + ': '
--> 489 'expected no data, but got:', data)
490 return []
491 if data is None:
ValueError: ('Error when checking model target: expected no data, but got:', array([0.49538032, 0.55329189, 0.47183994, ..., 0.84650205, 0.89713042,
0.87897429]))
Thank you very much for your help,
I solved my issue: with add_loss function, the model fit doesn't expect any ytrain or ytest, as it doesn't have a loss in compile function. Putting ytrain and ytest inside fit method, is compelling a loss function in form of loss_fn(ytrue, ypred): return MSE(ytrue, ypred), like the classic keras.losses.MSE