
Training an autoencoder for variable-length time series - TensorFlow


I am trying to train an LSTM model to reconstruct time series data. I have a data set of ~1800 univariate time series. Basically, I am trying to solve a problem similar to this one: Anomaly detection in ECG plots, but my time series have different lengths.

I used this approach to deal with the variable lengths: How to apply LSTM-autoencoder to variant-length time-series data?, and this approach to split the input data by shape: Keras misinterprets training data shape.

When I loop over the data and fit the model once per shape, is the final model based only on the last shape it trained on, or does it use all of the data? (A quick way to check this empirically is sketched after the training loop below.)

How would I train the model on all of the input data regardless of its shape? I know I could pad the sequences, but I am trying to use the data as-is at this point. Are there any suggestions or other approaches for dealing with time series of different lengths? (It is not a sampling-rate issue; some time series simply started recording on day X and others only on day X+100.)
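For reference, this is roughly what the padding route would look like if I end up falling back to it. This is only a sketch: it assumes each element of train_data is a NumPy array shaped (length, 1), and mask_value is a made-up sentinel that never occurs in the real data.

from keras.preprocessing.sequence import pad_sequences
from keras.layers import Input, Masking

# pad every series to the length of the longest one, then mask the padded steps
mask_value = -999.0
padded = pad_sequences(train_data, dtype='float32', padding='post',
                       value=mask_value)            # -> (n_series, max_len, 1)

padded_inputs = Input(shape=(padded.shape[1], 1))
masked = Masking(mask_value=mask_value)(padded_inputs)
# the encoder could consume `masked` instead of the raw input; the decoder side
# (the Lambda/repeat trick below) would also need to respect the mask, which is
# part of why I have avoided padding so far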

Here is the code I am using for my autoencoder:

import numpy as np
import keras.backend as K
from keras.models import Model
from keras.layers import (Input, Dense, TimeDistributed, LSTM, GRU, Dropout,
                          Concatenate, Flatten, RepeatVector, Bidirectional,
                          SimpleRNN, Lambda)


def encoder(model_input, layer, size, num_layers, drop_frac=0.0, output_size=None,
            bidirectional=False):
    """Encoder module of autoencoder architecture"""
    if output_size is None:
        output_size = size
    encode = model_input
    for i in range(num_layers):
        wrapper = Bidirectional if bidirectional else lambda x: x
        # only the last recurrent layer collapses the sequence to a single vector
        encode = wrapper(layer(size, name='encode_{}'.format(i),
                               return_sequences=(i < num_layers - 1)))(encode)
        if drop_frac > 0.0:
            encode = Dropout(drop_frac, name='drop_encode_{}'.format(i))(encode)
    encode = Dense(output_size, activation='linear', name='encoding')(encode)
    return encode


def repeat(x):
    """Tile the latent vector across as many time steps as the input has
    (a variable-length replacement for RepeatVector)."""
    stepMatrix = K.ones_like(x[0][:, :, :1])    # ones, shaped (batch, steps, 1)
    latentMatrix = K.expand_dims(x[1], axis=1)  # latent vars, shaped (batch, 1, latent_dim)
    return K.batch_dot(stepMatrix, latentMatrix)


def decoder(encode, layer, size, num_layers, drop_frac=0.0, aux_input=None,
            bidirectional=False):
    """Decoder module of autoencoder architecture"""
    # `inputs` is the global model input defined below; it only supplies the
    # number of time steps that the latent vector gets tiled over.
    decode = Lambda(repeat)([inputs, encode])
    if aux_input is not None:
        decode = Concatenate()([aux_input, decode])

    for i in range(num_layers):
        if drop_frac > 0.0 and i > 0:  # skip for the first layer for symmetry
            decode = Dropout(drop_frac, name='drop_decode_{}'.format(i))(decode)
        wrapper = Bidirectional if bidirectional else lambda x: x
        decode = wrapper(layer(size, name='decode_{}'.format(i),
                               return_sequences=True))(decode)

    decode = TimeDistributed(Dense(1, activation='linear'), name='time_dist')(decode)
    return decode


inputs = Input(shape=(None, 1))
encoded = encoder(inputs, LSTM, 128, 2, drop_frac=0.0, output_size=None,
                  bidirectional=False)
decoded = decoder(encoded, LSTM, 128, 2, drop_frac=0.0, aux_input=None,
                  bidirectional=False)


sequence_autoencoder = Model(inputs, decoded)
sequence_autoencoder.compile(optimizer='adam', loss='mae')


trainByShape = {}
for item in train_data:
    if item.shape in trainByShape:
        trainByShape[item.shape].append(item)
    else:
        trainByShape[item.shape] = [item]

for shape in trainByShape:
    modelHistory = sequence_autoencoder.fit(
        np.asarray(trainByShape[shape]),
        np.asarray(trainByShape[shape]),
        epochs=100, batch_size=1, validation_split=0.15)
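
One way to check the "last shape only?" question empirically (just a sketch, reusing sequence_autoencoder and trainByShape from above): snapshot the model weights around each per-shape fit() call and see whether they keep changing for every group, rather than only for the last one.

# sketch: do successive fit() calls keep updating the same weights?
prev_weights = sequence_autoencoder.get_weights()
for shape, series in trainByShape.items():
    x = np.asarray(series)
    sequence_autoencoder.fit(x, x, epochs=1, batch_size=1, verbose=0)
    new_weights = sequence_autoencoder.get_weights()
    # total absolute parameter change caused by this shape group
    delta = sum(float(np.abs(a - b).sum())
                for a, b in zip(new_weights, prev_weights))
    print(shape, 'weight change:', delta)
    prev_weights = new_weights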

Solution

  • Use a bidirectional LSTM and increase the number of parameters to gain accuracy. I increased the latent_dim to 1000 and it fit the data closely, at the cost of more hardware and more memory.

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.preprocessing import MinMaxScaler
    from keras.models import Sequential
    from keras.layers import LSTM, Bidirectional, Dense

    def create_dataset(dataset, look_back=3):
        dataX, dataY = [], []
        for i in range(len(dataset)-look_back-1):
            a = dataset[i:(i+look_back)]
            dataX.append(a)
            dataY.append(dataset[i + look_back])
        return np.array(dataX), np.array(dataY)
    
    COLUMNS = ['Open']
    dataset = eqix_df[COLUMNS]
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(np.array(dataset).reshape(-1, 1))
    
    train_size = int(len(dataset) * 0.70)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
    
    look_back = 10
    
    trainX, y_train = create_dataset(train, look_back)
    testX, y_test = create_dataset(test, look_back)
    
    # reshape to (samples, timesteps=1, features=look_back)
    X_train = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    X_test = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    latent_dim = 700
    n_future = 1
    
    model = Sequential()
    
    # input shape = (timesteps, features); the data above is (samples, 1, look_back)
    model.add(Bidirectional(LSTM(units=latent_dim, return_sequences=True),
                            input_shape=(X_train.shape[1], X_train.shape[2])))
    
    #LSTM 1
    model.add(Bidirectional(LSTM(latent_dim,return_sequences=True,dropout=0.4,recurrent_dropout=0.4,name='lstm1'))) 
    
    #LSTM 2 
    model.add(Bidirectional(LSTM(latent_dim,return_sequences=True,dropout=0.2,recurrent_dropout=0.4,name='lstm2')))
    
    #LSTM 3 
    model.add(Bidirectional(LSTM(latent_dim, return_sequences=False,dropout=0.2,recurrent_dropout=0.4,name='lstm3')))
    
    model.add(Dense(units = n_future))
    
    model.compile(optimizer="adam", loss="mean_squared_error", metrics=["acc"])
    
    history=model.fit(X_train, y_train,epochs=50,verbose=0)
    
    plt.plot(history.history['loss'])
    plt.title('training loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train'], loc='upper left')
    plt.show()
    
    #print(X_test)
    prediction = model.predict(X_test)
    
    # shift the test-set predictions for plotting against the full dataset
    trainPredictPlot = np.empty_like(dataset)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[look_back:len(prediction)+look_back, :] = prediction
    #plt.plot(scaler.inverse_transform(dataset))
    plt.plot(trainPredictPlot, color='red')
    #plt.plot(testPredictPlot)
    #plt.legend(['Actual','Train','Test'])
    x=np.linspace(look_back,len(prediction)+look_back,len(y_test))
    plt.plot(x,y_test)
    plt.show()
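
Since the model was trained on MinMax-scaled values, the predictions above are still in the [0, 1] range. A small follow-up sketch (reusing the scaler, prediction, and y_test defined above) to map everything back to the original scale before comparing:

    # undo the MinMax scaling so predictions and targets are in the original units
    prediction_inv = scaler.inverse_transform(prediction)
    y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
    
    plt.plot(y_test_inv, label='actual')
    plt.plot(prediction_inv, label='predicted')
    plt.legend()
    plt.show()
    
    # mean absolute error in the original units
    print('test MAE:', np.mean(np.abs(prediction_inv - y_test_inv)))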