Here is a very simple example of an LSTM in stateless mode, trained on a very simple sequence: [0->1] and [0->2].
Any idea why it won't converge in stateless mode?
We have a batch of size 2 with 2 samples, and it is supposed to keep the state within the batch. When predicting, we would like to receive 1 and then 2.
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import numpy
# define sequences
seq = [0, 1, 0, 2]
# convert the sequence into the required data format.
# We are going to extract 2 samples, [0->1] and [0->2], and convert them into one-hot vectors
seqX = numpy.array([[[1., 0., 0.]], [[1., 0., 0.]]])
seqY = numpy.array([[0., 1., 0.], [0., 0., 1.]])
# define LSTM configuration
n_unique = len(set(seq))
n_neurons = 20
n_batch = 2
n_features = n_unique #which is =3
# create LSTM
model = Sequential()
model.add(LSTM(n_neurons, input_shape=(1, n_features)))
model.add(Dense(n_unique, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='Adam')
# train LSTM
model.fit(seqX, seqY, epochs=300, batch_size=n_batch, verbose=2, shuffle=False)
# evaluate LSTM
print('Sequence')
result = model.predict_classes(seqX, batch_size=n_batch, verbose=0)
for i in range(2):
    print('X=%.1f y=%.1f, yhat=%.1f' % (0, i+1, result[i]))
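For reference, the one-hot arrays above could also be built programmatically from seq. A small sketch (my own illustration, not part of the original code), assuming keras.utils.to_categorical is available:
from keras.utils import to_categorical
onehot = to_categorical(seq, num_classes=3)   # shape (4, 3), one row per element of seq
seqX = onehot[[0, 2]].reshape(2, 1, 3)        # the two inputs, both the one-hot vector for 0
seqY = onehot[[1, 3]]                         # the two targets, the one-hot vectors for 1 and 2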
Example 2: Here I want to clarify a bit what result I want.
It is the same code example but in stateful mode (stateful=True), and it works perfectly: we feed the network twice with zeros and get 1 and then 2. But I want to get the same result in stateless mode, since it is supposed to keep the state within the batch.
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import numpy
# define sequences
seq = [0, 1, 0, 2]
# convert sequences into required data format
seqX = numpy.array([[[1., 0., 0.]], [[1., 0., 0.]]])
seqY = numpy.array([[0., 1., 0.], [0., 0., 1.]])
# define LSTM configuration
n_unique = len(set(seq))
n_neurons = 20
n_batch = 1
n_features = n_unique
# create LSTM
model = Sequential()
model.add(LSTM(n_neurons, batch_input_shape=(n_batch, 1, n_features), stateful=True))
model.add(Dense(n_unique, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='Adam')
# train LSTM
for epoch in range(300):
    model.fit(seqX, seqY, epochs=1, batch_size=n_batch, verbose=2, shuffle=False)
    model.reset_states()
# evaluate LSTM
print('Sequence')
result = model.predict_classes(seqX, batch_size=1, verbose=0)
for i in range(2):
    print('X=%.1f y=%.1f, yhat=%.1f' % (0, i+1, result[i]))
The correct result should be:
Sequence
X=0.0 y=1.0, yhat=1.0
X=0.0 y=2.0, yhat=2.0
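To make the carried state explicit, here is a small sketch (my own illustration, not part of the original code) that predicts the two identical inputs one call at a time with the trained stateful model above:
model.reset_states()                          # start from a zero state
p1 = model.predict(seqX[0:1], batch_size=1)   # first 0 -> fresh state, expect class 1
p2 = model.predict(seqX[1:2], batch_size=1)   # second 0 -> state carried over, expect class 2
print(p1.argmax(), p2.argmax())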
You must feed one sequence with two steps instead of two sequences with one step:
one sequence with two steps: seqX.shape = (1, 2, 3)
two sequences with one step: seqX.shape = (2, 1, 3)
The input shape is (numberOfSequences, stepsPerSequence, featuresPerStep), so here:
seqX = [[[1, 0, 0], [1, 0, 0]]]
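Equivalently, the arrays from the question can be reshaped in place. A small sketch (assuming the seqX and seqY defined in the question):
seqX = seqX.reshape(1, 2, 3)   # one sequence, two steps, three features
seqY = seqY.reshape(1, 2, 3)   # targets follow the same layout, one target per step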
If you want to get both steps of y as output, you must use return_sequences=True:
LSTM(n_neurons, input_shape=(2, n_features), return_sequences=True)
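A minimal check of what return_sequences changes (an illustration, assuming Keras 2.x): without it the LSTM emits only its last step, with it the LSTM emits one vector per step.
from keras.models import Sequential
from keras.layers import LSTM
print(Sequential([LSTM(20, input_shape=(2, 3))]).output_shape)                         # (None, 20)
print(Sequential([LSTM(20, input_shape=(2, 3), return_sequences=True)]).output_shape)  # (None, 2, 20)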
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import numpy
# define sequences
seq = [0, 1, 0, 2]
# convert the sequence into the required data format.
# We are going to extract 2 samples, [0->1] and [0->2], and convert them into one-hot vectors
seqX = numpy.array([[[1., 0., 0.], [1., 0., 0.]]])
seqY = numpy.array([[[0., 1., 0.], [0., 0., 1.]]])
#shapes are (1,2,3) - 1 sequence, 2 steps, 3 features
# define LSTM configuration
n_unique = len(set(seq))
n_neurons = 20
n_features = n_unique #which is =3
#no need for batch size
# create LSTM
model = Sequential()
model.add(LSTM(n_neurons, input_shape=(2, n_features), return_sequences=True))
#the input shape must have two steps
model.add(Dense(n_unique, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='Adam')
# train LSTM
model.fit(seqX, seqY, epochs=300, verbose=2)
#no shuffling and no batch size needed.
# evaluate LSTM
print('Sequence')
result = model.predict_classes(seqX, verbose=0)
print(seqX)
print(result) #all steps are predicted in a single array (with return_sequences=True)
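If predict_classes is not available (it was removed in newer Keras/TensorFlow versions, so this is a hedged alternative rather than part of the original answer), the same per-step classes can be obtained from predict plus argmax:
probs = model.predict(seqX, verbose=0)    # shape (1, 2, 3): class probabilities per step
classes = numpy.argmax(probs, axis=-1)    # shape (1, 2): expected [[1, 2]]
print(classes)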