I have created a model with an LSTM layer as shown below and want to get the internal state (hidden state and cell state) after the training step and save it. After the training step, I will use the network for a prediction and want to reinitialize the LSTM with the saved internal state before the next training step. This way I can continue from the same point after each training step. I haven't been able to find anything helpful for the current version of TensorFlow, i.e. 2.x.
import tensorflow as tf

class LTSMNetwork(object):
    def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
        self.num_channels = num_channels
        self.num_hidden_neurons = num_hidden_neurons
        self.learning_rate = learning_rate
        self.time_steps = time_steps
        self.batch_size = batch_size

    def lstm_model(self):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
                                            units=self.num_hidden_neurons[0],
                                            activation='tanh', recurrent_activation='sigmoid',
                                            return_sequences=True, stateful=True))
        #self.model.add(tf.keras.layers.LSTM(units=self.num_hidden_neurons[1], stateful=True))
        hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
        self.model.add(hidden_layer)
        self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
                           loss='mse', metrics=['binary_accuracy'])
        return self.model


if __name__ == '__main__':
    num_channels = 3
    num_hidden_neurons = [150, 100]
    learning_rate = 0.001
    time_steps = 1
    batch_size = 1

    lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
                               learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
    model = lstm_network.lstm_model()
    model.summary()
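For reference, one training step on this stateful model and a look at the LSTM layer's internal states could look like the sketch below. The random tensors are only placeholders with the shapes the model expects; with batch_size=1 and 150 units, each state has shape (1, 150).

x = tf.random.uniform((batch_size, time_steps, num_channels))
y = tf.random.uniform((batch_size, time_steps, num_channels))

# One training step; shuffle=False keeps the batch order for the stateful LSTM
model.fit(x, y, batch_size=batch_size, epochs=1, shuffle=False)

# The stateful LSTM is the first layer; states[0] is the hidden state, states[1] the cell state
lstm_layer = model.layers[0]
print(len(lstm_layer.states))        # 2
print(lstm_layer.states[0].shape)    # (1, 150)
print(lstm_layer.states[1].shape)    # (1, 150)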
I have managed to save the internal state of the LSTM after the training step and to reinitialize the LSTM with the saved internal states before the next training step. The idea is to create a variable and assign it the value currently stored in the layer's state variables (see the related question "How can I copy a variable in tensorflow").
import numpy as np

# The stateful LSTM is the first layer of the model built above
lstm_layer = model.layers[0]

states_ = {}

# Save the hidden state
internal_state_h = lstm_layer.states[0]
v1 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_h = v1.assign(internal_state_h)

# Save the cell state
internal_state_c = lstm_layer.states[1]
v2 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_c = v2.assign(internal_state_c)

# Create a tuple and add it to the dictionary
states_[0] = (copy_state_h, copy_state_c)

# Reset the internal state to the saved values
lstm_layer.reset_states(states_[0])
A call to predict() changes the internal states; however, by following these steps you can restore the internal states of the RNN to what they were before the prediction.
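For clarity, here is how these steps could fit into a training and prediction loop. This is only a sketch building on the code above; the random tensors and the number of steps are placeholders.

for step in range(3):  # placeholder number of training steps
    x = tf.random.uniform((batch_size, time_steps, num_channels))
    y = tf.random.uniform((batch_size, time_steps, num_channels))

    # Training step (shuffle=False so the stateful LSTM sees batches in order)
    model.fit(x, y, batch_size=batch_size, epochs=1, shuffle=False)

    # Save the internal states reached after this training step
    saved_h = tf.Variable(lstm_layer.states[0])
    saved_c = tf.Variable(lstm_layer.states[1])

    # Prediction; this call changes the internal states
    prediction = model.predict(x, batch_size=batch_size)

    # Restore the states saved after the training step before training continues
    lstm_layer.reset_states((saved_h, saved_c))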