Search code examples
python-3.x · tensorflow · machine-learning · keras · lstm

Custom Peephole LSTM Layer Returns TypeError


I have built a custom Peephole LSTM Layer for TensorFlow:

import tensorflow as tf

class PeepholeLSTM(tf.keras.layers.Layer):
    """Custom recurrent layer: an LSTM whose forget and output gates also see the cell state.

    NOTE: as written, ``call`` fails on symbolic Keras inputs because it reads
    the static batch size, which is ``None`` there (see the comments in ``call``).
    """

    def __init__(self, units, activation='tanh', return_sequences=False, **kwargs):
        """Store the layer configuration.

        Args:
            units: Width of the hidden and cell state vectors.
            activation: Name or callable resolved via ``tf.keras.activations.get``.
            return_sequences: If True, ``call`` returns the hidden state of every
                timestep stacked along axis 1; otherwise only the last one.
            **kwargs: Passed through to ``tf.keras.layers.Layer``.
        """
        super(PeepholeLSTM, self).__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the gate weights; the last entry of ``input_shape`` is the feature size."""
        input_dim = input_shape[-1]

        # Create weights for the LSTM cell
        # Forget gate: input kernel, recurrent kernel, bias
        self.Wf = self.add_weight(name='Wf', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uf = self.add_weight(name='Uf', shape=(self.units, self.units), initializer='orthogonal')
        self.bf = self.add_weight(name='bf', shape=(self.units,), initializer='zeros')

        # Input gate
        self.Wi = self.add_weight(name='Wi', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Ui = self.add_weight(name='Ui', shape=(self.units, self.units), initializer='orthogonal')
        self.bi = self.add_weight(name='bi', shape=(self.units,), initializer='zeros')

        # Candidate cell update
        self.Wc = self.add_weight(name='Wc', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uc = self.add_weight(name='Uc', shape=(self.units, self.units), initializer='orthogonal')
        self.bc = self.add_weight(name='bc', shape=(self.units,), initializer='zeros')

        # Output gate
        self.Wo = self.add_weight(name='Wo', shape=(input_dim, self.units), initializer='glorot_uniform')
        self.Uo = self.add_weight(name='Uo', shape=(self.units, self.units), initializer='orthogonal')
        self.bo = self.add_weight(name='bo', shape=(self.units,), initializer='zeros')

        # Peephole vectors: c_peephole feeds the forget gate, o_peephole the
        # output gate (see call); both are applied element-wise.
        self.c_peephole = self.add_weight(name='c_peephole', shape=(self.units,), initializer='zeros')
        self.o_peephole = self.add_weight(name='o_peephole', shape=(self.units,), initializer='zeros')

        self.built = True

    def call(self, inputs):
        """Run the recurrence over axis 1 of ``inputs`` and return hidden state(s).

        ``inputs`` is unpacked as a rank-3 tensor (batch, timestep, feature).
        """
        # Initialize states
        # inputs.shape is the static shape: for a symbolic Keras input the batch
        # dimension is None, so the tf.zeros calls below raise
        # "TypeError: Expected int32, but got None of type 'NoneType'".
        batch_size, seq_length, _ = inputs.shape
        h_tm1 = tf.zeros(shape=(batch_size, self.units))
        c_tm1 = tf.zeros(shape=(batch_size, self.units))

        # Per-timestep hidden states, collected in a Python list
        outputs = []

        for t in range(seq_length):
            x_t = inputs[:, t, :]
            # Forget gate, with a peephole term on the previous cell state
            f = tf.sigmoid(tf.matmul(x_t, self.Wf) + tf.matmul(h_tm1, self.Uf) + self.bf + self.c_peephole * c_tm1)
            # Input gate (no peephole term)
            i = tf.sigmoid(tf.matmul(x_t, self.Wi) + tf.matmul(h_tm1, self.Ui) + self.bi)
            # New cell state: retained old state plus gated candidate
            c = f * c_tm1 + i * self.activation(tf.matmul(x_t, self.Wc) + tf.matmul(h_tm1, self.Uc) + self.bc)
            # Output gate, with a peephole term on the updated cell state
            o = tf.sigmoid(tf.matmul(x_t, self.Wo) + tf.matmul(h_tm1, self.Uo) + self.bo + self.o_peephole * c)
            h = o * self.activation(c)

            outputs.append(h)
            # Carry the states over to the next timestep
            h_tm1 = h
            c_tm1 = c

        if self.return_sequences:
            # Stack along axis 1 so the result is (batch, timestep, units)
            return tf.stack(outputs, axis=1)
        else:
            return h

I then build and compile a model with this layer using the following function:

def LSTM(inpshape, oupshape, NHLayer=7, LR=0.002):
    """Build and compile a stacked PeepholeLSTM regression model.

    Args:
        inpshape: Per-sample input shape passed to ``keras.Input``; index 1 is
            used as the width of the first layer (presumably
            (timesteps, features) — confirm against caller).
        oupshape: Output shape; only index 1 is read, as the final layer width.
        NHLayer: Number of PeepholeLSTM layers created in the loop.
        LR: Value handed to the RMSprop optimizer as its learning rate.

    Returns:
        The compiled ``keras.Model`` (MSE loss, MAE metric).
    """
    # NOTE(review): `keras` and `np` are not imported in the visible snippet —
    # presumably imported elsewhere in the file.
    Input = keras.Input(shape=inpshape)
    # Layer widths interpolated linearly from the input width to the output
    # width, rounded to integers (NHLayer values in total).
    Nnode = [int(round(o)) for o in np.linspace(inpshape[1], oupshape[1], NHLayer)]
    # First recurrent layer keeps the input feature width.
    rnn = PeepholeLSTM(inpshape[1], activation=tf.nn.elu, return_sequences=True)(Input)
    for o, n in enumerate(Nnode):
        rnn = PeepholeLSTM(n, activation=tf.nn.elu, return_sequences=True)(rnn)
        # Dropout after every third layer of the loop, except after the last one.
        if o % 3 == 0 and o != NHLayer - 1:
            rnn = keras.layers.Dropout(rate=0.2)(rnn)
    # Linear read-out applied to the last axis of the recurrent output.
    Output = keras.layers.Dense(oupshape[1], activation=keras.activations.linear)(rnn)
    model = keras.Model(inputs=Input, outputs=Output)
    optimizer = keras.optimizers.RMSprop(LR)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model

When I run this code I get the following error:

Cell In[3], line 84, in LSTM(inpdata, oupdata, NHLayer, LR)
     82 Input = keras.Input(shape=inpshape)
     83 Nnode = [int(round(o)) for o in np.linspace(inpshape[1], oupshape[1], NHLayer)]
---> 84 rnn = PeepholeLSTM(inpshape[1], activation=tf.nn.elu, return_sequences=True)(Input)
     85 for o, n in enumerate(Nnode):
     86     rnn = PeepholeLSTM(max([n, Nnode[0]]), activation=tf.nn.elu, return_sequences=True)(rnn)

File ~/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67     filtered_tb = _process_traceback_frames(e.__traceback__)
     68     # To get the full stack trace, call:
     69     # `tf.debugging.disable_traceback_filtering()`
---> 70     raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

File /tmp/__autograph_generated_filelmzr4_60.py:12, in outer_factory.<locals>.inner_factory.<locals>.tf__call(self, inputs)
     10 ag__.ld(print)(ag__.ld(self).units, ag__.ld(inputs).shape)
     11 batch_size, seq_length, _ = ag__.ld(inputs).shape
---> 12 h_tm1 = ag__.converted_call(ag__.ld(tf).zeros, (), dict(shape=(ag__.ld(batch_size), ag__.ld(self).units)), fscope)
     13 c_tm1 = ag__.converted_call(ag__.ld(tf).zeros, (), dict(shape=(ag__.ld(batch_size), ag__.ld(self).units)), fscope)
     14 outputs = []

TypeError: Exception encountered when calling layer "peephole_lstm" (type PeepholeLSTM).

in user code:

    File "/home/hgc-swin/Thesan1/peephole.py", line 39, in call  *
        h_tm1 = tf.zeros(shape=(batch_size, self.units))

    TypeError: Expected int32, but got None of type 'NoneType'.


Call arguments received by layer "peephole_lstm" (type PeepholeLSTM):
  • inputs=tf.Tensor(shape=(None, 81, 6), dtype=float32)

I realise this comes from batch_size being None when the model is defined, but isn't that intentional, so that the model can be applied to a dataset of any size? How should I modify the layer so that it works with an unknown batch size?

Using TensorFlow version 2.13.0


Solution

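  • Managed to fix this myself. There were two issues. First, the dimensions have to be read from the dynamic shape, tf.shape(inputs), rather than from the static inputs.shape, because the static batch dimension is None while the model is being built. Second, a tf.TensorArray is necessary to collect the output of each timestep, because the loop now runs over a tensor-valued range and a Python list cannot accumulate results inside it. The working model is as follows: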

    import tensorflow as tf
    
    class PeepholeLSTM(tf.keras.layers.Layer):
        """LSTM layer with peephole terms on the forget and output gates.

        The layer consumes a batch-major sequence ``(batch, time, features)`` and
        runs the recurrence over the time axis. Batch size and sequence length
        are read at run time, so both may be unknown when the model is built.

        Args:
            units: Width of the hidden and cell state vectors.
            activation: Name or callable resolved via
                ``tf.keras.activations.get``; applied to the candidate cell
                update and to the cell state when forming the output.
            return_sequences: If True, return the hidden state of every timestep
                as ``(batch, time, units)``; otherwise return only the final
                hidden state ``(batch, units)``.
            **kwargs: Passed through to ``tf.keras.layers.Layer``.
        """

        def __init__(self, units, activation='tanh', return_sequences=False, **kwargs):
            super().__init__(**kwargs)
            self.units = units
            self.activation = tf.keras.activations.get(activation)
            self.return_sequences = return_sequences

        def build(self, input_shape):
            """Create the gate weights; the last entry of ``input_shape`` is the feature size."""
            input_dim = input_shape[-1]

            # Forget gate: input kernel, recurrent kernel, bias
            self.Wf = self.add_weight(name='Wf', shape=(input_dim, self.units), initializer='glorot_uniform')
            self.Uf = self.add_weight(name='Uf', shape=(self.units, self.units), initializer='orthogonal')
            self.bf = self.add_weight(name='bf', shape=(self.units,), initializer='zeros')

            # Input gate
            self.Wi = self.add_weight(name='Wi', shape=(input_dim, self.units), initializer='glorot_uniform')
            self.Ui = self.add_weight(name='Ui', shape=(self.units, self.units), initializer='orthogonal')
            self.bi = self.add_weight(name='bi', shape=(self.units,), initializer='zeros')

            # Candidate cell update
            self.Wc = self.add_weight(name='Wc', shape=(input_dim, self.units), initializer='glorot_uniform')
            self.Uc = self.add_weight(name='Uc', shape=(self.units, self.units), initializer='orthogonal')
            self.bc = self.add_weight(name='bc', shape=(self.units,), initializer='zeros')

            # Output gate
            self.Wo = self.add_weight(name='Wo', shape=(input_dim, self.units), initializer='glorot_uniform')
            self.Uo = self.add_weight(name='Uo', shape=(self.units, self.units), initializer='orthogonal')
            self.bo = self.add_weight(name='bo', shape=(self.units,), initializer='zeros')

            # Element-wise peephole vectors: c_peephole feeds the forget gate,
            # o_peephole feeds the output gate.
            self.c_peephole = self.add_weight(name='c_peephole', shape=(self.units,), initializer='zeros')
            self.o_peephole = self.add_weight(name='o_peephole', shape=(self.units,), initializer='zeros')

            self.built = True

        def call(self, inputs):
            """Run the recurrence over axis 1 of ``inputs``.

            Args:
                inputs: Rank-3 tensor laid out as ``(batch, time, features)``.

            Returns:
                ``(batch, time, units)`` when ``return_sequences`` is True,
                otherwise the final hidden state ``(batch, units)`` (all zeros
                for an empty sequence).
            """
            # The static shape holds None for unknown dimensions, so the sizes
            # are taken from the run-time shape instead.
            dynamic_shape = tf.shape(inputs)
            batch_size = dynamic_shape[0]
            seq_length = dynamic_shape[1]

            # Follow the input dtype rather than hard-coding float32, so states
            # and collected outputs always match the values computed in the loop.
            dtype = inputs.dtype
            h_tm1 = tf.zeros(shape=(batch_size, self.units), dtype=dtype)
            c_tm1 = tf.zeros(shape=(batch_size, self.units), dtype=dtype)

            # A TensorArray (not a Python list) is required because the loop
            # bound is a tensor.
            outputs = tf.TensorArray(dtype, size=seq_length)

            for t in range(seq_length):
                x_t = inputs[:, t, :]
                # Forget gate, with a peephole term on the previous cell state
                f = tf.sigmoid(tf.matmul(x_t, self.Wf) + tf.matmul(h_tm1, self.Uf) + self.bf + self.c_peephole * c_tm1)
                # Input gate (no peephole term)
                i = tf.sigmoid(tf.matmul(x_t, self.Wi) + tf.matmul(h_tm1, self.Ui) + self.bi)
                # New cell state: retained old state plus gated candidate
                c = f * c_tm1 + i * self.activation(tf.matmul(x_t, self.Wc) + tf.matmul(h_tm1, self.Uc) + self.bc)
                # Output gate, with a peephole term on the updated cell state
                o = tf.sigmoid(tf.matmul(x_t, self.Wo) + tf.matmul(h_tm1, self.Uo) + self.bo + self.o_peephole * c)
                h = o * self.activation(c)

                outputs = outputs.write(t, h)
                h_tm1 = h
                c_tm1 = c

            if self.return_sequences:
                # TensorArray.stack() is time-major (time, batch, units); swap
                # the first two axes so the result is batch-major like the
                # input and can be fed to another sequence layer.
                return tf.transpose(outputs.stack(), perm=[1, 0, 2])
            # Return the carried state rather than the loop-local `h`, which
            # does not exist when the sequence is empty.
            return h_tm1

        def get_config(self):
            """Return the constructor arguments so the layer can be saved and re-created."""
            config = super().get_config()
            config.update({
                'units': self.units,
                'activation': tf.keras.activations.serialize(self.activation),
                'return_sequences': self.return_sequences,
            })
            return config