Search code examples
python, tensorflow, keras, recurrent-neural-network, cumulative-sum

Tensorflow simple cumulative sum of product RNN cell


I am trying to build a TensorFlow model that calculates a cumulative sum of the products of two of the input features across successive calls. For example, predicting on (1, 2) alone should return 2 = 1 * 2, and then predicting on (2, 2) should return 6 = (1 * 2) + (2 * 2):

model.predict([1,2])
>>> 2

model.predict([2,2])
>>> 6

model.reset_states()
model.predict([2,2])
>>> 4

I have tried the following:

import numpy as np
import tensorflow as tf


class MinimalRNNCell(tf.keras.layers.Layer):
    """Recurrent cell that adds the current input to the previous state.

    ``call`` returns the sum both as the step output and as the next state,
    so a wrapper that drives the cell step by step accumulates its inputs.
    """

    def __init__(self, units, **kwargs):
        # Plain attributes are set first, then the base-class initialiser runs.
        # ``states`` and ``state`` are not read anywhere inside this class.
        self.states = np.array([0])
        self.state = np.array([0])
        self.units = self.state_size = units
        super().__init__(**kwargs)

    def call(self, inputs, states):
        """Return ``(output, [new_state])`` for one step."""
        running_total = tf.add(states[0], inputs)
        return running_total, [running_total]
    

# Functional model: two input features -> accumulated product.
inp = tf.keras.layers.Input(shape=(2,))
# Separate the two feature columns.
x1, x2 = tf.split(inp, 2, axis=1)
# Element-wise product of the two features.
product = tf.multiply(x1, x2)
# Add a length-one time axis: the result has shape (batch, 1, 1).
time_product = tf.keras.layers.Reshape((1, 1))(product)
# Accumulating cell wrapped in a Keras RNN layer.
memory_product = MinimalRNNCell(units=1)
layer_product = tf.keras.layers.RNN(memory_product)
# Run the single-step sequence through the accumulating layer.
cumulative_product = layer_product(time_product)

output = cumulative_product

model = tf.keras.models.Model(inputs=inp, outputs=output)


if __name__ == "__main__":
    # Two input rows handed to predict() in a single call.
    samples = np.array([[1, 2], [2, 2]])

    model.compile()
    predictions = model.predict(samples)
    print()
    print("outptut: ", predictions)
>>> [[2],
     [4]]

Note that I split the inputs only because this is the minimal model that should work. The model I am actually implementing is more complex and has more interlinked steps, but it is the accumulation that I can't get to work. I feel that something like a cumulative sum should be easy to implement using RNN or LSTM cells, but it does not behave as I expect.


Solution

  • If you just want to accumulate the product, create a non-trainable weight in the layer's build() method and, every time the layer is called (for example through model.predict()), add the sum of the products of the current batch to it. Here is the code:

    import tensorflow as tf
    tf.keras.backend.clear_session()
    class CumulativeProduct(tf.keras.layers.Layer):
        """Layer that keeps a running total of row-wise input products.

        Each call multiplies the values along axis 1 of ``inputs``, sums the
        resulting per-row products, and adds that sum to a scalar
        non-trainable weight. The weight keeps its value between calls until
        ``reset_states`` is invoked.
        """

        def __init__(self, **kwargs):
            # Forward keyword arguments (e.g. ``name``) to the base Layer so
            # the layer can be configured like any other Keras layer.
            super().__init__(**kwargs)

        def build(self, input_shape):
            """Create the scalar accumulator, initialised to zero."""
            self.cumulative_product = self.add_weight(
                shape=(),
                initializer=tf.keras.initializers.Zeros(),
                trainable=False,
            )

        def call(self, inputs):
            """Add this call's sum of row products to the total and return it."""
            # Product across axis 1 for every row, then summed over all rows.
            batch_total = tf.reduce_sum(tf.reduce_prod(inputs, axis=1))
            # In-place increment instead of reading, adding and re-assigning.
            self.cumulative_product.assign_add(batch_total)
            return self.cumulative_product

        def reset_states(self):
            """Set the accumulated total back to zero."""
            self.cumulative_product.assign(0.)
    
    # Wire the accumulating layer into a functional model with two input features.
    inp = tf.keras.layers.Input(shape=(2,))
    cumulative_product = CumulativeProduct()
    cum_prod = cumulative_product(inp)
    model = tf.keras.models.Model(inp, cum_prod)
    # First call with two rows: the total becomes 1*2 + 2*2.
    x = [[1, 2], [2,2]]
    model.predict(x)
    
    # Output shown for the call above:
    6.0
    
    # Second call: 2*2 is added to the total kept from the first call.
    x = [[2, 2]]
    model.predict(x)
    
    # Output shown for the call above:
    10.0
    
    # Reset the layer's accumulated total to zero.
    cumulative_product.reset_states()