Search code examples
python · string · tensorflow · lookup-tables · python-class

Dtype of argument `keys` must be <dtype: 'string'>, received: <dtype: 'int32'>


I am trying this in Google Colab. The problem is with the lookup table: if I run it separately, it allows integers to be keys as well, but inside this model class I can't seem to pass integer values as keys.

class SimpleRecommender(tf.keras.Model):
    """Keras model that scores (user, product) pairs by an embedding dot product.

    User keys and product keys are translated to embedding row indices through
    two `tf.lookup.StaticHashTable`s; the looked-up indices feed two
    `Embedding` layers whose outputs are combined with a `Dot` layer.
    """

    def __init__(self, dummy_users, products,lenght_of_embedding):
        """Build the lookup tables, the embedding layers and the dot layer.

        Args:
            dummy_users: user keys; converted to a `tf.string` constant.
            products: product keys; converted to a `tf.int32` constant.
            lenght_of_embedding: second argument passed to both `Embedding`
                layers (the embedding dimension).
        """
        super(SimpleRecommender, self).__init__()
        self.products = tf.constant(products, dtype=tf.int32)
        self.dummy_users = tf.constant(dummy_users, dtype=tf.string)
        # Each table maps key -> position in the input sequence; -1 is the
        # value returned for keys that are not in the table.
        # Keys here are tf.string.
        self.dummy_user_table = tf.lookup.StaticHashTable(tf.lookup.KeyValueTensorInitializer(self.dummy_users,range(len(dummy_users))), -1)
        # Keys here are tf.int32.
        self.product_table = tf.lookup.StaticHashTable(tf.lookup.KeyValueTensorInitializer(tf.constant(self.products,dtype=tf.int32), range(len(products))), -1)
        
        # One embedding row per known user / product.
        self.user_embedding = tf.keras.layers.Embedding(len(dummy_users), lenght_of_embedding)
        self.product_embedding = tf.keras.layers.Embedding(len(products), lenght_of_embedding)
        
        self.dot=tf.keras.layers.Dot(axes=-1)
    def call(self,inputs):
        """Return the dot product of the user and product embeddings.

        Args:
            inputs: indexable pair; `inputs[0]` holds user keys and
                `inputs[1]` holds product keys.
        """
        user=inputs[0]
        products=inputs[1]
        user_embedding_index=self.dummy_user_table.lookup(user)
        # NOTE(review): `products` is looked up in `dummy_user_table`, whose
        # keys are tf.string, while `product_table` (int32 keys) is never used
        # in this method. Passing int32 product IDs here is what raises
        # "Dtype of argument `keys` must be <dtype: 'string'>".
        prod_embedding_index=self.dummy_user_table.lookup(products)

        user_embedding_values=self.user_embedding(user_embedding_index)
        prod_embedding_values=self.product_embedding(prod_embedding_index)

        return self.dot([user_embedding_values,prod_embedding_values])

    @tf.function
    def call_item_item(self, product):
        """Return the IDs and scores of the 100 products scoring highest against `product`.

        Args:
            product: product key(s) to look up in `product_table`.
                # assumes a single product ID so that the reshape below yields
                # one score per product — TODO confirm against callers

        Returns:
            A `(top_ids, top_scores)` tuple: product IDs gathered from
            `self.products`, and their dot-product scores.
        """
        product_x = self.product_table.lookup(product)
        # Add a leading axis so the query embedding can be dotted against the
        # full embedding matrix below.
        pe = tf.expand_dims(self.product_embedding(product_x), 0)
        
        # NOTE: `embeddings` is only available once the Embedding layer has been built.
        all_pe = tf.expand_dims(self.product_embedding.embeddings, 0)
        # Flatten the dot-product result to a 1-D vector of scores.
        scores = tf.reshape(self.dot([pe, all_pe]), [-1])
        
        # NOTE(review): k is hard-coded to 100; presumably this requires at
        # least 100 products — confirm.
        top_scores, top_indices = tf.math.top_k(scores, k=100)
        # Map embedding row indices back to the original product IDs.
        top_ids = tf.gather(self.products, top_indices)
        return top_ids, top_scores 

For reference

dummy_users
array(['pmfkU4BNZhmtLgJQwJ7x', 'UDRRwOlzlWVbu7H8YCCi',
       'QHGAef0TI6dhn0wTogvW', ..., 'lcORJ5hemOZc1iGo9z7k',
       '5CqDquDAszqJp27P7AL8', 'SSPNYxJMfuKhoe1dg24m'], dtype='<U20')

and

products
array([ 8650774,  9306139,  9961521, ..., 12058614, 12058615, 11927550])

When I run the following code

# Build the model with 15 as the embedding length, then call it on a batch of
# two users (string keys, shape (2, 1)) with three product IDs each (int32, shape (2, 3)).
sr1=SimpleRecommender(dummy_users,products,15)
sr1([tf.constant([['lcORJ5hemOZc1iGo9z7k'],['QHGAef0TI6dhn0wTogvW']]),
     tf.constant([[8650774, 9306139, 9961521],[12058614, 12058615, 11927550]])])

I get this error

    TypeError                                 Traceback (most recent call last)
    <ipython-input-24-5cd8170aa0e4> in <module>()

  1 sr1=SimpleRecommender(dummy_users,products,15)
  2 sr1([tf.constant([['lcORJ5hemOZc1iGo9z7k'],['QHGAef0TI6dhn0wTogvW']]),
  ----> 3      tf.constant([[8650774, 9306139, 9961521],[12058614, 12058615, 11927550]])])

    1 frames
    <ipython-input-21-1cc8d8700b6c> in call(self, inputs)
 15         products=inputs[1]
 16         user_embedding_index=self.dummy_user_table.lookup(user)
 ---> 17         prod_embedding_index=self.dummy_user_table.lookup(products)
 18 
 19         user_embedding_values=self.user_embedding(user_embedding_index)

    TypeError: Exception encountered when calling layer "simple_recommender" (type SimpleRecommender).
    Dtype of argument `keys` must be <dtype: 'string'>, received: <dtype: 'int32'>
    Call arguments received:
    • inputs=['tf.Tensor(shape=(2, 1), dtype=string)', 'tf.Tensor(shape=(2, 3), dtype=int32)']

Any help will be much appreciated. Thanks!


Solution

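  • I think you are using the wrong lookup table in the `call` method: the products are looked up in `self.dummy_user_table`, whose keys are strings, instead of `self.product_table`, whose keys are int32. Try replacing it with this: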

    def call(self, inputs):
        """Score users against products via the dot product of their embeddings.

        Args:
            inputs: indexable pair; `inputs[0]` holds user keys for
                `dummy_user_table` and `inputs[1]` holds product keys for
                `product_table`.

        Returns:
            The output of `self.dot` applied to the user and product embeddings.
        """
        user_keys, product_keys = inputs[0], inputs[1]

        # Translate raw keys into embedding row indices, each through its own table.
        user_rows = self.dummy_user_table.lookup(user_keys)
        product_rows = self.product_table.lookup(product_keys)

        # Fetch the embedding vectors for those rows.
        user_vectors = self.user_embedding(user_rows)
        product_vectors = self.product_embedding(product_rows)

        return self.dot([user_vectors, product_vectors])
    

    Whole working example:

    import tensorflow as tf
    
    class SimpleRecommender(tf.keras.Model):
        """Keras model that scores (user, product) pairs by an embedding dot product.

        String user keys and int32 product keys are mapped to embedding row
        indices through two static hash tables, embedded, and combined with a
        `Dot` layer.
        """

        def __init__(self, dummy_users, products, lenght_of_embedding):
            """Create the lookup tables, the embedding layers and the dot layer.

            Args:
                dummy_users: user keys; stored as a `tf.string` constant.
                products: product keys; stored as a `tf.int32` constant.
                lenght_of_embedding: second argument passed to both
                    `Embedding` layers (the embedding dimension).
            """
            super().__init__()

            self.products = tf.constant(products, dtype=tf.int32)
            self.dummy_users = tf.constant(dummy_users, dtype=tf.string)

            # User key -> position in `dummy_users`; -1 for unknown keys.
            n_users = len(dummy_users)
            user_init = tf.lookup.KeyValueTensorInitializer(
                self.dummy_users, range(n_users)
            )
            self.dummy_user_table = tf.lookup.StaticHashTable(user_init, -1)

            # Product key -> position in `products`; -1 for unknown keys.
            product_keys = tf.constant(self.products, dtype=tf.int32)
            n_products = len(products)
            product_init = tf.lookup.KeyValueTensorInitializer(
                product_keys, range(n_products)
            )
            self.product_table = tf.lookup.StaticHashTable(product_init, -1)

            # One embedding row per known user / product.
            self.user_embedding = tf.keras.layers.Embedding(
                n_users, lenght_of_embedding
            )
            self.product_embedding = tf.keras.layers.Embedding(
                n_products, lenght_of_embedding
            )

            self.dot = tf.keras.layers.Dot(axes=-1)

        def call(self, inputs):
            """Return the dot product of the user and product embeddings.

            Args:
                inputs: indexable pair; `inputs[0]` holds user keys and
                    `inputs[1]` holds product keys.
            """
            user_keys, product_keys = inputs[0], inputs[1]

            # Each kind of key goes through its own table.
            user_rows = self.dummy_user_table.lookup(user_keys)
            product_rows = self.product_table.lookup(product_keys)

            user_vectors = self.user_embedding(user_rows)
            product_vectors = self.product_embedding(product_rows)

            return self.dot([user_vectors, product_vectors])
    
    # Vocabulary: three known users and three known products.
    dummy_users = tf.constant([
        'lcORJ5hemOZc1iGo9z7k',
        'UDRRwOlzlWVbu7H8YCCi',
        'QHGAef0TI6dhn0wTogvW',
    ])
    products = tf.constant([8650774, 9306139, 9961521])

    sr1 = SimpleRecommender(dummy_users, products, 15)

    # Batch of two users (shape (2, 1)), each paired with three product IDs (shape (2, 3)).
    user_batch = tf.constant([['lcORJ5hemOZc1iGo9z7k'], ['QHGAef0TI6dhn0wTogvW']])
    product_batch = tf.constant([
        [8650774, 9306139, 9961521],
        [8650774, 9306139, 9961521],
    ])
    sr1([user_batch, product_batch])