Search code examples
tensorflowkerasloss-function

TripletSemiHardLoss requests a shape 32 [[{{node TripletSemiHardLoss/PartitionedCall/Reshape}}]] [Op:__inference_train_function_1666]


I am currently working on a use case where I want to use TripletSemiHardLoss to obtain a similarity ranking. Basically, I have a training file that contains categorical variables, and each set of these variables has a corresponding category (label) it should be associated with. To achieve that, I have implemented the following code:

Import the data, one-hot encode it, separate it into anchor, positive and negative sets, build the model and train it:

def learn_ranking_loss(settings):
    """Fit the siamese triplet model with ``TripletSemiHardLoss`` for one epoch.

    The labelled data is loaded and preprocessed, then the training features
    and the first entry of the training labels are each cut into three
    consecutive slices. The slices are zipped into ``tf.data`` datasets,
    batched in groups of 32 and passed to ``model.fit``.

    Args:
        settings: Mapping providing ``"CONFIGURATION_FILE_PATH"`` and
            ``"BINARY_FEATURES"``.

    Returns:
        None.
    """
    (raw_data, label_columns, label_dict, features_dict, losses,
     loss_weights) = data_handling.training_data_labeling(
        ['Diagnosis'],
        settings["CONFIGURATION_FILE_PATH"],
        binary_features=settings["BINARY_FEATURES"],
        ignore=['Runtime', 'Consistency check'])
    (train_x, test_x, train_labels, test_labels, input_neuron_list,
     output_neuron_list) = data_preprocessing.data_preprocessing_learning(
        raw_data, label_columns)

    # Cut the features into three consecutive slices; the last slice takes
    # whatever remains after the first two.
    x_third = round(len(train_x) / 3)
    feature_parts = (
        train_x[:x_third],
        train_x[x_third:x_third * 2],
        train_x[x_third * 2:],
    )
    feature_ds = tf.data.Dataset.zip(
        tuple(tf.data.Dataset.from_tensor_slices(part) for part in feature_parts))

    # Same three-way cut for the first label entry, using its own length.
    y_third = round(len(train_labels[0]) / 3)
    label_parts = (
        train_labels[0][:y_third],
        train_labels[0][y_third:y_third * 2],
        train_labels[0][y_third * 2:],
    )
    label_ds = tf.data.Dataset.zip(
        tuple(tf.data.Dataset.from_tensor_slices(part) for part in label_parts))

    dataset = tf.data.Dataset.zip((feature_ds, label_ds)).batch(32)

    # Only the triplet model is trained here; the embedding model is unused.
    _, model = get_siamese_model([287, ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.001),
        loss=tfa.losses.TripletSemiHardLoss())

    model.fit(dataset, epochs=1)
    return

After one-hot encoding, each input tensor has shape [287,] and each label has shape [52,].

The model is created as follows:

def create_model(input_shape):
    """Build the embedding network as a Keras functional model.

    The network is two 287-unit ReLU dense layers followed by a 52-unit
    dense layer without activation.

    Args:
        input_shape: Shape passed to ``tf.keras.layers.Input``.

    Returns:
        A ``tf.keras.Model`` mapping the input to the 52-unit output.
    """
    net_input = tf.keras.layers.Input(shape=input_shape)
    hidden = tf.keras.layers.Dense(287, activation='relu')(net_input)
    hidden = tf.keras.layers.Dense(287, activation='relu')(hidden)
    embedding = tf.keras.layers.Dense(52, activation=None)(hidden)
    # NOTE: L2 normalisation of the output is left disabled here.
    return tf.keras.Model(net_input, embedding)


def get_siamese_model(input_shape):
    """Wrap one shared embedding network into a three-input triplet model.

    Args:
        input_shape: Shape used for each of the three inputs and for the
            embedding network built by ``create_model``.

    Returns:
        A tuple ``(embedding_model, siamese_triplet)``: the shared embedding
        network and the model whose inputs are named ``anchor_input``,
        ``positive_input`` and ``negative_input`` and whose outputs are the
        three corresponding embeddings, in that order.
    """
    # One named input per triplet member.
    input_names = ("anchor_input", "positive_input", "negative_input")
    inputs = [tf.keras.layers.Input(input_shape, name=input_name)
              for input_name in input_names]

    # A single embedding network is applied to every input, so all three
    # branches share the same weights.
    embedding_model = create_model(input_shape)
    outputs = [embedding_model(branch_input) for branch_input in inputs]

    siamese_triplet = tf.keras.Model(inputs=inputs, outputs=outputs)
    return embedding_model, siamese_triplet

When running the code the following error message is returned:

Traceback (most recent call last):
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\ranking_loss.py", line 94, in <module>
    learn_ranking_loss(settings_dict)
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\ranking_loss.py", line 74, in learn_ranking_loss
    history = model.fit(
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\Python interpreter\lib\site-packages\keras\engine\training.py", line 1189, in fit
    tmp_logs = self.train_function(iterator)
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\Python interpreter\lib\site-packages\tensorflow\python\eager\def_function.py", line 885, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\Python interpreter\lib\site-packages\tensorflow\python\eager\def_function.py", line 950, in _call
    return self._stateless_fn(*args, **kwds)
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\Python interpreter\lib\site-packages\tensorflow\python\eager\function.py", line 3039, in __call__
    return graph_function._call_flat(
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\Python interpreter\lib\site-packages\tensorflow\python\eager\function.py", line 1963, in _call_flat
    return self._build_call_outputs(self._inference_function.call(
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\Python interpreter\lib\site-packages\tensorflow\python\eager\function.py", line 591, in call
    outputs = execute.execute(
  File "C:\Users\mathi\Documents\Studium\Promotion\ConLearn\Python interpreter\lib\site-packages\tensorflow\python\eager\execute.py", line 59, in quick_execute
    tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError:  Input to reshape is a tensor with 1664 values, but the requested shape has 32
     [[{{node TripletSemiHardLoss/PartitionedCall/Reshape}}]] [Op:__inference_train_function_1666]

Function call stack:
train_function


Process finished with exit code 1

After doing some research I am sure that the problem is caused by the shape of my layers, but I was not able to figure out what needs to be changed. I also observed that 1664 divided by 32 (the batch size) is exactly 52, i.e. the output dimension. However, no matter how I change the shapes of my layers, the same issue occurs.

Any help would be very much appreciated. Thank you very much!

Best regards, Mathias


Solution

  • I have figured the problem out and resolved it.

    The problem was that TensorFlow's TripletSemiHardLoss expects the labels as plain categorical values (a single class value per training item), not as one-hot vectors. My mistake was to one-hot encode the labels as well, as you normally have to for cross-entropy loss functions.

    Please find the fixed code below. Please also note that, in the meantime, I switched from offline selection of anchor, positive and negative to online selection during training, since this is supported out of the box by the TensorFlow implementation of TripletSemiHardLoss:

    def create_model(input_shape):
        """Build the embedding network as a Keras functional model.

        The network is two 280-unit ReLU dense layers followed by a 52-unit
        dense layer without activation.

        Args:
            input_shape: Shape passed to ``tf.keras.layers.Input``.

        Returns:
            A ``tf.keras.Model`` mapping the input to the 52-unit output.
        """
        inp = tf.keras.layers.Input(shape=input_shape)
        x = tf.keras.layers.Dense(280, activation='relu')(inp)
        y = tf.keras.layers.Dense(280, activation='relu')(x)
        z = tf.keras.layers.Dense(52, activation=None)(y)
        # Optional L2 normalisation of the output, currently disabled:
        # l2_normalization = tf.keras.layers.Lambda(lambda y: tf.math.l2_normalize(y, axis=1))(z)

        model = tf.keras.Model(inp, z)
        return model
    
    def learn_ranking_loss(settings):
        """Train the embedding model with online semi-hard triplet mining.

        Features are one-hot encoded. The labels are taken from the first
        label column as-is (not one-hot encoded) and passed to
        ``TripletSemiHardLoss`` together with the model output.

        Args:
            settings: Mapping providing ``"CONFIGURATION_FILE_PATH"``,
                ``"VALIDATION_FILE_PATH"`` and ``"BINARY_FEATURES"``.

        Returns:
            None. Returns early, without training, when
            ``data_handling.data_consistency_triplet_loss`` gives a falsy
            result for the training and validation data.
        """
        one_hot = OneHotEncoder()

        # get data
        # build training data set
        training_data, label_columns, label_dict, features_dict, losses, loss_weights = data_handling.training_data_labeling(
            ['Diagnosis'], settings["CONFIGURATION_FILE_PATH"], binary_features=settings["BINARY_FEATURES"],
            ignore=['Runtime', 'Consistency check'])
        train_x = np.array(training_data)
        train_x = one_hot.fit_transform(train_x).toarray()
        train_y = label_columns[0].to_numpy()
        train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y))
        train_dataset = train_dataset.batch(128)

        # build test data set
        validation_data, label_columns, label_dict, losses, loss_weights = data_handling.data_labeling(
            ['Diagnosis'], settings["VALIDATION_FILE_PATH"], binary_features=settings["BINARY_FEATURES"],
            ignore=['Runtime', 'Consistency check'])

        # Run the consistency check on the raw data before the encoder is
        # applied to the validation rows, so a failed check still returns
        # quietly instead of reaching the encoder.
        if not data_handling.data_consistency_triplet_loss(training_data, validation_data):
            return

        test_x = np.array(validation_data)
        # Reuse the encoder fitted on the training data. Re-fitting on the
        # validation data could yield a different column layout, so the
        # encoded validation features would no longer line up with what the
        # model was trained on.
        test_x = one_hot.transform(test_x).toarray()
        test_y = label_columns[0].to_numpy()
        test_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y))
        test_dataset = test_dataset.batch(128)

        # create model
        # NOTE(review): the input width is hard-coded; presumably it equals
        # the one-hot width of train_x -- confirm against the data.
        model = create_model([280, ])
        # emb_mod, model = get_siamese_model([280, ])

        # train model
        epochs = 3
        lr = 0.001
        optimizer = tf.optimizers.Adam(learning_rate=lr)

        model.compile(optimizer=optimizer, loss=tfa.losses.TripletSemiHardLoss())  # , metrics=["accuracy"])
        model.summary()

        history = model.fit(train_dataset, epochs=epochs)
        return
    

    Hope this helps others who run into the same problem in the future.

    Best regards, Mathias