Search code examples
python-3.x, keras, tensorflow2.0, semantic-segmentation

Tensorflow custom loss Incompatible shapes


Model: DeepLab v3+. Backbone network: ResNet50. Custom loss: binary_crossentropy + dice loss.

I don't know why I got this Incompatible shapes error after I changed binary_crossentropy loss into binary_crossentropy + dice loss.

Here is my code.

import os

import cv2

import numpy as np

from glob import glob

from scipy.io import loadmat

import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow import keras

from tensorflow.keras import layers

# Side length, in pixels, passed to the model constructor as `image_size`.
IMAGE_SIZE = 400

# Number of channels produced by the model's final convolution.
NUM_CLASSES = 2

# Images and masks each get their own generator with identical settings:
# random horizontal/vertical flips and rescaling of pixel values by 1/255.
img_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True, vertical_flip=True, rescale=1./255
)
mask_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    horizontal_flip=True, vertical_flip=True, rescale=1./255
)

# NOTE(review): the image and mask iterators share seed=1, presumably so that
# shuffling and the random flips stay aligned between an image and its mask;
# confirm against the ImageDataGenerator documentation.
image_generator = img_gen.flow_from_directory(
    "/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/training/img",
    target_size=(400, 400),
    class_mode=None,
    color_mode="rgb",
    batch_size=8,
    seed=1,
)
# Masks are read through `mask_gen` (not `img_gen`) and as single-channel
# grayscale images.
mask_generator = mask_gen.flow_from_directory(
    "/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/training/mask",
    target_size=(400, 400),
    class_mode=None,
    color_mode="grayscale",
    batch_size=8,
    seed=1,
)

# Pair each image batch with its mask batch as (inputs, targets).
# or: train_generator = (image_generator, mask_generator)
train_generator = zip(image_generator, mask_generator)

image_valid_generator = img_gen.flow_from_directory(
    "/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/valid/img",
    target_size=(400, 400),
    class_mode=None,
    color_mode="rgb",
    batch_size=8,
    seed=1,
)
mask_valid_generator = mask_gen.flow_from_directory(
    "/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/valid/mask",
    target_size=(400, 400),
    class_mode=None,
    color_mode="grayscale",
    batch_size=8,
    seed=1,
)

valid_generator = zip(image_valid_generator, mask_valid_generator)

" Found 320 images belonging to 1 classes.

Found 320 images belonging to 1 classes.

Found 20 images belonging to 1 classes.

Found 20 images belonging to 1 classes."

def convolution_block(
    block_input,
    num_filters=256,
    kernel_size=3,
    dilation_rate=1,
    padding="same",
    use_bias=False,
):
    """Apply Conv2D, then BatchNormalization, then ReLU to ``block_input``.

    Args:
        block_input: Tensor fed to the convolution.
        num_filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        dilation_rate: Dilation rate of the convolution.
        padding: Padding mode forwarded to ``layers.Conv2D``.
        use_bias: Whether the convolution has a bias term.

    Returns:
        The activated output tensor.
    """
    x = layers.Conv2D(
        num_filters,
        kernel_size=kernel_size,
        dilation_rate=dilation_rate,
        # Forward the caller's choice instead of a fixed "same"; the default
        # keeps existing call sites unchanged.
        padding=padding,
        use_bias=use_bias,
        kernel_initializer=keras.initializers.HeNormal(),
    )(block_input)
    x = layers.BatchNormalization()(x)
    return tf.nn.relu(x)


def DilatedSpatialPyramidPooling(dspp_input):
    """Combine a pooled branch and four convolution branches of ``dspp_input``.

    One branch average-pools with a window sized from the input's third- and
    second-to-last dimensions, applies a 1x1 convolution block and upsamples
    back bilinearly. The other branches are convolution blocks with kernel 1
    (dilation 1) and kernel 3 (dilations 6, 12 and 18). All five outputs are
    concatenated on the last axis and passed through a 1x1 convolution block.

    Assumes a channels-last 4-D input (batch, height, width, channels);
    TODO confirm against callers.
    """
    in_height = dspp_input.shape[-3]
    in_width = dspp_input.shape[-2]

    # Pooled branch: pool, 1x1 conv block, then upsample to the input size.
    pooled = layers.AveragePooling2D(pool_size=(in_height, in_width))(dspp_input)
    pooled = convolution_block(pooled, kernel_size=1, use_bias=True)
    restore = layers.UpSampling2D(
        size=(in_height // pooled.shape[1], in_width // pooled.shape[2]),
        interpolation="bilinear",
    )
    branches = [restore(pooled)]

    # Convolution branches, created in the order 1x1, then 3x3 at each rate.
    branches.append(convolution_block(dspp_input, kernel_size=1, dilation_rate=1))
    for rate in (6, 12, 18):
        branches.append(
            convolution_block(dspp_input, kernel_size=3, dilation_rate=rate)
        )

    merged = layers.Concatenate(axis=-1)(branches)
    return convolution_block(merged, kernel_size=1)

def DeeplabV3Plus(image_size, num_classes):
    """Build a DeepLabV3+ style Keras model on an ImageNet ResNet50 backbone.

    Args:
        image_size: Height and width of the square, 3-channel model input.
        num_classes: Number of filters in the final 1x1 convolution, i.e. the
            channel count of the model output.

    Returns:
        A ``keras.Model`` mapping the input image to the output of the final
        convolution. No activation is specified on that layer.
    """
    inputs = keras.Input(shape=(image_size, image_size, 3))
    backbone = keras.applications.ResNet50(
        weights="imagenet", include_top=False, input_tensor=inputs
    )

    # Deep features go through the pyramid pooling module, then are upsampled
    # to a quarter of the input resolution.
    deep = DilatedSpatialPyramidPooling(
        backbone.get_layer("conv4_block6_2_relu").output
    )
    quarter = image_size // 4
    deep_up = layers.UpSampling2D(
        size=(quarter // deep.shape[1], quarter // deep.shape[2]),
        interpolation="bilinear",
    )(deep)

    # Shallow features are reduced to 48 channels with a 1x1 conv block.
    shallow = convolution_block(
        backbone.get_layer("conv2_block3_2_relu").output,
        num_filters=48,
        kernel_size=1,
    )

    decoded = layers.Concatenate(axis=-1)([deep_up, shallow])
    for _ in range(2):
        decoded = convolution_block(decoded)

    # Upsample back to the input resolution before the final 1x1 convolution.
    decoded = layers.UpSampling2D(
        size=(image_size // decoded.shape[1], image_size // decoded.shape[2]),
        interpolation="bilinear",
    )(decoded)
    head = layers.Conv2D(num_classes, kernel_size=(1, 1), padding="same")(decoded)
    return keras.Model(inputs=inputs, outputs=head)


# Build the model from the IMAGE_SIZE and NUM_CLASSES constants and print its
# layer-by-layer summary.
model = DeeplabV3Plus(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)
model.summary()

The output layer is "conv2d_9 (Conv2D) (None, 400, 400, 2) 514 ['up_sampling2d_2[0][0]']"

from keras import backend as K
def bce_logdice_loss(y_true, y_pred):
    """Return binary cross-entropy plus one minus a smoothed Dice coefficient.

    The Dice coefficient is computed per sample by summing over axes 1, 2 and
    3, with 1 added to numerator and denominator, then averaged over the batch
    axis. The shapes of both tensors are printed for debugging.
    """
    for tensor in (y_true, y_pred):
        print(tensor.shape)

    sample_axes = [1, 2, 3]
    overlap = K.sum(y_true * y_pred, axis=sample_axes)
    total = K.sum(y_true, axis=sample_axes) + K.sum(y_pred, axis=sample_axes)
    dice_coeff = K.mean((2. * overlap + 1) / (total + 1), axis=0)

    bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    return bce + 1.0 - dice_coeff


def my_iou(y_true, y_pred, smooth=1):
    """Intersection-over-union between the target and thresholded prediction.

    Both tensors are flattened to 1-D and multiplied element-wise, so they
    must contain the same number of elements.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Prediction tensor; values greater than 0.5 count as positive.
        smooth: Constant added to numerator and denominator so the ratio is
            defined when both tensors are empty of positives.

    Returns:
        Scalar tensor ``(intersection + smooth) / (union + smooth)``.
    """
    # Cast the target as well so the product below never mixes dtypes.
    y_true_f = K.cast(K.flatten(y_true), 'float32')
    y_pred = K.cast(y_pred, 'float32')
    y_pred_f = K.cast(K.greater(K.flatten(y_pred), 0.5), 'float32')
    # Reduce once and reuse the result for numerator and denominator.
    intersection = K.sum(y_true_f * y_pred_f)
    union = K.sum(y_true_f) + K.sum(y_pred_f) - intersection
    return (intersection + smooth) / (union + smooth)

from keras.callbacks import Callback, ModelCheckpoint
# Save the whole model (not just its weights) to the .h5 file, keeping only
# the best version as judged by the monitored `val_loss`.
checkpoint = ModelCheckpoint(
    '/content/gdrive/MyDrive/Colab Notebooks/my_deeplab_model_temp.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
    mode='auto'
)
# Top-level statement: it must start at column 0, otherwise Python raises an
# IndentationError.
model.compile(loss=bce_logdice_loss, optimizer="adam", metrics=['accuracy',my_iou])


# Train from the zipped (image, mask) generators. The checkpoint callback is
# passed explicitly so that it is actually invoked during training.
history = model.fit(
    train_generator,
    steps_per_epoch=320/8,
    batch_size=8,
    epochs=10,
    validation_data=valid_generator,
    validation_steps=20/8,
    callbacks=[checkpoint],
)

Epoch 1/10

(None, None, None, None)

(None, 400, 400, 2)

(None, None, None, None)

(None, 400, 400, 2)


InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-56-7b3b6ce46c0f> in <module>()
      
----> 1 history = model.fit(train_generator, steps_per_epoch=320/8,batch_size=8, epochs=10,validation_data=valid_generator,validation_steps=20/8,callbacks=[checkpoint])

1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     57     ctx.ensure_initialized()
     58     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 59                                         inputs, attrs, num_outputs)
     60   except core._NotOkStatusException as e:
     61     if name is not None:

InvalidArgumentError:  Incompatible shapes: [2560000] vs. [1280000]

I saw some similar questions suggesting a reshape, but that seems odd to me.

The examples I have seen on Kaggle do not use a reshape function.

I printed the shapes of y_pred and y_true to debug, but I don't know why every dimension of the y_true shape is None.

I wrote this code.

# Pull one batch from a fresh iterator over the training mask directory, using
# the same settings as the training mask iterator, in order to inspect it.
mask_test=next(img_gen.flow_from_directory("/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/training/mask",target_size=(400, 400),class_mode=None,color_mode="grayscale",batch_size=8,seed=1))

# Print the shape of the first mask in that batch.
print(mask_test[0].shape)

And I got this "(400, 400, 1)"

So I think my training ground truth is OK.

I don't know how to deal with this bug. I hope someone can help me.


Solution

  • Your bce_logdice_loss loss looks fine to me.

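    Do you know where 2560000 could come from? Note that 2560000 = 8 × 400 × 400 × 2, the number of elements in one batch of the model's (None, 400, 400, 2) output, while 1280000 = 8 × 400 × 400 × 1 is the number of elements in one batch of (400, 400, 1) masks. So somewhere a flattened prediction is being combined element-wise with a flattened mask; for example, my_iou flattens both tensors before multiplying them.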

    Note that the shapes of y_pred and y_true contain None at first because TensorFlow creates the computation graph without knowing the batch_size. Only once the graph has been created does the model use shapes with batch_size as the first dimension instead of None.