model:Deeplab v3+ backbone network:resnet50 custom loss:binary_crossentropy + dice loss
I don't know why I got this Incompatible shapes error after I changed binary_crossentropy loss into binary_crossentropy + dice loss.
Here is my code.
import os
import cv2
import numpy as np
from glob import glob
from scipy.io import loadmat
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
IMAGE_SIZE = 400
NUM_CLASSES = 2
img_gen = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True,vertical_flip=True,rescale=1./255)
mask_gen = tf.keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True,vertical_flip=True,rescale=1./255)
image_generator = img_gen.flow_from_directory("/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/training/img",target_size=(400, 400),class_mode=None,color_mode="rgb",batch_size=8,seed=1)
mask_generator = img_gen.flow_from_directory("/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/training/mask",target_size=(400, 400),class_mode=None,color_mode="grayscale",batch_size=8,seed=1)
train_generator = zip(image_generator, mask_generator)#或train_generator = (image_generator, mask_generator)
image_valid_generator = img_gen.flow_from_directory("/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/valid/img",target_size=(400, 400),class_mode=None,color_mode="rgb",batch_size=8,seed=1)
mask_valid_generator = img_gen.flow_from_directory("/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/valid/mask",target_size=(400, 400),class_mode=None,color_mode="grayscale",batch_size=8,seed=1)
valid_generator = zip(image_valid_generator, mask_valid_generator)
" Found 320 images belonging to 1 classes.
Found 320 images belonging to 1 classes.
Found 20 images belonging to 1 classes.
Found 20 images belonging to 1 classes."
def convolution_block(
block_input,
num_filters=256,
kernel_size=3,
dilation_rate=1,
padding="same",
use_bias=False,
):
x = layers.Conv2D(
num_filters,
kernel_size=kernel_size,
dilation_rate=dilation_rate,
padding="same",
use_bias=use_bias,
kernel_initializer=keras.initializers.HeNormal(),
)(block_input)
x = layers.BatchNormalization()(x)
return tf.nn.relu(x)
def DilatedSpatialPyramidPooling(dspp_input):
dims = dspp_input.shape
x = layers.AveragePooling2D(pool_size=(dims[-3], dims[-2]))(dspp_input)
x = convolution_block(x, kernel_size=1, use_bias=True)
out_pool = layers.UpSampling2D(
size=(dims[-3] // x.shape[1], dims[-2] // x.shape[2]), interpolation="bilinear",
)(x)
out_1 = convolution_block(dspp_input, kernel_size=1, dilation_rate=1)
out_6 = convolution_block(dspp_input, kernel_size=3, dilation_rate=6)
out_12 = convolution_block(dspp_input, kernel_size=3, dilation_rate=12)
out_18 = convolution_block(dspp_input, kernel_size=3, dilation_rate=18)
x = layers.Concatenate(axis=-1)([out_pool, out_1, out_6, out_12, out_18])
output = convolution_block(x, kernel_size=1)
return output
def DeeplabV3Plus(image_size, num_classes):
model_input = keras.Input(shape=(image_size, image_size, 3))
resnet50 = keras.applications.ResNet50(
weights="imagenet", include_top=False, input_tensor=model_input
)
x = resnet50.get_layer("conv4_block6_2_relu").output
x = DilatedSpatialPyramidPooling(x)
input_a = layers.UpSampling2D(
size=(image_size // 4 // x.shape[1], image_size // 4 // x.shape[2]),
interpolation="bilinear",
)(x)
input_b = resnet50.get_layer("conv2_block3_2_relu").output
input_b = convolution_block(input_b, num_filters=48, kernel_size=1)
x = layers.Concatenate(axis=-1)([input_a, input_b])
x = convolution_block(x)
x = convolution_block(x)
x = layers.UpSampling2D(
size=(image_size // x.shape[1], image_size // x.shape[2]),
interpolation="bilinear",
)(x)
model_output = layers.Conv2D(num_classes, kernel_size=(1, 1), padding="same")(x)
return keras.Model(inputs=model_input, outputs=model_output)
model = DeeplabV3Plus(image_size=IMAGE_SIZE, num_classes=NUM_CLASSES)
model.summary()
The output layer is "conv2d_9 (Conv2D) (None, 400, 400, 2) 514 ['up_sampling2d_2[0][0]']"
from keras import backend as K
def bce_logdice_loss(y_true, y_pred):
print(y_true.shape)
print(y_pred.shape)
intersection = K.sum(y_true * y_pred, axis=[1,2,3])
union = K.sum(y_true, axis=[1,2,3]) + K.sum(y_pred, axis=[1,2,3])
dice_loss = K.mean( (2. * intersection + 1) / (union + 1), axis=0)
return tf.keras.losses.binary_crossentropy(y_true, y_pred) +1.0 - dice_loss
def my_iou(y_true, y_pred, smooth=1):
y_true_f = K.flatten(y_true)
y_pred = K.cast(y_pred, 'float32')
y_pred_f = K.cast(K.greater(K.flatten(y_pred), 0.5), 'float32')
intersection = y_true_f * y_pred_f
iou_result = ( K.sum(intersection)+1.0)/(K.sum(y_true_f)+K.sum(y_pred_f)-K.sum(intersection)+1.0)
return iou_result
from keras.callbacks import Callback, ModelCheckpoint
checkpoint = ModelCheckpoint(
'/content/gdrive/MyDrive/Colab Notebooks/my_deeplab_model_temp.h5',
monitor='val_loss',
save_best_only=True,
save_weights_only=False,
mode='auto'
)
model.compile(loss=bce_logdice_loss, optimizer="adam", metrics=['accuracy',my_iou])
history = model.fit(train_generator, steps_per_epoch=320/8,batch_size=8, epochs=10,validation_data=valid_generator,validation_steps=20/8)
Epoch 1/10
(None, None, None, None)
(None, 400, 400, 2)
(None, None, None, None)
(None, 400, 400, 2)
InvalidArgumentError Traceback (most recent call last)
<ipython-input-56-7b3b6ce46c0f> in <module>()
----> 1 history = model.fit(train_generator, steps_per_epoch=320/8,batch_size=8, epochs=10,validation_data=valid_generator,validation_steps=20/8,callbacks=[checkpoint])
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57 ctx.ensure_initialized()
58 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 59 inputs, attrs, num_outputs)
60 except core._NotOkStatusException as e:
61 if name is not None:
InvalidArgumentError: Incompatible shapes: [2560000] vs. [1280000]
I saw some similar questions say that you can try using reshape function, but I think that it is weird.
Because I saw some examples on the Kaggle. They don't use reshape function.
And I print the y_pred and y_true shape in order to debug. But I don't know why the y_true shape is none.
I wrote this code.
mask_test=next(img_gen.flow_from_directory("/content/gdrive/MyDrive/Colab Notebooks/my_dataset/road_data/training/mask",target_size=(400, 400),class_mode=None,color_mode="grayscale",batch_size=8,seed=1))
print(mask_test[0].shape)
And I got this "(400, 400, 1)"
So I think my training ground truth is OK.
I don't know how to deal with this bug. I hope someone can help me.
Your bce_logdice_loss
loss looks fine to me.
Do you know where 2560000
could come from?
Note that the shape of y_pred
and y_true
is None
at first because Tensorflow is creating the computation graph without knowing the batch_size
. Once created only, the model will use shapes with batch_size
as first dimension instead of None
.