Tags: tensorflow, keras, deep-learning, conv-neural-network, semantic-segmentation

How to convert Keras Conv3D output to a Flatten layer for a classification task?


I am trying to build a model that can simultaneously segment and classify a 3D image. I am using a 3D U-Net built with the Keras functional API, where at the end of the model there are two output branches: one for segmentation of the 3D map and one for classifying the type of cancer.

The model works well when I only try to solve the segmentation problem, but combining both tasks throws an error.

The issue is converting the Keras Conv3D output into a flat vector for softmax classification. I tried both GlobalAveragePooling3D and Flatten(), but the problem remains.
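For context, a Conv3D output is 5-D: (batch, depth, height, width, channels). GlobalAveragePooling3D collapses the three spatial axes into one vector per sample, while Flatten keeps every voxel as a feature. A minimal shape check (illustrative sizes, not my real model):

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv3D, Flatten, GlobalAveragePooling3D

x = Input((16, 16, 16, 3))                   # (batch, D, H, W, C)
y = Conv3D(8, (3, 3, 3), padding='same')(x)  # -> (None, 16, 16, 16, 8)
print(GlobalAveragePooling3D()(y).shape)     # (None, 8)
print(Flatten()(y).shape)                    # (None, 32768) = 16*16*16*8

The full model: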

# use tensorflow.keras consistently; mixing `keras` and `tf.keras` imports can break models
import numpy as np
import tensorflow as tf
import segmentation_models_3D as sm
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Conv3D, MaxPooling3D, concatenate,
                                     Conv3DTranspose, BatchNormalization,
                                     Dropout, Lambda, Flatten, Dense,
                                     GlobalAveragePooling3D)
import tensorflow.keras.backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanIoU

kernel_initializer = 'he_uniform'  # try others if you want


def unet_model(IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH, IMG_CHANNELS, num_classes):
    # Build the model
    inputs = Input((IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH, IMG_CHANNELS), name="img")
    # s = Lambda(lambda x: x / 255)(inputs)  # not needed if inputs are normalized beforehand
    s = inputs

    #Contraction path
    c1 = Conv3D(16, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(s)
    c1 = Dropout(0.1)(c1)
    c1 = Conv3D(16, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c1)
    p1 = MaxPooling3D((2, 2, 2))(c1)

    c2 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(p1)
    c2 = Dropout(0.1)(c2)
    c2 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c2)
    p2 = MaxPooling3D((2, 2, 2))(c2)

    c3 = Conv3D(64, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(p2)
    c3 = Dropout(0.2)(c3)
    c3 = Conv3D(64, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c3)
    p3 = MaxPooling3D((2, 2, 2))(c3)

    c4 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(p3)
    c4 = Dropout(0.2)(c4)
    c4 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c4)
    p4 = MaxPooling3D(pool_size=(2, 2, 2))(c4)

    c5 = Conv3D(256, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(p4)
    c5 = Dropout(0.3)(c5)
    c5 = Conv3D(256, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c5)

    #Expansive path
    u6 = Conv3DTranspose(128, (2, 2, 2), strides=(2, 2, 2), padding='same')(c5)
    u6 = concatenate([u6, c4])
    c6 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(u6)
    c6 = Dropout(0.2)(c6)
    c6 = Conv3D(128, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c6)

    u7 = Conv3DTranspose(64, (2, 2, 2), strides=(2, 2, 2), padding='same')(c6)
    u7 = concatenate([u7, c3])
    c7 = Conv3D(64, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(u7)
    c7 = Dropout(0.2)(c7)
    c7 = Conv3D(64, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c7)

    u8 = Conv3DTranspose(32, (2, 2, 2), strides=(2, 2, 2), padding='same')(c7)
    u8 = concatenate([u8, c2])
    c8 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(u8)
    c8 = Dropout(0.1)(c8)
    c8 = Conv3D(32, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c8)

    u9 = Conv3DTranspose(16, (2, 2, 2), strides=(2, 2, 2), padding='same')(c8)
    u9 = concatenate([u9, c1])
    c9 = Conv3D(16, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(u9)
    c9 = Dropout(0.1)(c9)
    c9 = Conv3D(16, (3, 3, 3), activation='relu', kernel_initializer=kernel_initializer, padding='same')(c9)

    output_mask = Conv3D(num_classes, (1, 1, 1), activation='softmax', name='mask')(c9)
    output_mask_1 = Conv3D(num_classes, (1, 1, 1), activation='softmax')(c9)  # feeds the classification branch

    # Adding the 1-D classification part.
    # Alternative head I also tried:
    # aggregated = Lambda(lambda x: K.sum(x, axis=1))(output_mask_1)
    # flat = Flatten()(aggregated)
    # output_label = Dense(3, activation="relu")(flat)
    # output_label = Dense(1, name="lab", activation="relu")(output_label)

    output_label = GlobalAveragePooling3D()(output_mask_1)
    output_label = Dense(units=4, activation="relu")(output_label)
    output_label = Dense(units=3, activation="softmax", name="lab")(output_label)

    model = Model(inputs=inputs, outputs=[output_mask, output_label],
                  name="final_output")
    # compile the model outside of this function to keep it flexible
    model.summary()

    return model
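
A quick sanity check of the two heads (smaller illustrative sizes, an untested sketch): the mask output should stay 5-D while the label output is 2-D.

m = unet_model(IMG_HEIGHT=64, IMG_WIDTH=64, IMG_DEPTH=64, IMG_CHANNELS=3, num_classes=4)
for t in m.outputs:
    print(t.shape)
# expected: (None, 64, 64, 64, 4) for "mask" and (None, 3) for "lab"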

Dataset preprocessing, where I yield one input image batch and two sets of labels:

def imageLoader(img_dir, img_list, mask_dir, mask_list, label_dir, label_list, batch_size):
    L = 5  # len(img_list)
    # Keras needs the generator to be infinite, so we use `while True`
    while True:
        batch_start = 0
        batch_end = batch_size
        while batch_start < L:
            limit = min(batch_end, L)
            X = load_img(img_dir, img_list[batch_start:limit])
            Y = load_img(mask_dir, mask_list[batch_start:limit])

            # classification label: hard-coded one-hot rows for a 3-class
            # problem, one row per sample in the batch
            yield ({"img": X}, {"mask": Y, "lab": np.array([[0, 1, 0], [0, 1, 0]])})

            batch_start += batch_size
            batch_end += batch_size
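
The one-hot labels above are hard-coded just to get the pipeline running; with real per-sample labels, a batch slice would replace them. A sketch assuming a (num_samples, 3) one-hot array all_labels (hypothetical name) and the same load_img helper:

import numpy as np

def imageLoaderWithLabels(img_dir, img_list, mask_dir, mask_list, all_labels, batch_size):
    L = len(img_list)
    while True:  # Keras expects an infinite generator
        for batch_start in range(0, L, batch_size):
            limit = min(batch_start + batch_size, L)
            X = load_img(img_dir, img_list[batch_start:limit])
            Y = load_img(mask_dir, mask_list[batch_start:limit])
            lab = np.asarray(all_labels[batch_start:limit])  # (batch, 3) one-hot rows
            yield ({"img": X}, {"mask": Y, "lab": lab})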

Basic settings.

import segmentation_models_3D as sm

dice_loss = sm.losses.DiceLoss(class_weights=np.array([wt0, wt1, wt2, wt3]))
focal_loss = sm.losses.CategoricalFocalLoss()
total_loss = dice_loss + (1 * focal_loss)

metrics = [sm.metrics.IOUScore(threshold=0.5), 'accuracy']

LR = 0.0001
optim = tf.keras.optimizers.Adam(LR)

model = unet_model(IMG_HEIGHT=128,
                   IMG_WIDTH=128,
                   IMG_DEPTH=128,
                   IMG_CHANNELS=3,
                   num_classes=4)

model.compile(optimizer=optim,
              loss={"mask": total_loss,
                    "lab": tf.keras.losses.BinaryCrossentropy()},
              metrics=metrics)
print(model.summary())


history = model.fit_generator(
    train_img_datagen,
    steps_per_epoch=steps_per_epoch,
    epochs=3,
    verbose=1,
    validation_data=val_img_datagen,
    validation_steps=val_steps_per_epoch,
)

I am not able to understand this error:

ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1021, in train_function *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/segmentation_models_3D/metrics.py", line 62, in __call__ *
        **self.submodules
    File "/usr/local/lib/python3.10/dist-packages/segmentation_models_3D/base/functional.py", line 93, in iou_score *
        intersection = backend.sum(gt * pr, axis=axes)
    File "/usr/local/lib/python3.10/dist-packages/keras/backend.py", line 2544, in sum
        return tf.reduce_sum(x, axis, keepdims)

    ValueError: Invalid reduction dimension 2 for input with 2 dimensions. for '{{node Sum_10}} = Sum[T=DT_FLOAT, Tidx=DT_INT32, keep_dims=false](mul_5, Sum_10/reduction_indices)' with input shapes: [?,3], [4] and with computed input tensors: input1 = <0 1 2 3>.


Solution

  • This is a good example of why the full stack trace matters and why one should post more than just the last line. The last line shows you what the error is, and the lines before it show you (hopefully) where it originated.

    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1021, in train_function * return step_function(self, iterator) File "/usr/local/lib/python3.10/dist-packages/segmentation_models_3D/metrics.py", line 62, in call * **self.submodules File "/usr/local/lib/python3.10/dist-packages/segmentation_models_3D/base/functional.py", line 93, in iou_score * intersection = backend.sum(gt * pr, axis=axes) File "/usr/local/lib/python3.10/dist-packages/keras/backend.py", line 2544, in sum return tf.reduce_sum(x, axis, keepdims)

    ValueError: Invalid reduction dimension 2 for input with 2 dimensions. for '{{node Sum_10}} = Sum[T=DT_FLOAT, Tidx=DT_INT32, keep_dims=false](mul_5, Sum_10/reduction_indices)' with input shapes: [?,3], [4] and with computed input tensors: input1 = <0 1 2 3>.

    The iou_score line (intersection = backend.sum(gt * pr, axis=axes)) is the one I suspect throws the error. You specified which loss you want to use for which output, and I suspect you have to do this for the metrics too, because IOUScore is meant for the mask output only; as written, Keras also applies it to the lab output and tries to sum it over an axis that does not exist.
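
    The same failure can be triggered directly (a minimal sketch, not the library's actual code path): summing a 2-D (batch, 3) tensor over spatial axes fails exactly like the trace above.

    import tensorflow as tf

    lab_pred = tf.zeros((2, 3))  # shape of the "lab" output: (batch, 3)
    tf.reduce_sum(lab_pred, axis=[0, 1, 2, 3])
    # fails with an "Invalid reduction dimension" error, as in the trace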
    So you can change

    metrics = [ sm.metrics.IOUScore(threshold=0.5), 'accuracy']
    

    to

    metrics = {'mask': sm.metrics.IOUScore(threshold=0.5), 'lab': 'accuracy'}
    

    You could add accuracy to mask too, then it would look like this:

    metrics = {'mask': [sm.metrics.IOUScore(threshold=0.5), 'accuracy'], 'lab': 'accuracy'}
    

    I did not test this solution, so let me know if it works for you.
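
    For completeness, the resulting compile call would look roughly like this (an untested sketch; the loss dict is unchanged from the question, although with a 3-class softmax and one-hot labels, CategoricalCrossentropy may be a better fit for "lab" than BinaryCrossentropy):

    model.compile(
        optimizer=optim,
        loss={"mask": total_loss,
              "lab": tf.keras.losses.BinaryCrossentropy()},
        metrics={"mask": [sm.metrics.IOUScore(threshold=0.5), "accuracy"],
                 "lab": "accuracy"},
    )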