python tensorflow keras conv-neural-network max-pooling

Upsampling with pooling indices keras (unpooling)

I start by saying that I'm kinda new to deep learning

I'm trying to write a segnet in keras that uses pooling indices to upsample.

I'm using this function with a Lambda Layer to perform a max pooling and save pooling indices:

def pool_argmax2D(x, pool_size=(2,2), strides=(2,2)):
    padding = 'SAME'
    pool_size = [1, pool_size[0], pool_size[1], 1]
    strides = [1, strides[0], strides[1], 1]
    ksize = [1, pool_size[0], pool_size[1], 1]
    output, argmax = tf.nn.max_pool_with_argmax(
        x,
        ksize = ksize,
        strides = strides,
        padding = padding
    )

    return [output, argmax]

[...]
pool_4, mask_4 = Lambda(pool_argmax2D, arguments={'pool_size': pool_size, 'strides': pool_size})(conv_10)
[...]

It seems working. In my model summary it returns a tensor of shape (None, h/2, w/2, channels). However I'm having some issues to find or write a working unpooling function. I'm unable to return a tensor of shape (None, 2h,2w, channels) (None for batch size)

I have already tried these unpooling function (but not only) i found on stackoverflow: Function1 Function2

With no results

Can anybody help me? Thanks

EDIT: This is the model I'm trying to use

def getSegNet3(n_ch, height , width, n_labels, pool_size=(2, 2), output_mode="sigmoid"):
    # encoder
    inputs = Input(shape=(n_ch, height, width))

    conv_1 = Conv2D(16, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(inputs)
    conv_1 = BatchNormalization(axis=1)(conv_1)
    conv_1 = Activation("relu")(conv_1)
    conv_2 = Conv2D(16, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_1)

    conv_2 = BatchNormalization(axis=1)(conv_2)
    conv_2 = Activation("relu")(conv_2)

    conv_2 = core.Permute((2, 3, 1))(conv_2)
    pool_1, mask_1 = Lambda(pool_argmax2D, arguments={'pool_size': pool_size, 'strides': pool_size})(conv_2)
    pool_1 = core.Permute((3, 1, 2))(pool_1)

    conv_3 = Conv2D(32, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(pool_1)
    conv_3 = BatchNormalization(axis=1)(conv_3)
    conv_3 = Activation("relu")(conv_3)
    conv_4 = Conv2D(32, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_3)
    conv_4 = BatchNormalization(axis=1)(conv_4)
    conv_4 = Activation("relu")(conv_4)

    conv_4 = core.Permute((2, 3, 1))(conv_4)
    pool_2, mask_2 = Lambda(pool_argmax2D, arguments={'pool_size': pool_size, 'strides': pool_size})(conv_4)
    pool_2 = core.Permute((3, 1, 2))(pool_2)

    conv_5 = Conv2D(64, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(pool_2)
    conv_5 = BatchNormalization(axis=1)(conv_5)
    conv_5 = Activation("relu")(conv_5)
    conv_6 = Conv2D(64, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_5)
    conv_6 = BatchNormalization(axis=1)(conv_6)
    conv_6 = Activation("relu")(conv_6)
    conv_7 = Conv2D(64, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_6)
    conv_7 = BatchNormalization(axis=1)(conv_7)
    conv_7 = Activation("relu")(conv_7)

    conv_7 = core.Permute((2, 3, 1))(conv_7)
    pool_3, mask_3 = Lambda(pool_argmax2D, arguments={'pool_size': pool_size, 'strides': pool_size})(conv_7)
    pool_3 = core.Permute((3, 1, 2))(pool_3)

    conv_8 = Conv2D(128, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(pool_3)
    conv_8 = BatchNormalization(axis=1)(conv_8)
    conv_8 = Activation("relu")(conv_8)
    conv_9 = Conv2D(128, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_8)
    conv_9 = BatchNormalization(axis=1)(conv_9)
    conv_9 = Activation("relu")(conv_9)
    conv_10 = Conv2D(128, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_9)
    conv_10 = BatchNormalization(axis=1)(conv_10)
    conv_10 = Activation("relu")(conv_10)

    conv_10 = core.Permute((2, 3, 1))(conv_10)
    pool_4, mask_4 = Lambda(pool_argmax2D, arguments={'pool_size': pool_size, 'strides': pool_size})(conv_10)
    pool_4 = core.Permute((3, 1, 2))(pool_4)

    conv_11 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(pool_4)
    conv_11 = BatchNormalization(axis=1)(conv_11)
    conv_11 = Activation("relu")(conv_11)
    conv_12 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_11)
    conv_12 = BatchNormalization(axis=1)(conv_12)
    conv_12 = Activation("relu")(conv_12)
    conv_13 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_12)
    conv_13 = BatchNormalization(axis=1)(conv_13)
    conv_13 = Activation("relu")(conv_13)

    conv_13 = core.Permute((2, 3, 1))(conv_13)
    pool_5, mask_5 = Lambda(pool_argmax2D, arguments={'pool_size': pool_size, 'strides': pool_size})(conv_13)

    print("Build encoder done..")

    # decoder


    #unpool_1 = MaxUnpooling2D(pool_5, mask_5,(None,4,4,256))
    unpool_1 = Lambda(unpool2D,output_shape=(4,4,256),arguments={'ind':mask_5})(pool_5)
    unpool_1 = core.Permute((3, 1, 2))(unpool_1)

    conv_14 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(unpool_1)
    conv_14 = BatchNormalization(axis=1)(conv_14)
    conv_14 = Activation("relu")(conv_14)
    conv_15 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_14)
    conv_15 = BatchNormalization(axis=1)(conv_15)
    conv_15 = Activation("relu")(conv_15)
    conv_16 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_15)
    conv_16 = BatchNormalization(axis=1)(conv_16)
    conv_16 = Activation("relu")(conv_16)

    conv_16 = core.Permute((2, 3, 1))(conv_16)
    unpool_2 = Lambda(unpool2D,output_shape=(8,8,256),arguments={'ind':mask_4})(conv_16)
    unpool_2 = core.Permute((3, 1, 2))(unpool_2)

    conv_17 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(unpool_2)
    conv_17 = BatchNormalization(axis=1)(conv_17)
    conv_17 = Activation("relu")(conv_17)
    conv_18 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_17)
    conv_18 = BatchNormalization(axis=1)(conv_18)
    conv_18 = Activation("relu")(conv_18)
    conv_19 = Conv2D(128, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_18)
    conv_19 = BatchNormalization(axis=1)(conv_19)
    conv_19 = Activation("relu")(conv_19)

    conv_19 = core.Permute((2, 3, 1))(conv_19)
    unpool_3 = Lambda(unpool2D,output_shape=(16,16,128),arguments={'ind':mask_3})(conv_19)
    unpool_3 = core.Permute((3, 1, 2))(unpool_3)


    conv_20 = Conv2D(128, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(unpool_3)
    conv_20 = BatchNormalization(axis=1)(conv_20)
    conv_20 = Activation("relu")(conv_20)
    conv_21 = Conv2D(128, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_20)
    conv_21 = BatchNormalization(axis=1)(conv_21)
    conv_21 = Activation("relu")(conv_21)
    conv_22 = Conv2D(64, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_21)
    conv_22 = BatchNormalization(axis=1)(conv_22)
    conv_22 = Activation("relu")(conv_22)

    conv_22 = core.Permute((2, 3, 1))(conv_22)
    unpool_4 = Lambda(unpool2D,output_shape=(32,32,64),arguments={'ind':mask_2})(conv_22)
    unpool_4 = core.Permute((3, 1, 2))(unpool_4)

    conv_23 = Conv2D(64, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(unpool_4)
    conv_23 = BatchNormalization(axis=1)(conv_23)
    conv_23 = Activation("relu")(conv_23)
    conv_24 = Conv2D(32, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(conv_23)
    conv_24 = BatchNormalization(axis=1)(conv_24)
    conv_24 = Activation("relu")(conv_24)

    conv_24 = core.Permute((2, 3, 1))(conv_24)
    unpool_5 = Lambda(unpool2D,output_shape=(64,64,32),arguments{'ind':mask_1})(conv_24)
    unpool_5 = core.Permute((3, 1, 2))(unpool_5)

    conv_25 = Conv2D(32, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(unpool_5)
    conv_25 = BatchNormalization(axis=1)(conv_25)
    conv_25 = Activation("relu")(conv_25)
    conv_26 = Convolution2D(n_labels, (1, 1), padding="valid", data_format="channels_first")(conv_25)
    conv_26 = BatchNormalization(axis=1)(conv_26)


    outputs = Activation(output_mode)(conv_26)
    print("Build decoder done..")

    model = Model(inputs=inputs, outputs=outputs, name="SegNet")

    return model

Function I'm trying to use:

def unpool2D(pool, ind, ksize=(2,2)):
    with tf.compat.v1.variable_scope("unpool"):
        input_shape =  tf.shape(pool)
        output_shape = [input_shape[0],
                        input_shape[1] * ksize[0],
                        input_shape[2] * ksize[1],
                        input_shape[3]]

        flat_input_size = tf.math.cumprod(input_shape)[-1]
        flat_output_shape = tf.cast([output_shape[0],
                            output_shape[1] * output_shape[2] * output_shape[3]], tf.int64)

        pool_ = tf.reshape(pool, [flat_input_size])
        batch_range = tf.reshape(tf.range(tf.cast(output_shape[0], tf.int64), dtype=tf.int64),
                                shape=[input_shape[0], 1, 1, 1])

        b = tf.ones_like(ind) * batch_range
        b = tf.reshape(b, [flat_input_size, 1])

        ind_ = tf.reshape(ind, [flat_input_size, 1]) % flat_output_shape[1]
        ind_ = tf.concat([b, ind_], 1)
        ret = tf.scatter_nd(ind_, pool_, shape=flat_output_shape)
        ret = tf.reshape(ret, output_shape)
        return ret

This is what I get:

~/bones-adamo/models.py in getSegNet3(n_ch, height, width, n_labels, pool_size, output_mode)
   1013     unpool_1 = core.Permute((3, 1, 2))(unpool_1)
   1014 
-> 1015     conv_14 = Conv2D(256, (3, 3), kernel_initializer='he_normal', padding='same',data_format='channels_first')(unpool_1)
   1016     conv_14 = BatchNormalization(axis=1)(conv_14)
   1017     conv_14 = Activation("relu")(conv_14)

~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    923     # >> model = tf.keras.Model(inputs, outputs)
    924     if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
--> 925       return self._functional_construction_call(inputs, args, kwargs,
    926                                                 input_list)
    927 

~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
   1096         # Build layer if applicable (if the `build` method has been
   1097         # overridden).
-> 1098         self._maybe_build(inputs)
   1099         cast_inputs = self._maybe_cast_inputs(inputs, input_list)
   1100 

~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
   2641         # operations.
   2642         with tf_utils.maybe_init_scope(self):
-> 2643           self.build(input_shapes)  # pylint:disable=not-callable
   2644       # We must set also ensure that the layer is marked as built, and the build
   2645       # shape is stored since user defined build functions may not be calling

~/venv/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py in build(self, input_shape)
    185   def build(self, input_shape):
    186     input_shape = tensor_shape.TensorShape(input_shape)
--> 187     input_channel = self._get_input_channel(input_shape)
    188     if input_channel % self.groups != 0:
    189       raise ValueError(

~/venv/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py in _get_input_channel(self, input_shape)
    357     channel_axis = self._get_channel_axis()
    358     if input_shape.dims[channel_axis].value is None:
--> 359       raise ValueError('The channel dimension of the inputs '
    360                        'should be defined. Found `None`.')
    361     return int(input_shape[channel_axis])

ValueError: The channel dimension of the inputs should be defined. Found `None`.

Solution

Okay, I solved my issue. There was a model architecture problem I didn't find at the first time. If you want to use pooling indices to upsample, I suggest you to use these custom layers here.

class MaxUnpooling2D(Layer):
    def __init__(self, size=(2, 2), **kwargs):
        super(MaxUnpooling2D, self).__init__(**kwargs)
        self.size = size

    def call(self, inputs, output_shape=None):
        updates, mask = inputs[0], inputs[1]
        with tf.compat.v1.variable_scope(self.name):
            mask = K.cast(mask, 'int32')
            input_shape = tf.shape(updates, out_type='int32')
            #print(updates.shape)
            #print(mask.shape)
            if output_shape is None:
                output_shape = (
                    input_shape[0],
                    input_shape[1] * self.size[0],
                    input_shape[2] * self.size[1],
                    input_shape[3])

            ret = tf.scatter_nd(K.expand_dims(K.flatten(mask)),
                                  K.flatten(updates),
                                  [K.prod(output_shape)])

            input_shape = updates.shape
            out_shape = [-1,
                         input_shape[1] * self.size[0],
                         input_shape[2] * self.size[1],
                         input_shape[3]]
        return K.reshape(ret, out_shape)

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'size': self.size
        })
        return config

    def compute_output_shape(self, input_shape):
        mask_shape = input_shape[1]
        return (
                mask_shape[0],
                mask_shape[1]*self.size[0],
                mask_shape[2]*self.size[1],
                mask_shape[3]
                )

Usage example:

unpool_3 = MaxUnpooling2D()([conv_19,mask_3])

I added get_config to avoid this error:

NotImplementedError: Layer MaxPoolingWithArgmax2D has arguments in `__init__` and therefore must override `get_config`.

Hope this answer could help another user