
ValueError: expected ndim=3, found ndim=2 after replacing BatchNormalization


I'm programming in Python 3.7.5, using Keras with the TensorFlow 1.13.1 backend.

I want to remove the batch normalization layers from the model coded below:

from keras import backend as K
from keras.callbacks import *
from keras.layers import *
from keras.models import *
from keras.utils import *
from keras.optimizers import Adadelta, RMSprop, Adam, SGD
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard

from config import *


def ctc_lambda_func(args):
    iy_pred, ilabels, iinput_length, ilabel_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    iy_pred = iy_pred[:, 2:, :]  # no such influence
    return K.ctc_batch_cost(ilabels, iy_pred, iinput_length, ilabel_length)


def CRNN_model(is_training=True):
    inputShape = Input((width, height, 1), name='input')  # based on TensorFlow backend (channels_last)
    conv_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputShape)
    conv_2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv_1)
    #batchnorm_2 = BatchNormalization()(conv_2)
    pool_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)

    conv_3 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool_2)
    conv_4 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv_3)
    #batchnorm_4 = BatchNormalization()(conv_4)
    pool_4 = MaxPooling2D(pool_size=(2, 2))(conv_4)

    conv_5 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool_4)
    conv_6 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv_5)
    pool_5 = MaxPool2D(pool_size=(2, 2))(conv_6)
    #batchnorm_6 = BatchNormalization()(conv_6)

    #bn_shape = batchnorm_6.get_shape()


    #print(bn_shape)

    #x_reshape = Reshape(target_shape=(int(bn_shape[1]), int(bn_shape[2] * bn_shape[3])))(batchnorm_6)
    #drop_reshape = Dropout(0.25, name='d1')(x_reshape)
    fl_1 = Flatten()(pool_5)
    fc_1 = Dense(256, activation='relu')(fl_1)

    #print(x_reshape.get_shape())
    #print(fc_1.get_shape())

    bi_LSTM_1 = Bidirectional(LSTM(256, return_sequences=True, kernel_initializer='he_normal'), merge_mode='sum')(fc_1)
    bi_LSTM_2 = Bidirectional(LSTM(128, return_sequences=True, kernel_initializer='he_normal'), merge_mode='concat')(bi_LSTM_1)

    #drop_rnn = Dropout(0.3, name='d2')(bi_LSTM_2)

    fc_2 = Dense(label_classes, kernel_initializer='he_normal', activation='softmax')(bi_LSTM_2)

    base_model = Model(inputs=[inputShape], outputs=fc_2) 

    labels = Input(name='the_labels', shape=[label_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([fc_2, labels, input_length, label_length])

    if is_training:
        return Model(inputs=[inputShape, labels, input_length, label_length], outputs=[loss_out]), base_model
    else:
        return base_model

but I get this error:

Traceback (most recent call last):
  File "C:/Users/Babak/PycharmProjects/CRNN-OCR/captcha-recognition-master1/captcha-recognition-master/training.py", line 79, in <module>
    model, base_model = CRNN_model(is_training=True)
  File "C:\Users\Babak\PycharmProjects\CRNN-OCR\captcha-recognition-master1\captcha-recognition-master\model.py", line 51, in CRNN_model
    bi_LSTM_1 = Bidirectional(LSTM(256, return_sequences=True, kernel_initializer='he_normal'), merge_mode='sum')(fc_1)
  File "C:\Program Files\Python37\lib\site-packages\keras\layers\wrappers.py", line 437, in __call__
    return super(Bidirectional, self).__call__(inputs, **kwargs)
  File "C:\Program Files\Python37\lib\site-packages\keras\engine\base_layer.py", line 446, in __call__
    self.assert_input_compatibility(inputs)
  File "C:\Program Files\Python37\lib\site-packages\keras\engine\base_layer.py", line 342, in assert_input_compatibility
    str(K.ndim(x)))
ValueError: Input 0 is incompatible with layer bidirectional_1: expected ndim=3, found ndim=2

Process finished with exit code 1

How can I remove the batch norm layers, which are commented out above? Note that I already removed the dropout layers by hand without any problem, so assume the dropouts are gone. My only problem is with removing the batch normalization layers.


Solution

  • As the error message says, LSTM layers expect 3D input tensors, but Dense outputs only 2D tensors. Several fixes are possible, but not all will work equally well (a minimal reproduction of the mismatch follows this list):

    • Conv2D outputs 4D tensors, shaped (samples, height, width, channels)
    • LSTM expects input shaped (samples, timesteps, channels)
    • Thus, you need to somehow transform the (height, width) dimensions into timesteps
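
    For illustration, here is a minimal, self-contained reproduction of the mismatch (a hypothetical toy model, not your network):

    from keras.layers import Input, Flatten, Dense, LSTM

    x_in = Input((28, 28, 1))   # 4D: (samples, height, width, channels)
    x = Flatten()(x_in)         # 2D: (samples, 28*28*1)
    x = Dense(256)(x)           # still 2D: (samples, 256)
    # LSTM(256)(x)              # raises: expected ndim=3, found ndim=2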

    In existing research, image data is flattened and treated as a sequence, while the channels dimension is left untouched. A viable approach is therefore to use Reshape to yield a 3D tensor shaped (samples, height*width, channels). To then apply the same Dense weights to every timestep (dim 1 of the input), wrap the Dense in TimeDistributed (in Keras 2 a plain Dense would also operate on the last axis of a 3D input, but TimeDistributed makes the per-timestep intent explicit):

    pool_shapes = K.int_shape(pool_5)  # static shape: (None, H, W, channels)
    fl_1 = Reshape((pool_shapes[1] * pool_shapes[2], pool_shapes[3]))(pool_5)  # -> (None, H*W, channels)
    fc_1 = TimeDistributed(Dense(256, activation='relu'))(fl_1)  # same Dense applied to each timestep
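
    You can sanity-check the shapes after this change (assuming the conv/pool stack from the question, which leaves 128 channels at pool_5):

    print(K.int_shape(pool_5))  # (None, H, W, 128)  - 4D
    print(K.int_shape(fl_1))    # (None, H*W, 128)   - 3D, LSTM-compatible
    print(K.int_shape(fc_1))    # (None, H*W, 256)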
    

    Lastly, note that with return_sequences=True the LSTMs output 3D tensors. For this CTC setup that is exactly what you want: K.ctc_batch_cost expects predictions shaped (samples, timesteps, num_classes), and the final softmax Dense applied to a 3D input acts along the last axis, so it can stay as it is. If you ever need a 2D output instead (e.g. for a plain classifier head), use return_sequences=False on the last LSTM, or insert a Flatten before the final Dense.
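
    Putting it together, a minimal sketch of the fixed middle section of CRNN_model (assuming width, height, and label_classes come from your config, and pool_5 is the 4D output of the last pooling layer, as in the question):

    # reshape (samples, H, W, C) -> (samples, H*W, C) so the LSTMs see timesteps
    pool_shapes = K.int_shape(pool_5)
    seq = Reshape((pool_shapes[1] * pool_shapes[2], pool_shapes[3]))(pool_5)
    seq = TimeDistributed(Dense(256, activation='relu'))(seq)

    seq = Bidirectional(LSTM(256, return_sequences=True, kernel_initializer='he_normal'), merge_mode='sum')(seq)
    seq = Bidirectional(LSTM(128, return_sequences=True, kernel_initializer='he_normal'), merge_mode='concat')(seq)

    # keep the 3D (samples, timesteps, classes) output for ctc_batch_cost
    fc_2 = Dense(label_classes, kernel_initializer='he_normal', activation='softmax')(seq)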