I'm programming in Python 3.7.5 using Keras and TensorFlow 1.13.1. I want to remove the batch normalization layers from the model coded below:
from keras import backend as K
from keras.callbacks import *
from keras.layers import *
from keras.models import *
from keras.utils import *
from keras.optimizers import Adadelta, RMSprop, Adam, SGD
from keras.callbacks import ModelCheckpoint
from keras.callbacks import TensorBoard
from config import *
def ctc_lambda_func(args):
    iy_pred, ilabels, iinput_length, ilabel_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    iy_pred = iy_pred[:, 2:, :]  # no such influence
    return K.ctc_batch_cost(ilabels, iy_pred, iinput_length, ilabel_length)
def CRNN_model(is_training=True):
    inputShape = Input((width, height, 1), name='input')  # base on Tensorflow backend
    conv_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputShape)
    conv_2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv_1)
    #batchnorm_2 = BatchNormalization()(conv_2)
    pool_2 = MaxPooling2D(pool_size=(2, 2))(conv_2)
    conv_3 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool_2)
    conv_4 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv_3)
    #batchnorm_4 = BatchNormalization()(conv_4)
    pool_4 = MaxPooling2D(pool_size=(2, 2))(conv_4)
    conv_5 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool_4)
    conv_6 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv_5)
    pool_5 = MaxPool2D(pool_size=(2, 2))(conv_6)
    #batchnorm_6 = BatchNormalization()(conv_6)
    #bn_shape = batchnorm_6.get_shape()
    #print(bn_shape)
    #x_reshape = Reshape(target_shape=(int(bn_shape[1]), int(bn_shape[2] * bn_shape[3])))(batchnorm_6)
    #drop_reshape = Dropout(0.25, name='d1')(x_reshape)
    fl_1 = Flatten()(pool_5)
    fc_1 = Dense(256, activation='relu')(fl_1)
    #print(x_reshape.get_shape())
    #print(fc_1.get_shape())
    bi_LSTM_1 = Bidirectional(LSTM(256, return_sequences=True, kernel_initializer='he_normal'), merge_mode='sum')(fc_1)
    bi_LSTM_2 = Bidirectional(LSTM(128, return_sequences=True, kernel_initializer='he_normal'), merge_mode='concat')(bi_LSTM_1)
    #drop_rnn = Dropout(0.3, name='d2')(bi_LSTM_2)
    fc_2 = Dense(label_classes, kernel_initializer='he_normal', activation='softmax')(bi_LSTM_2)
    base_model = Model(inputs=[inputShape], outputs=fc_2)
    labels = Input(name='the_labels', shape=[label_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([fc_2, labels, input_length, label_length])
    if is_training:
        return Model(inputs=[inputShape, labels, input_length, label_length], outputs=[loss_out]), base_model
    else:
        return base_model
but I get this error:
Traceback (most recent call last):
  File "C:/Users/Babak/PycharmProjects/CRNN-OCR/captcha-recognition-master1/captcha-recognition-master/training.py", line 79, in <module>
    model, base_model = CRNN_model(is_training=True)
  File "C:\Users\Babak\PycharmProjects\CRNN-OCR\captcha-recognition-master1\captcha-recognition-master\model.py", line 51, in CRNN_model
    bi_LSTM_1 = Bidirectional(LSTM(256, return_sequences=True, kernel_initializer='he_normal'), merge_mode='sum')(fc_1)
  File "C:\Program Files\Python37\lib\site-packages\keras\layers\wrappers.py", line 437, in __call__
    return super(Bidirectional, self).__call__(inputs, **kwargs)
  File "C:\Program Files\Python37\lib\site-packages\keras\engine\base_layer.py", line 446, in __call__
    self.assert_input_compatibility(inputs)
  File "C:\Program Files\Python37\lib\site-packages\keras\engine\base_layer.py", line 342, in assert_input_compatibility
    str(K.ndim(x)))
ValueError: Input 0 is incompatible with layer bidirectional_1: expected ndim=3, found ndim=2
Process finished with exit code 1
How can I remove the batch normalization layers that are commented out above? Note that I already removed the dropout layers manually without any problem, so assume dropout is gone; it's removing the batch normalization layers that gives me trouble.
As per the error, LSTM layers expect 3D input tensors, but Dense outputs only 2D. Many possible fixes exist, but not all will work equally well.

Conv2D outputs 4D tensors shaped (samples, height, width, channels), whereas LSTM expects input shaped (samples, timesteps, channels), so you need to fold the (height, width) dimensions into timesteps. In existing research, image data is flattened and treated sequentially while channels remain untouched, so a viable approach is to use Reshape to yield a 3D tensor shaped (samples, height*width, channels). Then, to apply the same Dense weights along dim 1 of the input, i.e. per timestep, wrap the Dense in the TimeDistributed wrapper:
pool_shapes = K.int_shape(pool_5)
fl_1 = Reshape((pool_shapes[1] * pool_shapes[2], pool_shapes[3]))(pool_5)
fc_1 = TimeDistributed(Dense(256, activation='relu'))(fl_1)
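For what it's worth, Keras's Reshape can also infer one dimension when given -1, and conv_6 fixes the channel count of pool_5 at 128, so an equivalent shorthand for the two lines above would be:

fl_1 = Reshape((-1, 128))(pool_5)  # timesteps inferred from the pooled height*width
fc_1 = TimeDistributed(Dense(256, activation='relu'))(fl_1)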
Lastly, return_sequences=True makes the last LSTM output a 3D tensor as well; if you want the final Dense to see a 2D input, either use return_sequences=False on that LSTM or insert a Flatten before the Dense.
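Putting the pieces together for your particular model: since fc_2 feeds K.ctc_batch_cost, which expects predictions shaped (samples, timesteps, classes), you would keep return_sequences=True on both LSTMs and apply the softmax Dense per timestep rather than flattening; the 2D-output options above apply if you swap the CTC head for a plain classifier. Below is a minimal sketch (layer names reused from your code, everything before pool_5 and after fc_2 left unchanged) of how the middle of CRNN_model could look with the Reshape/TimeDistributed fix applied:

pool_shapes = K.int_shape(pool_5)
fl_1 = Reshape((pool_shapes[1] * pool_shapes[2], pool_shapes[3]))(pool_5)  # (samples, h*w, 128)
fc_1 = TimeDistributed(Dense(256, activation='relu'))(fl_1)                # (samples, h*w, 256)
bi_LSTM_1 = Bidirectional(LSTM(256, return_sequences=True, kernel_initializer='he_normal'), merge_mode='sum')(fc_1)
bi_LSTM_2 = Bidirectional(LSTM(128, return_sequences=True, kernel_initializer='he_normal'), merge_mode='concat')(bi_LSTM_1)
fc_2 = TimeDistributed(Dense(label_classes, kernel_initializer='he_normal', activation='softmax'))(bi_LSTM_2)  # per-timestep softmax for CTC

With that, base_model and the CTC training model are constructed exactly as in your original code, and no BatchNormalization layers remain.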