Tags: tensorflow, keras, deep-learning, neural-network, recurrent-neural-network

Trouble converting tensorflow model architecture to keras api


I found a TensorFlow model for image classification. It achieved an accuracy of roughly 65%. The code looks as follows:

import tensorflow as tf  # TF 1.x API (tf.layers, tf.contrib)

def resnet_dropout(X, y, layer_depth=3, num_classes=250, reg=1e-2, is_training=True):
    # RESnet-ish
    l2_reg = tf.contrib.layers.l2_regularizer(reg)

    """
    Input: 128x128x1
    Output: 64x64x64
    """
    d0 = tf.layers.dropout(X, rate=0.2, training=is_training)
    c0 = tf.layers.conv2d(d0, 64, [7, 7], strides=[2, 2], padding='SAME', kernel_regularizer=l2_reg)
    c0 = tf.layers.batch_normalization(c0, training=is_training)
    match_dimensions = True
    for i in range(layer_depth):
        c1 = tf.layers.conv2d(c0, 64, [3, 3], padding='SAME', kernel_regularizer=l2_reg) #conv
        b1 = tf.layers.batch_normalization(c1, training=is_training) #bn
        h1 = tf.nn.relu(b1) #relu
        c2 = tf.layers.conv2d(h1, 64, [3, 3], padding='SAME', kernel_regularizer=l2_reg) #conv
        b2 = tf.layers.batch_normalization(c2, training=is_training) #bn
        r = c0 + b2
        c0 = tf.nn.relu(r)
    
    """
    Input: 64x64x64
    Output: 32x32x128
    """
    downsample = True
    for i in range(layer_depth):
        c1 = tf.layers.conv2d(c0, 128, [3, 3], 
                              strides=([2, 2] if downsample else [1, 1]),
                              padding='SAME',
                              kernel_regularizer=l2_reg)
        b1 = tf.layers.batch_normalization(c1, training=is_training) #bn
        h1 = tf.nn.relu(b1) #relu
        c2 = tf.layers.conv2d(h1, 128, [3, 3], padding='SAME', kernel_regularizer=l2_reg) #conv
        b2 = tf.layers.batch_normalization(c2, training=is_training) #bn
        if downsample:
            c0_proj = tf.layers.conv2d(c0, 128, [1, 1], padding='SAME', kernel_regularizer=l2_reg)
            c0_proj = tf.layers.average_pooling2d(c0_proj, (2, 2), (2, 2))
            r = c0_proj + b2
            downsample = False
        else:
            r = c0 + b2
        c0 = tf.nn.relu(r)

    """
    Input: 32x32x128
    Output: 16x16x256
    """
    downsample = True
    for i in range(layer_depth):
        c1 = tf.layers.conv2d(c0, 256, [3, 3], 
                              strides=([2, 2] if downsample else [1, 1]),
                              padding='SAME',
                              kernel_regularizer=l2_reg)
        b1 = tf.layers.batch_normalization(c1, training=is_training) #bn
        h1 = tf.nn.relu(b1) #relu
        c2 = tf.layers.conv2d(h1, 256, [3, 3], padding='SAME', kernel_regularizer=l2_reg) #conv
        b2 = tf.layers.batch_normalization(c2, training=is_training) #bn
        if downsample:
            c0_proj = tf.layers.conv2d(c0, 256, [1, 1], padding='SAME', kernel_regularizer=l2_reg)
            c0_proj = tf.layers.average_pooling2d(c0_proj, (2, 2), (2, 2))
            r = c0_proj + b2
            downsample = False
        else:
            r = c0 + b2
        c0 = tf.nn.relu(r)

    """
    Input: 16x16x256
    Output: 8x8x512
    """
    downsample = True
    for i in range(layer_depth):
        c1 = tf.layers.conv2d(c0, 512, [3, 3], 
                              strides=([2, 2] if downsample else [1, 1]),
                              padding='SAME',
                              kernel_regularizer=l2_reg)
        b1 = tf.layers.batch_normalization(c1, training=is_training) #bn
        h1 = tf.nn.relu(b1) #relu
        c2 = tf.layers.conv2d(h1, 512, [3, 3], padding='SAME', kernel_regularizer=l2_reg) #conv
        b2 = tf.layers.batch_normalization(c2, training=is_training) #bn
        if downsample:
            c0_proj = tf.layers.conv2d(c0, 512, [1, 1], padding='SAME', kernel_regularizer=l2_reg)
            c0_proj = tf.layers.average_pooling2d(c0_proj, (2, 2), (2, 2))
            r = c0_proj + b2
            downsample = False
        else:
            r = c0 + b2
        c0 = tf.nn.relu(r)
    
    p1 = tf.layers.average_pooling2d(c0, (8, 8), (1,1))
    p1_flat = tf.reshape(p1, [-1, 512])
    d1 = tf.layers.dropout(p1_flat, rate=0.2, training=is_training)
    y_out = tf.layers.dense(d1, num_classes, kernel_regularizer=l2_reg)
    
    return y_out
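For context, this is TF 1.x graph-mode code: calling the function only builds ops, which you would then run in a session. A minimal sketch of the wiring (the placeholder names are assumptions, not from the original post):

X = tf.placeholder(tf.float32, shape=[None, 128, 128, 1])
logits = resnet_dropout(X, y=None, is_training=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # scores = sess.run(logits, feed_dict={X: batch})  # batch: (N, 128, 128, 1) array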

Then I wanted to convert the model architecture to the Keras API.

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

def make_model3(input_shape, num_classes, reg):

    inputs = keras.Input(shape=input_shape)
    
    l2_reg = keras.regularizers.l2(reg)

    x = layers.Dropout(0.2)(inputs)
    x = layers.Conv2D(64,[7,7],strides=[2,2],padding="same",kernel_regularizer=l2_reg)(x)
    previous_block_activation = layers.BatchNormalization()(x)

    for i in range(3):
        x = layers.Conv2D(64,[3,3],padding="same",kernel_regularizer=l2_reg)(previous_block_activation)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.Conv2D(64,[3,3],padding="same",kernel_regularizer=l2_reg)(x)
        x = layers.BatchNormalization()(x)

        x = layers.add([x, previous_block_activation])  

        previous_block_activation = layers.Activation("relu")(x)

    downsample = True
    for i in range(3):
        x = layers.Conv2D(128,[3,3],padding="same",strides=([2,2] if downsample else [1,1]),kernel_regularizer=l2_reg)(previous_block_activation)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.Conv2D(128,[3,3],padding="same",kernel_regularizer=l2_reg)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        if downsample:
            residual = layers.Conv2D(128,[1,1],padding="same",kernel_regularizer=l2_reg)(previous_block_activation)
            residual = layers.AveragePooling2D((2,2),(2,2))(residual)
            x = layers.add([x, residual])
            downsample = False
        else:
            x = layers.add([x, previous_block_activation])
        previous_block_activation = layers.Activation("relu")(x)

    downsample = True
    for i in range(3):
        x = layers.Conv2D(256,[3,3],padding="same",strides=([2,2] if downsample else [1,1]),kernel_regularizer=l2_reg)(previous_block_activation)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.Conv2D(256,[3,3],padding="same",kernel_regularizer=l2_reg)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        if downsample:
            residual = layers.Conv2D(256,[1,1],padding="same",kernel_regularizer=l2_reg)(previous_block_activation)
            residual = layers.AveragePooling2D((2,2),(2,2))(residual)
            x = layers.add([x, residual])
            downsample = False
        else:
            x = layers.add([x, previous_block_activation])
        previous_block_activation = layers.Activation("relu")(x)

    downsample = True
    for i in range(3):
        x = layers.Conv2D(512,[3,3],padding="same",strides=([2,2] if downsample else [1,1]),kernel_regularizer=l2_reg)(previous_block_activation)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.Conv2D(512,[3,3],padding="same",kernel_regularizer=l2_reg)(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        if downsample:
            residual = layers.Conv2D(512,[1,1],padding="same",kernel_regularizer=l2_reg)(previous_block_activation)
            residual = layers.AveragePooling2D((2,2),(2,2))(residual)
            x = layers.add([x, residual])
            downsample = False
        else:
            x = layers.add([x, previous_block_activation])
        previous_block_activation = layers.Activation("relu")(x)

    x = layers.AveragePooling2D((8,8),(1,2))(previous_block_activation)
    x = layers.Reshape([-1,512])(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(num_classes,kernel_regularizer=l2_reg)(x)
    x = layers.Activation("softmax")(x)

    return keras.Model(inputs,x)

# data preparation

model = make_model3((128, 128, 1), 250, reg=1e-2)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy'])

history = model.fit_generator(
            train_dir,
            steps_per_epoch=steps_per_epoch,
            epochs=15,
            validation_data=val_dir,
            validation_steps=validation_steps)
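(The data preparation is elided above; fit_generator expects generators or keras.utils.Sequence objects rather than directory paths, so presumably train_dir and val_dir were wrapped in generators, e.g. something like the following sketch, not the poster's actual setup:)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1. / 255)
train_gen = datagen.flow_from_directory(
    train_dir, target_size=(128, 128), color_mode='grayscale',
    class_mode='categorical', batch_size=32)
val_gen = datagen.flow_from_directory(
    val_dir, target_size=(128, 128), color_mode='grayscale',
    class_mode='categorical', batch_size=32)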

With the Keras API, the model reached an accuracy of 4% in the first epoch and stayed roughly constant through epoch 15. The loss quickly converged to about 5.7.
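For scale, a loss of about 5.5 over 250 classes is exactly what uniform guessing produces; the reported 5.7 is roughly that plus the L2 penalty (Keras adds regularization losses to the reported loss), so the network was learning essentially nothing:

import math

# Cross-entropy of a uniform guess over 250 classes:
print(math.log(250))  # ≈ 5.52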

I already tried changing the learning rate and different loss functions; nothing changed. I then trained a different model architecture with the same parameters and data preparation and achieved an accuracy of 66%, so I concluded that I translated the architecture incorrectly. My question is: where is my mistake in translating the architecture?
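One way to narrow down a mismatch like this is to inspect the converted model's layer shapes; a minimal sketch using the model above:

model = make_model3((128, 128, 1), num_classes=250, reg=1e-2)
model.summary()  # compare each output shape against the tf.layers version,
                 # in particular the shape coming out of the final Dense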


Solution

  • I found a solution to the problem. I changed the last layers from this:

    x = layers.AveragePooling2D((8,8),(1,2))(previous_block_activation)
    x = layers.Reshape([-1,512])(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(num_classes,kernel_regularizer=l2_reg)(x)
    x = layers.Activation("softmax")(x)
    

    to this:

    x = layers.GlobalAveragePooling2D()(previous_block_activation)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(num_classes, activation='softmax')(x)
    

    Performance is roughly the same as the original model's, but I only achieved that without a kernel regularizer. The GlobalAveragePooling2D layer collapses the 8x8x512 feature map to a flat 512-vector per sample, whereas the old AveragePooling2D plus Reshape([-1, 512]) head left a three-dimensional (batch, 1, 512) tensor (Keras Reshape, unlike tf.reshape, excludes the batch axis), which appears to be what broke training.
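
    If one wanted to keep the L2 penalty on the classifier head, Dense accepts the same kernel_regularizer argument as before; a sketch (the answer above reports better results without it):

    l2_reg = keras.regularizers.l2(reg)

    x = layers.GlobalAveragePooling2D()(previous_block_activation)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(num_classes, activation='softmax',
                     kernel_regularizer=l2_reg)(x)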