Tags: python, python-3.x, tensorflow, keras, autoencoder

Shape mismatch in the last Conv2D layer of an autoencoder


I am building (reusing) a convolutional autoencoder, but I am having an issue with the last Conv2D layer: it expects a shape of (180, 116, 1) but receives (184, 120, 1), which is the shape of my images.

I have done some research but have not been able to solve the issue. Does anyone have a solution?

# process the images into data

    import glob
    import pandas as pd
    import numpy as np
    from encoder_utils import prep_data
    import sys
    from sklearn.model_selection import train_test_split

    np.set_printoptions(threshold=sys.maxsize)  # print full arrays (np.nan is no longer accepted as a threshold)

    # create a list of JPEG images within the raw data folder
    image_list = glob.glob("Images/dpi60/*.jpeg")

    # set size of tensor_scope
    tensor_scope = 500

    # Process the images into numpy arrays and return a tensor
    T = prep_data(image_list, all_items = False, less_items = tensor_scope)

    # split into training and testing sets
    labels = image_list[0:tensor_scope]
    data_train, data_test, labels_train, labels_test = train_test_split(T, labels, test_size=0.20, random_state=42)

    # convert to 0-1 floats (reconversion by * 255)
    data_train = data_train.astype('float32') / 255.
    data_test = data_test.astype('float32') / 255.

    # move the sample axis to the end: (n_images, height, width) -> (height, width, n_images)
    data_train = np.rollaxis(data_train, 0, 3)
    data_test = np.rollaxis(data_test, 0, 3)
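    # sanity check (editor's sketch): with tensor_scope = 500 and a 20% test split,
    # the shapes here should be roughly (184, 120, 400) and (184, 120, 100)
    print(data_train.shape, data_test.shape)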

    from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
    from keras.models import Model
    from keras import backend as K

    input_img = Input(shape=(184, 120, 1))  

    x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    encoded = MaxPooling2D((2, 2), padding='same')(x)

    x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(16, (3, 3), activation='relu')(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)


    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

    data_train_dimensions =  data_train.shape
    data_test_dimensions =  data_test.shape

    data_train = np.reshape(data_train, (data_train_dimensions[2], 184, 120, 1))  # adapt this if using `channels_first` image data format
    data_test = np.reshape(data_test, (data_test_dimensions[2], 184, 120, 1))

    from keras.callbacks import TensorBoard
    # an autoencoder learns to reconstruct its input, so the training images
    # serve as both inputs and targets; the held-out set is used for validation
    autoencoder.fit(data_train, data_train,
                    epochs=50,
                    batch_size=128,
                    shuffle=True,
                    validation_data=(data_test, data_test),
                    callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
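
One quick way to see where the spatial dimensions diverge is to print the per-layer output shapes before calling fit (a minimal diagnostic sketch, assuming the autoencoder model built above):

    # the final decoder layer reports an output shape of (None, 180, 116, 1),
    # not the (None, 184, 120, 1) shape of the input images
    autoencoder.summary()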

Solution

  • You forgot padding="same" on the following Conv2D line:

    x = Conv2D(16, (3, 3), activation='relu')(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    

    Add padding="same":

    x = Conv2D(16, (3, 3), activation='relu', padding="same")(x)
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
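
Why this works: without padding="same", that Conv2D uses 'valid' padding and shaves two pixels off each spatial dimension (92 x 60 becomes 90 x 58), and the following UpSampling2D doubles the deficit, giving 180 x 116 instead of 184 x 120 at the output. The sketch below (an illustration, not the asker's code; it only assumes Keras is installed) builds the same architecture with both paddings and compares the output shapes:

    from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
    from keras.models import Model

    def build_autoencoder(conv16_padding):
        # same architecture as in the question; only the padding of the
        # Conv2D(16, ...) decoder layer varies
        inp = Input(shape=(184, 120, 1))
        x = Conv2D(16, (3, 3), activation='relu', padding='same')(inp)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)          # encoded: (23, 15, 8)
        x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
        x = UpSampling2D((2, 2))(x)                           # (92, 60, 8)
        x = Conv2D(16, (3, 3), activation='relu', padding=conv16_padding)(x)
        x = UpSampling2D((2, 2))(x)
        out = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
        return Model(inp, out)

    print(build_autoencoder('valid').output_shape)  # (None, 180, 116, 1) - mismatch
    print(build_autoencoder('same').output_shape)   # (None, 184, 120, 1) - matches the images

With padding="same" on that layer, the decoder reproduces the 184 x 120 input size, so fitting against the (184, 120, 1) image arrays no longer raises the shape error.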