2D Convolutional neural networks with variable size images

I have implemented a convolutional auto-encoder with Keras, using the Theano backend. I am changing my approach to try to deal with images of different sizes. As long as I use numpy's stack function to build the dataset (equal size images) I am golden. However, for different size images we cannot use stack, and fit expects a numpy array. So I changed to fit_generator to avoid the size checks. The problem is that the last layer is expecting 16 as the last dimension in the input, and I cannot understand why it is getting the dimensions of the original image.

Have a look at the code below and the error output.


import numpy as np
import keras
from keras.models import Sequential, Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D

# Run configuration for the auto-encoder experiment.
AE_EPOCHS = 10
VERB = 1
batchsz = 16
outfun = 'sigmoid'

# (rows, cols) of every synthetic image; only the row count varies.
dimensions = [(10, 15), (12, 15), (7,15), (20,15), (25,15)]

# One uniformly random image per entry, with a leading axis of length 1
# prepended so that each array has shape (1, rows, cols).
data = [
    np.random.rand(n_rows, n_cols)[np.newaxis, ...]
    for n_rows, n_cols in dimensions
]

# Input with one leading channel axis, an unspecified second axis and a
# fixed last axis of 15 (presumably channels-first; the backend default
# data format is not visible here).
input_img = Input(shape=(1, None, 15))
filtersz = 3
pad_it = 'same'
size1 = 16
size2 = 8
kernel = (filtersz, filtersz)

# Encoder: three Conv2D -> MaxPooling2D stages with 16, 8 and 8 filters.
x = input_img
for n_filters in (size1, size2, size2):
    x = Conv2D(n_filters, kernel, activation='relu', padding=pad_it)(x)
    x = MaxPooling2D((2, 2), padding=pad_it)(x)
encoded = x

# Decoder: three Conv2D -> UpSampling2D stages with 8, 8 and 16 filters,
# followed by a single-filter convolution using the output activation.
x = encoded
for n_filters in (size2, size2, size1):
    x = Conv2D(n_filters, kernel, activation='relu', padding=pad_it)(x)
    x = UpSampling2D((2, 2), data_format="channels_first")(x)
decoded = Conv2D(1, kernel, activation=outfun, padding=pad_it)(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(loss='binary_crossentropy', optimizer='adadelta')

# The first image is held out for validation and given an extra leading
# axis of length 1; every remaining image is used for training.
x_test = np.expand_dims(data[0], axis=0)
x_train = data[1:]

def mygen(xx, *args, **kwargs):
    """Yield an ``(input, target)`` pair for every element of *xx*.

    Each element is used unchanged as both members of the pair, which is
    what an auto-encoder trains on. The generator makes a single pass
    over *xx* and then stops. Extra positional and keyword arguments are
    accepted and ignored.
    """
    yield from ((sample, sample) for sample in xx)

# Generator feeding the training images to fit_generator.
thegen = mygen(x_train)
# Validation generator. It is not passed to fit_generator below: per the
# author, None was somehow returned when it was used.
thegenval = mygen(np.array([x_test]))

# Keyword options for the training run; validation uses the held-out
# array directly as both input and target.
fit_options = dict(
    epochs=AE_EPOCHS,
    steps_per_epoch=4,
    verbose=VERB,
    validation_data=(x_test, x_test),
    validation_steps=1,
)
hist = autoencoder.fit_generator(thegen, **fit_options)

Traceback (most recent call last):

File "stacko.py", line 107, in validation_steps=1

File "/usr/local/lib/python3.5/dist-packages/keras/legacy/interfaces.py", line 88, in wrapper return func(*args, **kwargs)

File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1847, in fit_generator val_x, val_y, val_sample_weight)

File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1315, in _standardize_user_data exception_prefix='target')

File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 139, in _standardize_input_data str(array.shape))

ValueError: Error when checking target: expected conv2d_7 to have shape (None, 1, None, 16) but got array with shape (1, 1, 10, 15)

Solution

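There are two problems with the code above. First, the size of each spatial axis of the images must be a multiple of 8: the three 2×2 max-pooling layers shrink each axis by a factor of 2³ = 8 (rounding up, because of the 'same' padding) and the three up-sampling layers expand it back by the same factor, so any other size comes back larger than it went in (15 → 16 in the error above) and no longer matches the target. That this factor equals the smallest number of filters per layer (8) is a coincidence. Second, the generators for fit_generator must return batches (4D numpy arrays).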

The generator is implemented with itertools.cycle and reshapes each image into a one-sample batch (when several images share the same size, one could instead build variable-size batches, one per group of dimensions). The working example is below.


import numpy as np
from itertools import cycle

import keras
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D

# Run configuration for the auto-encoder experiment.
AE_EPOCHS = 10
VERB = 1
outfun = 'sigmoid'

# (rows, cols) of every synthetic image; all sizes are multiples of 8.
dimensions = [(16, 32), (24, 32), (8,32), (32,32)]

# Build one uniformly random image per entry and prepend an axis of
# length 1, giving arrays of shape (1, rows, cols).
data = []
for n_rows, n_cols in dimensions:
    image = np.random.rand(n_rows, n_cols)
    data.append(np.expand_dims(image, axis=0))

# Convolutional auto-encoder for single-channel images of variable height.
#
# The input shape puts the channel axis first, so every layer is told so
# explicitly. Previously only UpSampling2D specified the data format and
# the other layers silently relied on the backend-wide default, which
# breaks the model when that default is "channels_last".
data_fmt = "channels_first"
input_img = Input(shape=(1, None, 32))
filtersz = 3
pad_it = 'same'
size1 = 16
size2 = 8

# Encoder: each 2x2 max-pooling halves both spatial axes (three times in
# total), so image sizes must be multiples of 8 for the decoder output to
# match the input shape.
x = Conv2D(size1, (filtersz, filtersz), activation='relu', padding=pad_it, data_format=data_fmt)(input_img)
x = MaxPooling2D((2, 2), padding=pad_it, data_format=data_fmt)(x)
x = Conv2D(size2, (filtersz, filtersz), activation='relu', padding=pad_it, data_format=data_fmt)(x)
x = MaxPooling2D((2, 2), padding=pad_it, data_format=data_fmt)(x)
x = Conv2D(size2, (filtersz, filtersz), activation='relu', padding=pad_it, data_format=data_fmt)(x)
encoded = MaxPooling2D((2, 2), padding=pad_it, data_format=data_fmt)(x)

# Decoder: mirror of the encoder, doubling both spatial axes three times.
x = Conv2D(size2, (filtersz, filtersz), activation='relu', padding=pad_it, data_format=data_fmt)(encoded)
x = UpSampling2D((2, 2), data_format=data_fmt)(x)
x = Conv2D(size2, (filtersz, filtersz), activation='relu', padding=pad_it, data_format=data_fmt)(x)
x = UpSampling2D((2, 2), data_format=data_fmt)(x)
x = Conv2D(size1, (filtersz, filtersz), activation='relu', padding=pad_it, data_format=data_fmt)(x)
x = UpSampling2D((2, 2), data_format=data_fmt)(x)
# Single-filter output layer reconstructs the one-channel image.
decoded = Conv2D(1, (filtersz, filtersz), activation=outfun, padding=pad_it, data_format=data_fmt)(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss= 'binary_crossentropy')


# The first image is held out for validation, kept as a one-element list
# so it can be fed through the same generator as the training images.
x_test = [data[0]]
# Every remaining image is used for training.
x_train = data[1:]

def mygen(xx, *args, **kwargs):
    """Endlessly yield one-sample ``(input, target)`` batches from *xx*.

    The elements of *xx* are visited in order and repeated indefinitely
    via ``itertools.cycle``. Each element must provide ``shape`` and
    ``reshape`` (e.g. a numpy array); it gets a leading axis of length 1
    and the resulting batch is yielded as both input and target. Extra
    positional and keyword arguments are accepted and ignored.
    """
    for sample in cycle(xx):
        batch = sample.reshape(1, *sample.shape)
        yield batch, batch

# Endless generators of one-sample batches for training and validation.
thegen = mygen(x_train)
thegenval = mygen(x_test)

# The step counts are derived from the dataset sizes instead of being
# hard-coded, so each epoch (and each validation pass) visits every image
# exactly once even if the list of image dimensions changes.
hist = autoencoder.fit_generator(
                thegen,
                epochs=AE_EPOCHS,
                steps_per_epoch=len(x_train),
                verbose=VERB,
                validation_data=thegenval,
                validation_steps=len(x_test)
                )