Search code examples
Tags: keras, deep-learning, conv-neural-network, autoencoder, unsupervised-learning

Unsupervised Convolutional Autoencoder is always giving blank output


I'm trying to train an autoencoder on unlabeled images. I have about 300 training images and 100 validation images. But when I feed an unseen image to the trained autoencoder, it gives a completely blank output.

def _load_gray_images(folder, names, size=(224, 224)):
    """Load the files *names* from *folder* as a normalized grayscale batch.

    Each file is read with OpenCV, converted from BGR to grayscale and
    resized to *size* (width, height) using area interpolation.  The stacked
    result is cast to float32, scaled by 1/255 and given a trailing channel
    axis, i.e. it has shape ``(n_loaded, size[1], size[0], 1)``.

    Files that OpenCV cannot decode are skipped with a message, so the
    returned batch may hold fewer entries than *names*.
    """
    frames = []
    for name in names:
        path = os.path.join(folder, name)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable/non-image file (e.g. a stray
            # .DS_Store or Thumbs.db) by returning None rather than raising;
            # passing that on to cvtColor would abort the whole load.
            print('Skipping unreadable file: ' + path)
            continue
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        frames.append(cv2.resize(gray, size, interpolation=cv2.INTER_AREA))

    batch = np.asarray(frames).astype('float32') / 255.
    # cv2.resize takes (width, height) but returns arrays of (height, width).
    return np.reshape(batch, (len(frames), size[1], size[0], 1))


train_images = os.listdir('./Data/train')
val_images = os.listdir('./Data/val')

# Autoencoder inputs double as targets, so only the image tensors are needed.
X_train = _load_gray_images('./Data/train', train_images)
X_val = _load_gray_images('./Data/val', val_images)

print(len(X_train))
print(len(X_val))

Here, X_train.shape and X_val.shape are (300, 224, 224, 1) and (100, 224, 224, 1), respectively.

here is my upconv_concat function

def upconv_concat(bottom_a, bottom_b, n_filter, pool_size, stride, padding='VALID'):
    """Upsample ``bottom_a`` with a transposed convolution and join it to ``bottom_b``.

    Args:
        bottom_a: Tensor to upsample.
        bottom_b: Tensor concatenated after the upsampled one (skip connection).
        n_filter: Number of output filters of the transposed convolution.
        pool_size: Side length of the square transposed-convolution kernel.
        stride: Stride of the transposed convolution.
        padding: Padding mode forwarded to ``Conv2DTranspose``.

    Returns:
        The upsampled ``bottom_a`` and ``bottom_b`` concatenated along the
        last (channel) axis, upsampled tensor first.
    """
    transposed_conv = Conv2DTranspose(
        filters=n_filter,
        kernel_size=(pool_size, pool_size),
        strides=stride,
        padding=padding,
    )
    upsampled = transposed_conv(bottom_a)
    return Concatenate(axis=-1)([upsampled, bottom_b])

here are some parameters

# Symbolic model input: one 224x224 single-channel (grayscale) image per sample.
input_img = Input(shape=(224, 224, 1))
# Rate passed to every Dropout layer in the model below.
droprate=0.25
# Used as the filter count of the final Conv2D, i.e. the number of output channels.
num_classes = 1

and here is my model

# Encoder level 1: full resolution (224x224), 64 feature maps.
conv_1_1 = Conv2D(filters = 64, kernel_size = 3, activation='relu', padding='same')(input_img)
conv_1_1_bn = BatchNormalization()(conv_1_1)
conv_1_1_do = Dropout(droprate)(conv_1_1_bn)

# Downsample 224 -> 112.
pool_1 = MaxPooling2D(pool_size= 2, strides = 2)(conv_1_1_do)

# Encoder level 2: 112x112, 512 feature maps.
conv_4_1 = SeparableConv2D(filters = 512, kernel_size = 3, activation='relu', padding='same')(pool_1)
conv_4_1_bn = BatchNormalization()(conv_4_1)
conv_4_1_do = Dropout(droprate)(conv_4_1_bn)

# Downsample 112 -> 56.
pool_4 = MaxPooling2D(pool_size= 2, strides = 2)(conv_4_1_do)

# Bottleneck: 56x56, 1024 feature maps.
conv_5_1 = SeparableConv2D(filters = 1024, kernel_size = 3, activation='relu', padding='same')(pool_4)
conv_5_1_bn = BatchNormalization()(conv_5_1)
conv_5_1_do = Dropout(droprate)(conv_5_1_bn)

# Decoder level 2: upsample 56 -> 112 and concatenate the level-2 encoder features.
upconv_1 = upconv_concat(conv_5_1_do, conv_4_1_do, n_filter=512, pool_size=2, stride=2) 

conv_6_1 = SeparableConv2D(filters = 512, kernel_size = 3, activation='relu', padding='same')(upconv_1)
conv_6_1_bn = BatchNormalization()(conv_6_1)
conv_6_1_do = Dropout(droprate)(conv_6_1_bn)


# Decoder level 1: upsample 112 -> 224 and concatenate the level-1 encoder features.
upconv_2 = upconv_concat(conv_6_1_do, conv_1_1_do, n_filter=64, pool_size=2, stride=2) 

conv_9_1 = SeparableConv2D(filters = 64, kernel_size = 3, activation='relu', padding='same')(upconv_2)
conv_9_1_bn = BatchNormalization()(conv_9_1)
conv_9_1_do = Dropout(droprate)(conv_9_1_bn)


# 1x1 convolution projecting the 64 feature maps to num_classes output channels.
# NOTE(review): softmax normalizes across the channel axis; with
# num_classes = 1 there is only one channel, so every output pixel is
# exactly 1.0 regardless of the input. A per-pixel reconstruction in (0, 1)
# needs activation="sigmoid" here.
ae_output = Conv2D(num_classes, kernel_size=1, strides = (1,1), activation="softmax")(conv_9_1_do)

here is the training part

# Wrap the graph from input_img to ae_output in a trainable model.
ae_model = Model(inputs=input_img, outputs=ae_output)
ae_model.compile(loss='binary_crossentropy', optimizer='adadelta')

# Autoencoder training: the images serve as both inputs and targets,
# for the training set and for the validation set alike.
ae_model.fit(
    x=X_train,
    y=X_train,
    validation_data=(X_val, X_val),
    batch_size=16,
    epochs=5,
    shuffle=True,
)

if anyone needs the model summary

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, 224, 224, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 224, 224, 64) 640         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 224, 224, 64) 256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 224, 224, 64) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 112, 112, 64) 0           dropout_1[0][0]                  
__________________________________________________________________________________________________
separable_conv2d_1 (SeparableCo (None, 112, 112, 512 33856       max_pooling2d_1[0][0]            
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 112, 112, 512 2048        separable_conv2d_1[0][0]         
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 112, 112, 512 0           batch_normalization_2[0][0]      
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D)  (None, 56, 56, 512)  0           dropout_2[0][0]                  
__________________________________________________________________________________________________
separable_conv2d_2 (SeparableCo (None, 56, 56, 1024) 529920      max_pooling2d_2[0][0]            
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 56, 56, 1024) 4096        separable_conv2d_2[0][0]         
__________________________________________________________________________________________________
dropout_3 (Dropout)             (None, 56, 56, 1024) 0           batch_normalization_3[0][0]      
__________________________________________________________________________________________________
conv2d_transpose_1 (Conv2DTrans (None, 112, 112, 512 2097664     dropout_3[0][0]                  
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 112, 112, 102 0           conv2d_transpose_1[0][0]         
                                                                 dropout_2[0][0]                  
__________________________________________________________________________________________________
separable_conv2d_3 (SeparableCo (None, 112, 112, 512 534016      concatenate_1[0][0]              
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 112, 112, 512 2048        separable_conv2d_3[0][0]         
__________________________________________________________________________________________________
dropout_4 (Dropout)             (None, 112, 112, 512 0           batch_normalization_4[0][0]      
__________________________________________________________________________________________________
conv2d_transpose_2 (Conv2DTrans (None, 224, 224, 64) 131136      dropout_4[0][0]                  
__________________________________________________________________________________________________
concatenate_2 (Concatenate)     (None, 224, 224, 128 0           conv2d_transpose_2[0][0]         
                                                                 dropout_1[0][0]                  
__________________________________________________________________________________________________
separable_conv2d_4 (SeparableCo (None, 224, 224, 64) 9408        concatenate_2[0][0]              
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 224, 224, 64) 256         separable_conv2d_4[0][0]         
__________________________________________________________________________________________________
dropout_5 (Dropout)             (None, 224, 224, 64) 0           batch_normalization_5[0][0]      
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 224, 224, 1)  65          dropout_5[0][0]                  
==================================================================================================
Total params: 3,345,409
Trainable params: 3,341,057
Non-trainable params: 4,352
__________________________________________________________________________________________________

I have double-checked the X_train images to make sure I am not feeding blank images by mistake, and I am not: the input data is fine.

The problem is that when I test the model, it gives a blank image:

# Read the unseen test image and convert it to grayscale.
img = cv2.cvtColor(cv2.imread('./test/a184.jpg'), cv2.COLOR_BGR2GRAY)
# Resize to the model's 224x224 input and scale pixel values by 1/255.
resized = cv2.resize(img, (224, 224), interpolation=cv2.INTER_AREA) / 255
# Add the batch axis in front and the channel axis at the end in one step.
resized = resized[np.newaxis, :, :, np.newaxis]

now resized is of shape (1,224,224,1)

# Run the autoencoder on the single test image and drop the batch and
# channel axes so the result can be displayed as a 2-D grayscale picture.
image = ae_model.predict(resized).reshape((224, 224))

plt.imshow(image, cmap='gray')

This gives me a blank image (screenshot not reproduced here), and the values in the image variable are all 1s.

I'm using tf.keras.

Please help me with this. I am unable to locate the problem, and I don't know how to debug the model to find it.


Solution

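  • The blank image comes from the activation of your last conv layer. Softmax normalizes across the channel axis, and because num_classes = 1 the output has a single channel, so each pixel is normalized against itself and always comes out as exactly 1.
    Here you want to predict, for each pixel, a value between 0 and 1, so you need a sigmoid activation, not a softmax one.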

    Try with this instead :

    # Sigmoid maps each output pixel independently into (0, 1).
    ae_output = Conv2D(num_classes, kernel_size=1, strides = (1,1), activation="sigmoid")(conv_9_1_do)