Search code examples
Tags: python, machine-learning, neural-network, keras, keras-layer

Transfer Learning fails because dense layer is expected to have shape (None,1)


I'm trying to use InceptionV4 for a classification problem. Before applying it to the actual problem, I'm experimenting with it on a small data set.

I replaced the last dense layer (of size 1001) with a new dense layer, compiled the model, and tried to fit it:

from keras import backend as K
import inception_v4
import numpy as np
import cv2
import os

from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense, Input

from keras.models import Model
# An empty CUDA_VISIBLE_DEVICES hides all CUDA devices from this process,
# i.e. the experiment is meant to run on the CPU only.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Batch size handed to both image generators in train_top_model.
my_batch_size = 32

# Directories that flow_from_directory reads the images from.
train_data_dir = '//shared_directory/projects/try_CDFxx/data/train/'
validation_data_dir = '//shared_directory/projects/try_CDFxx/data/validation/'

# Size every image is resized to by the generators (target_size).
img_width = 299
img_height = 299

# Number of output units of the replacement Dense layer.
num_classes = 3

# Training schedule passed to fit_generator.
nb_epoch = 50
nbr_train_samples = 24
nbr_validation_samples = 12


def train_top_model (num_classes):
    """Swap InceptionV4's final Dense layer for a new softmax head and train it.

    Builds the InceptionV4 model with ImageNet weights, attaches a new
    ``num_classes``-unit softmax Dense layer (named "newDense") to the output
    of the second-to-last layer, and fits the resulting model on images read
    from the module-level ``train_data_dir`` / ``validation_data_dir``.

    Args:
        num_classes: number of units of the new Dense output layer.

    Returns:
        None. The trained model is not returned or saved.

    Note:
        As written, the generators produce one-hot labels
        (``class_mode='categorical'``) while the model is compiled with
        ``sparse_categorical_crossentropy``; this mismatch is what raises the
        "expected newDense to have shape (None, 1)" ValueError.
    """

    # Pre-trained network; its last layer is the original 1001-way Dense.
    v4 = inception_v4.create_model(weights='imagenet')
    # layers[-2] is the layer feeding the original classifier (flatten_1 in
    # the model summary), so the new head takes the old head's place.
    predictions = Dense(output_dim=num_classes, activation='softmax', name="newDense")(v4.layers[-2].output) # replacing the 1001 categories dense layer with my own 
    # Reuse the input tensor of the original network for the new model.
    main_input= v4.layers[1].input
    main_output=predictions
    t_model = Model(input=[main_input], output=[main_output])
    # Training images: rescaled to [0, 1] and randomly augmented.
    train_datagen = ImageDataGenerator(
            rescale=1./255,
            shear_range=0.1,
            zoom_range=0.1,
            rotation_range=10.,
            width_shift_range=0.1,
            height_shift_range=0.1,
            horizontal_flip=True)

    # Validation images: rescaled only, no augmentation.
    val_datagen = ImageDataGenerator(rescale=1./255)

    # class_mode='categorical' makes the generator yield one-hot label
    # vectors, i.e. targets of shape (batch, num_classes).
    train_generator = train_datagen.flow_from_directory(
            train_data_dir,
            target_size = (img_width, img_height),
            batch_size = my_batch_size,
            shuffle = True,
            class_mode = 'categorical')

    validation_generator = val_datagen.flow_from_directory(
            validation_data_dir,
            target_size=(img_width, img_height),
            batch_size=my_batch_size,
            shuffle = True,
            class_mode = 'categorical')
    # NOTE: the sparse loss below expects integer class indices (targets of
    # shape (batch, 1)), which conflicts with the one-hot targets produced by
    # the generators above.

    t_model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train for nb_epoch epochs, validating on the validation generator.
    t_model.fit_generator(
            train_generator,
            samples_per_epoch = nbr_train_samples,
            nb_epoch = nb_epoch,
            validation_data = validation_generator,
            nb_val_samples = nbr_validation_samples)



# Run the experiment with the module-level class count.
train_top_model(num_classes)

But I am getting the following error:

Traceback (most recent call last):
  File "re_try.py", line 76, in <module>
    train_top_model(num_classes)
  File "re_try.py", line 72, in train_top_model
    nb_val_samples = nbr_validation_samples)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1508, in fit_generator
    class_weight=class_weight)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1261, in train_on_batch
    check_batch_dim=True)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 985, in _standardize_user_data
    exception_prefix='model target')
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 113, in standardize_input_data
    str(array.shape))
ValueError: Error when checking model target: expected newDense to have shape (None, 1) but got array with shape (24, 3)
Exception in thread Thread-1:
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/usr/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 409, in data_generator_task
    generator_output = next(generator)
  File "/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py", line 693, in next
    x = self.image_data_generator.random_transform(x)
  File "/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py", line 403, in random_transform
    fill_mode=self.fill_mode, cval=self.cval)
  File "/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py", line 109, in apply_transform
    final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x]
AttributeError: 'NoneType' object has no attribute 'interpolation'

What am I doing wrong? And why is the newDense layer expected to have a (None,1) shape after I defined it as having the size of 3?

Many thanks

P.S. I am adding the end of the model summary:

merge_25 (Merge)                 (None, 8, 8, 1536)    0           activation_140[0][0]
                                                                   merge_23[0][0]
                                                                   merge_24[0][0]
                                                                   activation_149[0][0]
____________________________________________________________________________________________________
averagepooling2d_15 (AveragePool (None, 1, 1, 1536)    0           merge_25[0][0]
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 1, 1, 1536)    0           averagepooling2d_15[0][0]
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 1536)          0           dropout_1[0][0]
____________________________________________________________________________________________________
newDense (Dense)                 (None, 3)             4611        flatten_1[0][0]
====================================================================================================
Total params: 41,210,595
Trainable params: 41,147,427
Non-trainable params: 63,168

Solution

  • OK, the problem lies in

    validation_generator = val_datagen.flow_from_directory(...
            class_mode = 'categorical')
    

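    class_mode='categorical' makes your generator return one-hot encoded label vectors — in your case vectors of length 3, so each batch of targets has shape (batch_size, 3). However, you set your loss to sparse_categorical_crossentropy, which expects a single integer class index per sample, i.e. targets of shape (batch_size, 1). You should change either class_mode to "sparse" (in both the training and the validation generator) or the loss to "categorical_crossentropy".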