Search code examples
keras, computer-vision, conv-neural-network, resnet

ValueError: A target array with shape (128, 36) was passed for an output of shape (None, 2048) while using as loss `categorical_crossentropy`


I am new to CNN transfer learning. I am trying to apply transfer learning with ResNet50 on a combination of the MNIST dataset and the Kaggle A-Z dataset. Because I am using ResNet50, I changed the images to 3 channels. Now I am getting the ValueError shown above; it is raised at the model's output layer, avg_pool. How can I get around this, or do I need to modify my code? I need to keep the number of classes at 36. I also tried GlobalAveragePooling2D, but I get the same error.

def load_az_dataset(datasetPath):
    """Load a CSV of handwritten-character images into NumPy arrays.

    Each non-blank row is expected to hold an integer label followed by
    784 comma-separated integer pixel values, which are reshaped into a
    28x28 image.

    Args:
        datasetPath: Path of the CSV file to read.

    Returns:
        A ``(data, labels)`` tuple where ``data`` is a float32 array of the
        28x28 images in file order and ``labels`` is an integer array with
        the first column of every row.

    Raises:
        ValueError: If a field is not an integer or a row does not contain
            exactly 784 pixel values.
    """
    data = []
    labels = []

    # Use a context manager so the file handle is closed deterministically,
    # even if a malformed row raises part-way through.
    with open(datasetPath) as csv_file:
        for row in csv_file:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not row.strip():
                continue

            fields = row.split(",")
            label = int(fields[0])
            image = np.array([int(x) for x in fields[1:]], dtype="uint8")

            image = image.reshape((28, 28))
            data.append(image)
            labels.append(label)

    data = np.array(data, dtype="float32")
    labels = np.array(labels, dtype="int")

    return (data, labels)


def load_mnist_dataset():
    """Return the MNIST train and test splits merged into one dataset.

    Returns:
        A ``(data, labels)`` tuple: the training images stacked on top of
        the test images, and the matching labels joined in the same order.
    """
    train_split, test_split = mnist.load_data()
    train_images, train_targets = train_split
    test_images, test_targets = test_split

    # Training samples come first, then test samples, for both arrays.
    merged_images = np.vstack([train_images, test_images])
    merged_targets = np.hstack([train_targets, test_targets])
    return (merged_images, merged_targets)


# Training hyperparameters: number of epochs, initial learning rate, batch size.
EPOCHS = 50
INIT_LR = 1e-1
BS = 128

# Raw string: the Windows path contains "\A", which is an invalid escape
# sequence in a normal string literal (it triggers a Deprecation/SyntaxWarning).
# The resulting path value is unchanged.
(azData, azLabels) = load_az_dataset(r'A_Z Handwritten Data\A_Z Handwritten Data.csv')
(digitsData, digitsLabels) = load_mnist_dataset()

# Shift the A-Z labels by 10 so they do not collide with the digit labels.
# NOTE(review): presumably A-Z labels are 0-25 and digits 0-9, giving 36 classes.
azLabels += 10
data = np.vstack([azData, digitsData])
labels = np.hstack([azLabels, digitsLabels])

# Upscale every image to 32x32, the spatial size passed to ResNet50 below.
data = [cv2.resize(image, (32, 32)) for image in data]
data = np.array(data, dtype="float32")

# Add a trailing channel axis and replicate it 3 times, because the model is
# built with input_shape=(32, 32, 3).
data = np.expand_dims(data, axis=-1)
data = np.repeat(data, 3, axis=-1)
# Scale pixel intensities (assumed to be in 0-255) to the [0, 1] range.
data /= 255.0

# One-hot encode the integer labels; each column corresponds to one class.
le = LabelBinarizer()
labels = le.fit_transform(labels)
counts = labels.sum(axis=0)

# Weight each class by (largest class count / its own count) so that
# under-represented classes contribute more to the loss.
classTotals = labels.sum(axis=0)
classWeight = {
    idx: classTotals.max() / total
    for idx, total in enumerate(classTotals)
}

# Stratified 80/20 train/test split with a fixed seed for reproducibility.
(trainX, testX, trainY, testY) = train_test_split(
    data,
    labels,
    test_size=0.20,
    stratify=labels,
    random_state=42,
)

# On-the-fly augmentation: small rotations, zooms, shifts and shears.
# Horizontal flipping is disabled.
aug = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.05,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,
    fill_mode="nearest")

# SGD with the learning-rate decay set to INIT_LR / EPOCHS.
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)

# NOTE: this is the line that causes the ValueError. With include_top=False and
# pooling='avg' the network ends at the avg_pool layer, whose output shape is
# (None, 2048) (see the model summary), while the targets are (batch, 36).
# NOTE(review): per the Keras docs, `classes` only applies when include_top=True,
# so it does not add a 36-way output here; a classification head must be added.
model = ResNet50(include_top=False,input_shape=(32,32,3),pooling='avg',classes=len(le.classes_),weights='imagenet')

model.compile(optimizer=opt,loss="categorical_crossentropy",metrics=["accuracy"])

# Train on augmented batches of size BS, validate on the held-out split, and
# apply the per-class weights computed above.
H = model.fit(
        aug.flow(trainX, trainY, batch_size=BS),
        validation_data=(testX, testY),
        steps_per_epoch=len(trainX) // BS,
        epochs=EPOCHS,
        class_weight=classWeight,
        verbose=1)

X_train: (353960, 32, 32, 3) Y_train: (353960, 36) X_test: (88491, 32, 32, 3) Y_test: (88491, 36)

I added only first and last lines of model summary:

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_21 (InputLayer)           [(None, 32, 32, 3)]  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 38, 38, 3)    0           input_21[0][0]                   
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 16, 16, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 16, 16, 64)   256         conv1_conv[0][0]                 
__________________________________________________________________________________________________
conv1_relu (Activation)         (None, 16, 16, 64)   0           conv1_bn[0][0]                   
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 18, 18, 64)   0           conv1_relu[0][0]                 
__________________________________________________________________________________________________
######GAP############

conv5_block3_2_relu (Activation (None, 1, 1, 512)    0           conv5_block3_2_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_3_conv (Conv2D)    (None, 1, 1, 2048)   1050624     conv5_block3_2_relu[0][0]        
__________________________________________________________________________________________________
conv5_block3_3_bn (BatchNormali (None, 1, 1, 2048)   8192        conv5_block3_3_conv[0][0]        
__________________________________________________________________________________________________
conv5_block3_add (Add)          (None, 1, 1, 2048)   0           conv5_block2_out[0][0]           
                                                                 conv5_block3_3_bn[0][0]          
__________________________________________________________________________________________________
conv5_block3_out (Activation)   (None, 1, 1, 2048)   0           conv5_block3_add[0][0]           
__________________________________________________________________________________________________
avg_pool (GlobalAveragePooling2 (None, 2048)         0           conv5_block3_out[0][0]           
==================================================================================================
Total params: 23,587,712
Trainable params: 23,534,592
Non-trainable params: 53,120
______________________________

Solution

  • You correctly loaded the ResNet model without its top layer by setting include_top=False. However, you now have to give the model a new head: without one, its last layer is avg_pool, which outputs 2048 features per image instead of 36 class scores. You just have to add a Dense layer on top of the ResNet model's output. The number of units in that layer must equal the number of classes you want to classify: in this case, 36.

    You can fix your error like this:

    import tensorflow as tf
    from tensorflow.keras.applications.resnet50 import ResNet50
    from tensorflow.keras.layers import Flatten, Dense
    from tensorflow.keras import Model
    ...
    # Headless ResNet50 used as a feature extractor. With pooling='avg' the base
    # already ends in a global-average-pooling layer that outputs a flat
    # (None, 2048) tensor, so no Flatten layer is required. The `classes`
    # argument is omitted because it only configures the built-in top, which is
    # disabled by include_top=False.
    base = ResNet50(include_top=False, input_shape=(32, 32, 3), pooling='avg', weights='imagenet')
    # New classification head: one softmax unit per target class (36 here), so
    # the model output shape (None, 36) matches the one-hot targets.
    x = Dense(36, activation='softmax')(base.output)
    model = Model(inputs=base.inputs, outputs=x)
    ...