
Error on the Colab GPU runtime, but not on the CPU runtime


I'm trying some code in Google Colab. On the CPU runtime it works fine, but when I switch to the GPU runtime it raises an error.

Self-contained code:

import numpy as np
import tensorflow as tf
import keras
from keras.layers import Input, BatchNormalization, Activation
from keras.layers import ZeroPadding2D, MaxPooling2D, Dense
from keras.layers import Reshape, Add, Dropout
from keras.layers import Conv2D
from keras.layers import Conv3DTranspose, Conv2DTranspose
from keras.initializers import VarianceScaling
from keras.models import Model
from keras.regularizers import l2
from keras.optimizers import SGD
import sys

# hyperparameters
BATCH_NORM_MOMENTUM = 0.1
BATCH_NORM_EPS = 1e-5
KERNEL_REGULARIZER = 0.0001
batchSize = 4

sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)


def step1(input_shape = (3, 256, 256)):

    step = 'step1_'
    X_input = Input(input_shape, name = step + 'input')

    X = Conv2D(64, (7, 7), strides = (2, 2),  padding='same', data_format = 'channels_first', kernel_initializer="he_normal",kernel_regularizer=l2(KERNEL_REGULARIZER), name = step+'b1_conv_a',)(X_input)
    X = BatchNormalization(axis = 1, momentum=BATCH_NORM_MOMENTUM, epsilon = BATCH_NORM_EPS, name = step+'b1_bn_a')(X)
    X = Activation('relu', name = step+'b1_act_a')(X)
    X = MaxPooling2D((3, 3), strides=(2, 2), data_format='channels_first', padding='same', name = step + 'b1_maxpool2d_a')(X)
    print(X.shape)
    model = Model(inputs = X_input, outputs = X, name='step1')

    return model

step1Model = step1((3,256,256))

Error:

ValueError: Shape must be rank 1 but is rank 0 for 'step1_b1_bn_a/cond/Reshape_4' (op: 'Reshape') with input shapes: [1,64,1,1], [].

Why is there this difference between the CPU and GPU runtimes?


Solution

  • This is probably caused by the different TensorFlow builds behind the two runtimes: the CPU kernel uses the tensorflow package while the GPU kernel uses tensorflow-gpu, and the two behave differently here.

    You can bypass it by removing axis=1 from the BatchNormalization layer.

    change:

    X = BatchNormalization(axis = 1, momentum=BATCH_NORM_MOMENTUM, epsilon = BATCH_NORM_EPS, name = step+'b1_bn_a')(X)
    

    to:

    X = BatchNormalization(momentum=BATCH_NORM_MOMENTUM, epsilon = BATCH_NORM_EPS, name = step+'b1_bn_a')(X)
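
    If you want to confirm that the CPU and GPU runtimes really do run different TensorFlow builds, a quick check (not part of the fix itself) is to print the versions on each kernel and compare:

    import tensorflow as tf
    import keras

    # Run this once on the CPU runtime and once on the GPU runtime;
    # differing versions would support the tensorflow vs tensorflow-gpu explanation above.
    print("TensorFlow:", tf.__version__)
    print("Keras:", keras.__version__)
    print("GPU device:", tf.test.gpu_device_name() or "none")

    One caveat with the workaround: without axis=1, BatchNormalization falls back to its default axis=-1, so with channels_first data it normalizes over the last spatial axis rather than the channel axis. Keep that in mind if channel-wise statistics matter for your model.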