Search code examples
python pytorch conv-neural-network

Pytorch Conv Layers : input dimensions


I have looked at some similar posts but couldn't find any solution to my specific problem.

I'm running this code :

class MyNet(nn.Module):
    """Stack of convolution stages that keeps the input's height and width.

    Layout: one 3x3 stage, ``nConv - 1`` further 3x3 stages, then a final
    1x1 stage. Every convolution uses stride 1 (padding 1 for the 3x3
    kernels, 0 for the 1x1 kernel) and produces ``nChannel`` feature maps.

    ``nChannel`` and ``nConv`` are module-level globals that are read once,
    when the network is constructed.

    Args:
        input_dim: Number of channels of the input tensor, i.e. ``C`` in an
            ``(N, C, H, W)`` batch (3 for a colour image). ``conv1`` rejects
            any batch whose channel axis does not match this value.
    """

    def __init__(self, input_dim):
        super(MyNet, self).__init__()
        # Attribute names and assignment order are kept stable so that
        # state_dict keys and parameter ordering do not change.
        self.conv1 = nn.Conv2d(input_dim, nChannel, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(nChannel)
        self.conv2 = nn.ModuleList(
            [
                nn.Conv2d(nChannel, nChannel, kernel_size=3, stride=1, padding=1)
                for _ in range(nConv - 1)
            ]
        )
        self.bn2 = nn.ModuleList(
            [nn.BatchNorm2d(nChannel) for _ in range(nConv - 1)]
        )
        self.conv3 = nn.Conv2d(nChannel, nChannel, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(nChannel)

    def forward(self, x):
        """Run ``x`` through every stage and return the final feature maps.

        Args:
            x: Batch whose channel axis (dim 1) has ``input_dim`` entries.

        Returns:
            Tensor with ``nChannel`` channels and the same height and width
            as ``x``.
        """
        # The 3x3 stages apply ReLU before batch norm.
        x = self.bn1(F.relu(self.conv1(x)))
        # Walk the layers that were actually built instead of re-reading the
        # global nConv, which may have changed since construction.
        for conv, bn in zip(self.conv2, self.bn2):
            x = bn(F.relu(conv(x)))
        # The final 1x1 stage has no ReLU.
        return self.bn3(self.conv3(x))

# Collect the input image path(s): everything matching `input_path` followed
# by the literal suffix '0.png', in sorted order.
img_list = sorted(glob.glob(input_path+'0.png'))
# Read the first image only to learn how many channels the network needs.
# NOTE(review): img_list[0] raises IndexError when the glob matches nothing,
# and cv2.imread is documented to return None for an unreadable file —
# neither case is checked here.
im = cv2.imread(img_list[0])

# Build the model and put it in training mode.
# The first conv layer's input channel count is taken from the last axis of
# `im`. NOTE(review): that is only the channel count if `im` is a single
# (height, width, channels) array — confirm before constructing the model.
model = MyNet( im.shape[2] )
if use_cuda:
    model.cuda()
model.train()

optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

# Outer loop: maxIter passes over the image list.
for batch_idx in range(maxIter):
    print('Training started. '+str(datetime.datetime.now())+'   '+str(batch_idx+1)+' / '+str(maxIter))
    # One iteration per full batch; images left over after the last full
    # batch are never visited.
    for im_file in range(int(len(img_list)/batch_size)):
        # The same batch is rebuilt (images re-read from disk) maxUpdate times.
        for loop in tqdm.tqdm(range(maxUpdate)):
            # Rebinds `im`: from here on it is the list of arrays for this
            # batch, no longer the image read before the model was built.
            im = []
            for batch_count in range(batch_size):
                # Load the image for this batch slot.
                resized_im = cv2.imread(img_list[batch_size*im_file + batch_count])
                print(resized_im.shape)
                resized_im = cv2.resize(resized_im, dsize=(224, 224))
                # Move the last (channel) axis to the front and scale the
                # values by 1/255 as float32.
                resized_im = resized_im.transpose( (2, 0, 1) ).astype('float32')/255.
                print(resized_im.shape)
                im.append(resized_im)

            # Stack the per-image arrays into one tensor with a leading
            # batch axis.
            data = torch.from_numpy( np.array(im) )
            print(data.shape)
            if use_cuda:
                data = data.cuda()
            # NOTE(review): the Variable wrapper is presumably unnecessary on
            # current PyTorch releases — confirm against the version in use.
            data = Variable(data)

            # Forward pass, after clearing gradients from the previous step.
            optimizer.zero_grad()
            output = model( data )

I have no problem while training the model. Here are the shapes that the print statements give:
(200, 200, 3) for the first resized_im
(3, 224, 224) for the transposed resized_im
torch.Size([1, 3, 224, 224]) for the tensor data

However, I'm not able to run it with this slightly modified part of the code, where reference_img holds pre-loaded images:

                # Take the already-loaded image for this batch slot (nothing
                # is read from disk here) and show its shape.
                print(reference_img[batch_size*img_idx + batch_count].shape)
                resized_img = cv2.resize(reference_img[batch_size*img_idx + batch_count], dsize=(224, 224))
                print(resized_img.shape)
                # Move the last (channel) axis to the front and scale the
                # values by 1/255 as float32.
                resized_img = resized_img.transpose((2, 0, 1)).astype('float32')/255.
                print(resized_img.shape)
                img.append(resized_img)

            # Stack the per-image arrays into one tensor with a leading
            # batch axis.
            data = torch.from_numpy(np.array(img))
            print(data.shape)

I got this error :

RuntimeError: Given groups=1, weight of size [100, 200, 3, 3], expected input[1, 3, 224, 224] to have 200 channels, but got 3 channels instead

Here are the shapes in the second case :
(200, 200, 3) for the reference_img[batch_size*img_idx + batch_count]
(224, 224, 3) for the resized_img
(3, 224, 224) for the transposed resized_img
torch.Size([1, 3, 224, 224]) for the tensor data

Thank you in advance for your answers and sorry if I made some obvious mistakes


Solution

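  • My guess is that the line model = MyNet( im.shape[2] ) is causing your issue. The weight size in the error, [100, 200, 3, 3], shows that the first conv layer was built with 200 input channels, so it expects an input of size [_, 200, _, _]; that number is the input_dim set by the line above. In other words, im.shape[2] was 200 rather than 3 when the model was created, which would happen if im is not a single (height, width, channels) image at that point. Print the shape of im just before constructing the model and verify that it is what you expect.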