Search code examples
pytorch conv-neural-network image-resizing tensor

RuntimeError: shape '[10, 3, 150, 150]' is invalid for input of size 472500


I'm trying to perform a convolution operation on the COVID CT dataset and I keep getting this error. The image batch from my train loader has shape (10, 150, 150, 3), and I reshaped it into [10, 3, 150, 150] using torch.reshape(). Can anybody help me with this problem?

My CNN Code

class BConv(nn.Module):
    """Small CNN classifier for 150x150 RGB images.

    Pipeline: conv1 -> bn1 -> relu1 -> pool -> conv2 -> relu2 -> flatten -> fc.

    Both convolutions use kernel 3, stride 1, padding 1, so they keep the
    spatial size; the single 2x2 max-pool halves it (150 -> 75). The linear
    layer is therefore sized for 20 * 75 * 75 input features.

    Args:
        out: Number of output features (classes) of the final linear layer.
    """

    def __init__(self, out=3):
        super(BConv, self).__init__()
        # Expects channel-first input of shape (batch, 3, 150, 150). A
        # channels-last batch (batch, 150, 150, 3) must be converted with
        # `x.permute(0, 3, 1, 2)`; `reshape` does not move axes.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.relu1 = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        # NOTE: an optional third block (conv3/bn3/relu3, 20 -> 32 channels) is
        # intentionally left out; adding it requires in_features = 32 * 75 * 75.
        # `out` now drives the classifier width instead of a hard-coded 3.
        self.fc = nn.Linear(in_features=20 * 75 * 75, out_features=out)

    def forward(self, input):
        """Compute class scores for a batch of images.

        Args:
            input: Float tensor of shape (batch, 3, 150, 150).

        Returns:
            Tensor of shape (batch, out) with one score per class.
        """
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)
        output = self.pool(output)      # (batch, 12, 75, 75)
        output = self.conv2(output)
        output = self.relu2(output)     # (batch, 20, 75, 75)

        # Collapse channel and spatial axes into one feature vector per sample.
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        return output

Data Preprocessing

class Ctdataset(Dataset):
    """Dataset of CT images listed in a space-delimited index file.

    Each data row of the index file supplies an image file name (first
    column) and its label (second column). Images are opened lazily in
    ``__getitem__``.

    Args:
        path: Path of the index file, parsed with
            ``pd.read_csv(path, delimiter=" ")``.
        image_dir: Directory that the image file names are relative to.
    """

    def __init__(self, path, image_dir="2A_images"):
        # NOTE(review): read_csv is called without header=None, so the first
        # line of the file is consumed as column names -- confirm the index
        # file really has a header row.
        self.data = pd.read_csv(path, delimiter=" ")
        self.image_dir = image_dir

        rows = self.data.values.tolist()
        self.image = [row[0] for row in rows]
        self.labels = [row[1] for row in rows]

    def __len__(self):
        """Return the number of samples listed in the index file."""
        return len(self.image)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Position of the sample in the index file.

        Returns:
            Tuple ``(img, label)`` where ``img`` is a float NumPy array of
            shape (150, 150, 3) (height, width, channel) and ``label`` is the
            value stored in the second column for that row.
        """
        img_path = os.path.join(self.image_dir, self.image[idx])
        # The context manager releases the file handle; convert() returns an
        # independent in-memory copy that stays valid afterwards.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB").resize((150, 150))
        # NOTE(review): `float` means float64 here, while nn layers default to
        # float32 -- callers may need `.float()` before feeding a model.
        img = np.array(img).astype(float)
        # The label was previously never looked up, raising NameError.
        label = self.labels[idx]
        return img, label
    
        

Solution

  • Here I'm considering your whole model including the third block consisting of conv3, bn3, and relu3. There are a few things to note:

    • Reshaping is substantially different from permuting the axes. When you say you have an input shape of (batch_size, 150, 150, 3), it means the channel axis is last (NHWC). Since PyTorch's built-in 2D layers expect the channel-first NCHW format, i.e. (batch_size, channels, height, width), you need to permute the axes rather than reshape them; you can do so with torch.Tensor.permute:

      >>> x = torch.rand(10, 150, 150, 3)
      >>> x.permute(0, 3, 1, 2).shape
      torch.Size([10, 3, 150, 150])
      
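    • Assuming your input is shaped (batch_size, 3, 150, 150), the output of relu3 will have shape (batch_size, 32, 75, 75): the padded 3×3 convolutions preserve the spatial size and the single 2×2 max-pool halves 150 to 75. As such, the following fully connected layer must have exactly 32*75*75 input features.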

    • However you need to flatten this tensor as you did in your code with a view: output = output.view(output.size(0), -1). Another approach is to define a self.flatten = nn.Flatten() layer and call it with output = self.flatten(output).

    • As of PyTorch v1.8.0, an alternative to setting the in_features in your fully connected layer is to use nn.LazyLinear which will initialize it for you based on the first inference:

      >>> self.fc = nn.LazyLinear(out_features=3)
      
    • Side note: you don't need to define separate ReLU layers (relu1, relu2, and relu3), since ReLU is a parameter-free function; a single instance can be reused:

      >>> self.relu = nn.ReLU()
      

    Here is the full code for reference:

    class BConv(nn.Module):
        """Three-convolution CNN that classifies channels-last image batches.

        The network permutes its input to channel-first layout, applies
        conv1/bn1/ReLU/max-pool, conv2/ReLU and conv3/bn3/ReLU, then flattens
        and projects to ``out`` scores. The linear layer is sized for
        32 * 75 * 75 features, i.e. 150x150 images after one 2x2 pooling step.

        Args:
            out: Number of output features of the final linear layer.
        """

        def __init__(self, out=3):
            super().__init__()

            # Stage 1: 3 -> 12 channels, normalised, then spatially halved.
            self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
            self.bn1 = nn.BatchNorm2d(12)
            self.pool = nn.MaxPool2d(2)

            # Stage 2: 12 -> 20 channels (no normalisation).
            self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)

            # Stage 3: 20 -> 32 channels, normalised.
            self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
            self.bn3 = nn.BatchNorm2d(32)

            # Parameter-free helpers shared by every stage, plus the classifier.
            self.relu = nn.ReLU()
            self.flatten = nn.Flatten()
            self.fc = nn.Linear(32 * 75 * 75, out)

        def forward(self, input):
            """Return class scores for a (batch, 150, 150, 3) input tensor."""
            # Move the channel axis from last to second position (NHWC -> NCHW).
            channels_first = input.permute(0, 3, 1, 2)

            stage1 = self.pool(self.relu(self.bn1(self.conv1(channels_first))))
            stage2 = self.relu(self.conv2(stage1))
            stage3 = self.relu(self.bn3(self.conv3(stage2)))

            return self.fc(self.flatten(stage3))