python, pytorch, conv-neural-network, torch, torchvision

How to feed a single image into a PyTorch CNN?


For some reason I cannot feed a single image into a CNN in PyTorch.

I trained the network and tested it on a test set, but when I try to feed a new image into it, the dimensions inside the network no longer match.

import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder

tf = transforms.Compose([transforms.ToTensor(),
                         transforms.Resize((32,32)),
                         transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5))
                         ])

dataset = ImageFolder(path, transform=tf)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

batch_size = 4
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

This is what I used to load the data.
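
Just as a quick sanity check (not part of the actual training code), each batch coming out of the loader should already have the shape the network expects, i.e. (batch_size, C, H, W):

images, labels = next(iter(train_loader))
print(images.shape)   # torch.Size([4, 3, 32, 32])
print(labels.shape)   # torch.Size([4])

Then I trained a model with the following architecture: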

class CNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = torch.nn.Sequential(
            #Input = 3 x 32 x 32, Output = 32 x 32 x 32
            torch.nn.Conv2d(in_channels = 3, out_channels = 32, kernel_size = 3, padding = 1), 
            torch.nn.ReLU(),
            #Input = 32 x 32 x 32, Output = 32 x 16 x 16
            torch.nn.MaxPool2d(kernel_size=2),
  
            #Input = 32 x 16 x 16, Output = 64 x 16 x 16
            torch.nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 3, padding = 1),
            torch.nn.ReLU(),
            #Input = 64 x 16 x 16, Output = 64 x 8 x 8
            torch.nn.MaxPool2d(kernel_size=2),
              
            #Input = 64 x 8 x 8, Output = 64 x 8 x 8
            torch.nn.Conv2d(in_channels = 64, out_channels = 64, kernel_size = 3, padding = 1),
            torch.nn.ReLU(),
            #Input = 64 x 8 x 8, Output = 64 x 4 x 4
            torch.nn.MaxPool2d(kernel_size=2),
  
            torch.nn.Flatten(),
            torch.nn.Linear(64*4*4, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 10)
        )
  
    def forward(self, x):
        return self.model(x)
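
As another sanity check (the check/dummy names below are just for illustration), a throwaway instance of the network maps a random (4, 3, 32, 32) batch to (4, 10) class scores, which confirms the dimension comments above:

check = CNN()
dummy = torch.randn(4, 3, 32, 32)   # fake batch: 4 RGB images of 32x32
print(check(dummy).shape)           # torch.Size([4, 10])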

When testing the model on the test set with the following code:

test_acc=0
model.eval()
  
with torch.no_grad():
    #Iterating over the test dataset in batches
    for i, (images, labels) in enumerate(test_loader):
          
        images = images.to(device)
        y_true = labels.to(device)
          
        #Calculating outputs for the batch being iterated
        outputs = model(images)
          
        #Calculating predicted labels from the model outputs
        _, y_pred = torch.max(outputs.data, 1)
          
        #Comparing predicted and true labels
        test_acc += (y_pred == y_true).sum().item()
      
    print(f"Test set accuracy = {100 * test_acc / len(test_dataset)} %")

It works just fine. But when I try to feed a single image with the following code:

path = "C:/Users/nyden/new_image.jpg"

tf = transforms.Compose([transforms.ToTensor(),
                         transforms.Resize((32,32)),
                         transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5))
                         ])

img = Image.open(path)
img_tf = tf(img).float()
model.eval()
with torch.no_grad():
    out = model.forward(img_tf)
    _, y_pred = torch.max(out.data, 1)
    print(y_pred)

I just get the error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x16 and 1024x512)

I don't understand why the dimensions are wrong when I feed in a single image instead of a batch. Any help would be greatly appreciated.


Solution

  • You need to feed the image in the shape (batch_size, C, H, W). With a single image you have to add an extra batch dimension, so the tensor has shape (1, C, H, W). Without it, tf(img) is a 3D tensor of shape (3, 32, 32): the conv/pool stack still runs (recent PyTorch versions accept unbatched 3D input) and produces (64, 4, 4), but Flatten (which flattens from dim 1 by default) then yields (64, 16) instead of (1, 1024), which is exactly the (64x16 and 1024x512) mismatch in your error. Add the missing dimension as follows:

    img_tf = tf(img).float()[None,...]
    

    or

    img_tf = tf(img).float().unsqueeze(0)
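
    For completeness, a minimal sketch of the corrected inference step, assuming the tf transform, model, and device defined above (the .convert("RGB") call is only a safeguard in case the image is grayscale or has an alpha channel):

    from PIL import Image

    img = Image.open(path).convert("RGB")       # ensure 3 channels
    img_tf = tf(img).float().unsqueeze(0)       # shape: (1, 3, 32, 32)
    img_tf = img_tf.to(device)                  # same device as the model

    model.eval()
    with torch.no_grad():
        out = model(img_tf)                     # shape: (1, 10)
        _, y_pred = torch.max(out, 1)
        print(y_pred.item())                    # predicted class index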