For some reason I cannot feed a single image into a CNN in PyTorch.
I trained the model and tested it on a test set, but when I try to feed a new image into it, the dimensions inside the network no longer match.
import torch
from torchvision import transforms
from torchvision.datasets import ImageFolder

tf = transforms.Compose([transforms.ToTensor(),
                         transforms.Resize((32, 32)),
                         transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         ])
dataset = ImageFolder(path, transform=tf)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
batch_size = 4
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)
This is what I used to load the data. Then I trained a model with the following architecture:
class CNN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.model = torch.nn.Sequential(
            # Input = 3 x 32 x 32, Output = 32 x 32 x 32
            torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            # Input = 32 x 32 x 32, Output = 32 x 16 x 16
            torch.nn.MaxPool2d(kernel_size=2),
            # Input = 32 x 16 x 16, Output = 64 x 16 x 16
            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            # Input = 64 x 16 x 16, Output = 64 x 8 x 8
            torch.nn.MaxPool2d(kernel_size=2),
            # Input = 64 x 8 x 8, Output = 64 x 8 x 8
            torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            # Input = 64 x 8 x 8, Output = 64 x 4 x 4
            torch.nn.MaxPool2d(kernel_size=2),
            torch.nn.Flatten(),
            torch.nn.Linear(64 * 4 * 4, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 10)
        )

    def forward(self, x):
        return self.model(x)
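For reference, a dummy batch shaped like the loader output (batch_size, C, H, W) goes through the network without any issues:

# quick shape sanity check on a batched dummy input
dummy = torch.randn(4, 3, 32, 32)
print(CNN()(dummy).shape)    # torch.Size([4, 10])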
When testing the model on the test set with the following code:
test_acc = 0
model.eval()
with torch.no_grad():
    # Iterating over the test dataset in batches
    for i, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        y_true = labels.to(device)

        # Calculating outputs for the batch being iterated
        outputs = model(images)

        # Getting the predicted labels from the model outputs
        _, y_pred = torch.max(outputs.data, 1)

        # Comparing predicted and true labels
        test_acc += (y_pred == y_true).sum().item()

print(f"Test set accuracy = {100 * test_acc / len(test_dataset)} %")
It works just fine. But when I try to feed a single image with the following code:
from PIL import Image

path = "C:/Users/nyden/new_image.jpg"
tf = transforms.Compose([transforms.ToTensor(),
                         transforms.Resize((32, 32)),
                         transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                         ])

img = Image.open(path)
img_tf = tf(img).float()

model.eval()
with torch.no_grad():
    out = model.forward(img_tf)
    _, y_pred = torch.max(out.data, 1)
print(y_pred)
I just get the error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x16 and 1024x512)
I don't understand why the dimensions are wrong when I feed in a single image instead of a batch. Any help would be greatly appreciated.
You need to feed the model a tensor of shape (batch_size, C, H, W). Your DataLoader does this for you, but tf(img) returns a single unbatched tensor of shape (C, H, W). The convolution and pooling layers accept that, but torch.nn.Flatten() flattens from dim 1 by default, so the final (64, 4, 4) feature map becomes (64, 16) instead of (1, 1024), which is exactly the (64x16 and 1024x512) mismatch in the error. For a single image, add an extra batch dimension so the tensor has shape (1, C, H, W), for example:
img_tf = tf(img).float()[None,...]
or
img_tf = tf(img).float().unsqueeze(0)
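Putting it together, a minimal sketch of the single-image path (reusing the tf and model from your question; the .convert("RGB") call is an extra safeguard in case the JPEG is grayscale or has an alpha channel):

from PIL import Image
import torch

img = Image.open("C:/Users/nyden/new_image.jpg").convert("RGB")
img_tf = tf(img).float().unsqueeze(0)   # shape: (1, 3, 32, 32)

model.eval()
with torch.no_grad():
    out = model(img_tf)                 # shape: (1, 10)
    _, y_pred = torch.max(out, 1)
print(y_pred.item())                    # predicted class index

With the batch dimension in place, Flatten() produces a (1, 1024) tensor and the Linear(1024, 512) layer lines up again.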