I am trying to feed a dataset of images to a neural network and I am getting the error below. I don't know what the cause might be. All the images have different sizes. I have also tried changing the batch size and the kernel sizes, but without success.
File "c:\Users\david\Desktop\cs_agent\main.py", line 49, in <module>
for i, data in enumerate(train_loader, 0):
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 530, in __next__
data = self._next_data()
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 570, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
return self.collate_fn(data)
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 172, in default_collate
return [default_collate(samples) for samples in transposed] # Backwards compatibility.
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 172, in <listcomp>
return [default_collate(samples) for samples in transposed] # Backwards compatibility.
File "C:\Users\david\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\_utils\collate.py", line 138, in default_collate
return torch.stack(batch, 0, out=out)
RuntimeError: stack expects each tensor to be equal size, but got [3, 300, 535] at entry 0 and [3, 1080, 1920] at entry 23
This is my main file:
import numpy as np
import matplotlib.pyplot as plt
import torch
import dataset
import os
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision
import check_device
import neural_network
import torch.optim as optim
# Constants declared by the script; the code below does not reference
# EPS, LR or WEIGHT_DECAY (the optimizer uses its own literal values).
EPS = 1.e-7
LR = 0.5
WEIGHT_DECAY = 0.5
batch_size = 50

# ---- Data loading --------------------------------------------------------
# No `transform` is passed here, so csHeadBody applies no transform to the
# images it loads before the DataLoader tries to batch them.
test_dataset = dataset.csHeadBody(
    csv_file="images\\test_labels.csv",
    root_dir="images\\test",
)
train_dataset = dataset.csHeadBody(
    csv_file="images\\train_labels.csv",
    root_dir="images\\train",
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

# ---- Model, loss and optimizer -------------------------------------------
net = neural_network.Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# ---- Training ------------------------------------------------------------
for epoch in range(2):  # two passes over the training set
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # Each batch arrives as [inputs, labels]; it is printed in full.
        print(data)
        inputs, labels = data

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss once every 2000 mini-batches.
        running_loss += loss.item()
        if (i + 1) % 2000 == 0:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
And this is my dataset file:
class csHeadBody(Dataset):
    """Image dataset driven by a CSV table of file names and labels.

    Column 0 of the CSV is joined onto ``root_dir`` to locate each image and
    column 1 is returned as its label.
    """

    def __init__(self, csv_file, root_dir, transform=None, target_transform=None):
        """Load the label table and store the dataset configuration.

        Args:
            csv_file: Path handed to ``pd.read_csv``.
            root_dir: Directory the file names in column 0 are relative to.
            transform: Optional callable applied to every loaded image.
            target_transform: Optional callable applied to every label.
        """
        self.img_labels = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the label table.

        The image is whatever ``read_image`` returns for the file; it is only
        modified when a ``transform`` was supplied.
        """
        file_name = self.img_labels.iloc[idx, 0]
        image = read_image(os.path.join(self.root_dir, file_name))
        label = self.img_labels.iloc[idx, 1]

        # Both hooks are optional; a falsy value means "leave as loaded".
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
This is my neural network architecture:
import torch.nn.functional as F
import torch.nn as nn
import torch
class Net(nn.Module):
    """Small LeNet-style CNN mapping RGB images to 10 class logits.

    Two conv + max-pool stages are followed by an adaptive average pool that
    always yields a 5x5 feature map, so the fully connected head receives
    ``16 * 5 * 5`` features regardless of the input resolution.
    """

    def __init__(self):
        super().__init__()
        # conv1 must emit 6 channels because conv2 consumes 6. A 5x5 kernel
        # keeps the layer usable on small images (a 535x535 kernel would
        # reject anything smaller than 535 pixels per side).
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # fc1 is sized for a 16x5x5 feature map. Without this layer that only
        # holds for 32x32 inputs; for those inputs the pooling is an identity.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((5, 5))
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: Floating-point tensor of shape ``(N, 3, H, W)`` with ``H`` and
                ``W`` of at least 16 (smaller inputs vanish in the second
                max-pool).

        Returns:
            Tensor of shape ``(N, 10)`` holding unnormalised class scores.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.adaptive_pool(x)  # fix the spatial size before the linear head
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
TL;DR: The error was due to inputs of different sizes being passed to `collate_fn`. If you need to use different input sizes, you must either deal with that in the transforms (e.g. albumentations' SmallestMaxSize + Crop or LongestMaxSize + Pad) or write a custom batch sampler.
The error was actually not related to the neural network architecture. It was raised because you tried to create a batch composed of images of different sizes (the error says: `stack expects each tensor to be equal size, but got [3, 300, 535] at entry 0 and [3, 1080, 1920] at entry 23`). When you use `torch.utils.data.DataLoader` with only `batch_size` and `shuffle` specified (all other arguments left at their defaults), the default `sampler` and `collate_fn` are used (more here). The default `collate_fn` tries to stack the samples returned by the dataset's `__getitem__` method. In your case, `__getitem__` returned images of different sizes (`[3, 300, 535]` and `[3, 1080, 1920]`), and those were passed to `torch.stack`, where the error was thrown.
There are two common ways to deal with inputs of multiple sizes:
1. Use transforms (resize, pad or crop) so that every image has the same size before it is batched.
2. Write a custom batch sampler (passed to `DataLoader`) to ensure that only images of the same size are loaded in a single batch.

The second option is a bit harder to implement, but in my opinion it is better, since we do not add or remove any information from the image (especially good for generative AI purposes). To implement this option we need to store the image size (for each loaded filepath) in the dataset class. Then, in the custom sampler, we use that information to create batches whose indices refer only to images of the same size.
Dataset class
from typing import Callable
import imagesize
import torchvision.datasets
from PIL import Image
from torch import Tensor
class FilepathsDataset(torchvision.datasets.VisionDataset):
    """Dataset over a list of image files that also indexes them by size.

    ``imgsz_idxs`` maps each ``(width, height)`` found among the files to the
    list of dataset indices having that size, so a batch sampler can build
    batches made of same-size images only.

    Target loading is task specific: override ``load_target`` in a subclass.
    """

    def __init__(
        self,
        image_filepaths: list[str],
        transform: Callable,
    ):
        """Record the file paths and group their indices by image size.

        Args:
            image_filepaths: Paths of the image files, in dataset order.
            transform: Callable applied to each PIL image in ``__getitem__``.
        """
        # Run the VisionDataset initialiser so the attributes it defines
        # exist on the instance.
        super().__init__(root=None, transform=transform)
        self.image_filepaths = image_filepaths
        imgsz_idxs: dict[tuple[int, int], list[int]] = {}
        for idx, path in enumerate(image_filepaths):
            # imagesize package loads only the file headers so it works really fast
            width, height = imagesize.get(path)
            imgsz_idxs.setdefault((width, height), []).append(idx)
        self.imgsz_idxs = imgsz_idxs
        self.transform = transform

    def get_raw_data(self, idx: int) -> Image.Image:
        """Return the untransformed image at ``idx`` converted to RGB."""
        # The context manager closes the file handle as soon as the pixel
        # data has been converted into the returned image.
        with Image.open(self.image_filepaths[idx]) as image:
            return image.convert("RGB")

    def load_target(self, idx: int) -> Tensor:
        """Return the target for sample ``idx``.

        Raises:
            NotImplementedError: Always, until a subclass overrides this.
        """
        # TODO: load your target here
        raise NotImplementedError("override load_target() to load your target")

    def __len__(self) -> int:
        """Return the number of image files."""
        return len(self.image_filepaths)

    def __getitem__(self, idx: int) -> tuple[Tensor, Tensor]:
        """Return the transformed image and its target."""
        target = self.load_target(idx)
        image = self.get_raw_data(idx)
        image = self.transform(image)
        return image, target
Sampler class
This sampler shuffles indices and drops the last batch by default (to make sure all batches have the same number of samples).
import copy
import random
from torch.utils.data import Sampler
class SameResolutionSampler(Sampler):
    """Batch sampler whose batches only contain images of one resolution.

    Each iteration shuffles the indices inside every resolution group, cuts
    them into full batches (an incomplete trailing batch is dropped) and then
    shuffles the order of the batches. Intended to be passed to
    ``DataLoader`` as ``batch_sampler``.
    """

    def __init__(self, batch_size: int, imgsz_idxs: dict[tuple[int, int], list[int]]):
        """Store the batch size and the resolution -> indices mapping.

        Args:
            batch_size: Number of indices per batch; must be at least 1.
            imgsz_idxs: Maps an image size to the dataset indices of that
                size. The mapping is never modified by the sampler.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        # A non-positive batch size would make __iter__ loop forever and
        # __len__ divide by zero, so reject it up front.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        self.imgsz_idxs = imgsz_idxs
        self.batch_size = batch_size

    def __iter__(self):
        """Return an iterator over batches, each a list of dataset indices."""
        step = self.batch_size
        batches = []
        for idxs in self.imgsz_idxs.values():
            # Shuffle a copy so that batch membership, and which leftover
            # indices get dropped, differ from one epoch to the next.
            shuffled = random.sample(idxs, len(idxs))
            usable = len(shuffled) - len(shuffled) % step  # drop last
            batches.extend(
                shuffled[start:start + step] for start in range(0, usable, step)
            )
        random.shuffle(batches)
        return iter(batches)

    def __len__(self) -> int:
        """Return the number of full batches produced per iteration."""
        return sum(len(idxs) // self.batch_size for idxs in self.imgsz_idxs.values())
DataLoader instance
filepaths = _ # TODO: paths to your images
transform = _  # TODO: your transforms
batch_size = _  # TODO: your batch_size

# The dataset groups its indices by image size while it is constructed.
ds = FilepathsDataset(image_filepaths=filepaths, transform=transform)
# The sampler turns that grouping into batches of same-size images.
sampler = SameResolutionSampler(batch_size=batch_size, imgsz_idxs=ds.imgsz_idxs)
# The sampler is passed as `batch_sampler`; no batch_size/shuffle is given here.
dataloader = DataLoader(dataset=ds, batch_sampler=sampler)
NOTE: Also, if you use the second option (custom sampler) and train a convolutional neural network with a linear head, you must check that the network works with different input sizes, or add adaptive pooling before the linear layers to ensure that the sizes are correct.