python machine-learning pytorch dataset artificial-intelligence

Pytorch custom dataset not loading correctly

I'm new to AI/ML and I'm having issues creating a custom data set. I tried following https://pytorch.org/tutorials/beginner/data_loading_tutorial.html but my data set is not being created correctly or I am doing something wrong. I would appreciate any feed back.

When I run this code I get the error:TypeError: conv2d(): argument 'input' (position 1) must be Tensor, not str

This seems to be because when the animal_trainset variable is created all image, label pairs are now set to the strings 'image' and 'label'. I've checked the MyDataset function and the ToTensor function and the images and labels seem correct, but when combined something breaks.

Here is my code:

import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class MyDataset(Dataset):

    def __init__(self, csv_file, transform=None):
        self.animals = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.animals)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = self.animals.iloc[idx, 0]
        if 'cat' in img_name:
            label = 'cat'
        if 'dog' in img_name:
            label = 'dog'
        if 'rat' in img_name:
            label = 'rat'
        if 'bat' in img_name:
            label = 'bat'
        else:
            label = 'lizard'
        image = io.imread(img_name)
        sample = {'image': image, 'label': label}
        print(label)

        if self.transform:
            sample = self.transform(sample)

        return sample

class ToTensor(object):
    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C x H x W
        image = image.transpose((2, 0, 1))
        print(label)
        return {'image': torch.from_numpy(image),'label': label}

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

fig = plt.figure()

animal_trainset = MyDataset(csv_file = '/Classifier/CombinedCSV.csv', transform=ToTensor())
animal_trainloader = DataLoader(animal_trainset, batch_size=4)

animal_testset = MyDataset(csv_file = '/Classifier/CombinedCSV.csv', transform=ToTensor())
animal_testloader = DataLoader(animal_testset, batch_size=4)

classes = ('dog', 'cat', 'bat', 'rat', 'lizard')

net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(animal_trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        print(inputs)
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

Solution

You are returning a dictionary instead of a tuple as needed by the rest of the code.
After your transformation, return {'image': torch.from_numpy(image),'label': label} of the ToTensor function gets splitted by the inputs, labels = data part, and so inputs and labels contain the key names instead of the values (since it's a dictionary).

Try returning a tuple: return torch.from_numpy(image), label

Your implementation should be generic to if you use a transformation, so you should actually change more stuff:

...
  def __getitem__(self, idx):
        ...
        image = io.imread(img_name)
        sample = (image, label)
        print(label)

        if self.transform:
            sample = self.transform(sample)

        return sample

class ToTensor(object):
    def __call__(self, sample):
        image, label = sample
        
        ...

        return (torch.from_numpy(image), label)

You can see that in your attached tutorial, that uses a dictionary, they access the values directly with

    print(i_batch, sample_batched['image'].size(),
          sample_batched['landmarks'].size())

Both methods can work, I've seen the tuple version used more.