Search code examples
pythonkeyerrorpytorch

KeyError when trying to modify pytorch-example


I'm trying to modify this pytorch-example (https://github.com/pytorch/examples/blob/master/mnist/main.py) to work with my own dataset.

I tried to feed my data into a dataloader. I encapsulated the data in two different ways: one time as an extension of torch.utils.data.Dataset and one time as torch.utils.data.TensorDataset. Unfortunately I always get the same error that I don't understand:

Traceback (most recent call last):
  File "main.py", line 142, in <module>
    train(epoch)
  File "main.py", line 112, in train
    output = model(data)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 210, in __call__
    result = self.forward(*input, **kwargs)
  File "main.py", line 90, in forward
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 210, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/conv.py", line 235, in forward
    self.padding, self.dilation, self.groups)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/functional.py", line 54, in conv2d
    return f(input, weight, bias) if bias is not None else f(input, weight)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/conv.py", line 33, in forward
    output = self._update_output(input, weight, bias)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/conv.py", line 88, in _update_output
    return self._thnn('update_output', input, weight, bias)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/conv.py", line 147, in _thnn
    return impl[fn_name](self, self._bufs[0], input, weight, *args)
  File "/usr/local/lib/python2.7/dist-packages/torch/nn/_functions/conv.py", line 213, in call_update_output
    backend = type2backend[type(input)]
  File "/usr/local/lib/python2.7/dist-packages/torch/_thnn/__init__.py", line 13, in __getitem__
    return self.backends[name].load()
KeyError: <class 'torch.cuda.ByteTensor'>

Here's my main.py, which is basically this example: https://github.com/pytorch/examples/blob/master/mnist/main.py

from __future__ import print_function
import argparse
import os
import glob
import numpy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
from PIL import Image
from torchvision import datasets, transforms
from torch.autograd import Variable
from InputData import InputData

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=10, metavar='N',
                    help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=1, metavar='N',
                    help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                    help='learning rate (default: 0.01)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                    help='how many batches to wait before logging training status')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

# Original DataLoader - WORKS:
'''
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
'''

# DataLoader as extension of data.Dataset:

train_loader = torch.utils.data.DataLoader(InputData('~/bakk-arbeit/data', train=True),
                                           batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(InputData('~/bakk-arbeit/data', train=False),
                                          batch_size=args.batch_size, shuffle=True, **kwargs)



# DataLoader as data.TensorDataset:
'''
data_folder = os.path.expanduser('~/bakk-arbeit/data')
InputData = InputData()
train = data_utils.TensorDataset(InputData.read_image_files(os.path.join(data_folder, 'training')),InputData.read_label_files(os.path.join(data_folder, 'training')))
test = data_utils.TensorDataset(InputData.read_image_files(os.path.join(data_folder, 'test')),InputData.read_label_files(os.path.join(data_folder, 'test')))
train_loader = data_utils.DataLoader(train, batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = data_utils.DataLoader(test, batch_size=args.batch_size, shuffle=True, **kwargs)
'''


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5) # change to 3 input channels for InputData!
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50) # change 320 to 500 for InputData to match 32x32
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320) # # change 320 to 500 for InputData to match 32x32
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

model = Net()
if args.cuda:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # data = data.numpy()
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
        pred = output.data.max(1)[1] # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))



for epoch in range(1, args.epochs + 1):
    train(epoch)
    test()

...and here's my InputData.py that extends data.Dataset:

import torch
import numpy
import torch.utils.data as data
import glob
import os
from PIL import Image


class InputData(data.Dataset):
    train_folder = 'training'
    test_folder = 'test'

    def __init__(self, root='', train=True):
        self.root = os.path.expanduser(root)
        self.train = train  # training set or test set
        if root:
            if self.train:
                self.training_labels = self.read_label_files(os.path.join(self.root, self.train_folder))
                #with open(os.path.join(self.root, 'training_labels.pt'), 'wb') as f:
                    # torch.save(self.read_label_files(os.path.join(self.root, self.train_folder)), f)
                # with open(os.path.join(self.root, 'training_images.pt'), 'wb') as f:
                    #torch.save(self.read_image_files(os.path.join(self.root, self.train_folder)), f)
                self.training_images = self.read_image_files(os.path.join(self.root, self.train_folder))
            else:
                self.test_images = self.read_image_files(os.path.join(self.root, self.test_folder))
                self.test_labels = self.read_label_files(os.path.join(self.root, self.test_folder))
        print('initialized')

    def read_image_files(self, path):
        print('reading image files...')
        image_list = []
        # ten = torch.ByteTensor(3,32,32)
        for filename in glob.glob(path + '/*.png'):
            im = Image.open(filename)
            data = numpy.asarray(im)
            data = numpy.swapaxes(data,0,2)
            image_list.append(data)
        image_list = numpy.asarray(image_list)
        t = torch.from_numpy(image_list)
        # ten = torch.stack([ten, t])
        print('done!')
        return t

    def read_label_files(self, path):
        print('reading labels...')
        labels = []
        for filename in glob.glob(path + '/*.png'):
            base = os.path.basename(filename)
            im_class = int(base[:1])
            labels.append(im_class)
        print('done!')
        return torch.LongTensor(labels)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        if self.train:
            img, target = self.training_images[Index], self.training_labels[Index]
        else:
            img, target = self.test_images[Index], self.test_labels[Index]

        # img = Image.fromarray(img.numpy(), mode='RGB') 
        # -> won't work for me??? returns TypeError: batch must contain tensors, numbers, or lists; found <class 'PIL.Image.Image'>

        return img, target

    def __len__(self):
        if self.train:
            return len(self.training_images)
        else:
            return len(self.test_images)

What am I doing wrong?


Solution

  • It seems that most operation are defined on FloatTensor and DoubleTensor (source), and your model gets a ByteTensor in model(data).

    I would go ahead an make sure that my dataset object outputs FloatTensors. Debug the line before model(data) and see the tensor type of data. I would guess it's a ByteTensor, that would be a good place to start.