I am training a CNN on MNIST using the following code:
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
train_data = datasets.MNIST(
    root="data",
    train=True,
    transform=ToTensor(),
    download=True
)
test_data = datasets.MNIST(
    root="data",
    train=False,
    transform=ToTensor(),
    download=True
)

# Process data into batches
loaders = {
    "train": DataLoader(train_data,
                        batch_size=100,
                        shuffle=True,
                        num_workers=0),
    "test": DataLoader(test_data,
                       batch_size=100,
                       shuffle=True,
                       num_workers=0)
}
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x), 2)))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.softmax(x)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(),
                       lr=0.001)
loss_fn = nn.CrossEntropyLoss()
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(loaders["train"]):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 20 == 0:
            print(f'Train epoch: {epoch} [{batch_idx * len(data)}/{len(loaders["train"].dataset)} ({100. * batch_idx / len(loaders["train"]):.0f}%)]\t{loss.item():.6f}')
def test():
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in loaders["test"]:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += loss_fn(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(loaders['test'].dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(loaders["test"].dataset)} ({100. * correct / len(loaders["test"].dataset):.0f}%)\n')
for epoch in range(1, 11):
    train(epoch)
    test()
But when executing this, I get the following stack trace:
Traceback (most recent call last):
  File "c:/Users/u/Projects/venv/src/digits_model_training.py", line 103, in <module>
    train(epoch)
  File "c:/Users/u/Projects/venv/src/digits_model_training.py", line 76, in train
    output = model(data)
  File "C:\Users\u\Projects\venv\venv\lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "C:\Users\u\Projects\venv\venv\lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
  File "c:/Users/u/Projects/venv/src/digits_model_training.py", line 51, in forward
    x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x), 2)))
  File "C:\Users\u\Projects\venv\venv\lib\site-packages\torch\nn\modules\module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "C:\Users\u\Projects\venv\venv\lib\site-packages\torch\nn\modules\module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
TypeError: forward() takes 2 positional arguments but 3 were given
I have only passed x, which is the image, to the forward() function. Therefore I am surprised by the error. The error, however, seems to be pretty common (e.g. see here or here) and can have a variety of underlying causes. Mostly it occurs because people have passed multiple arguments. But I haven't, at least not consciously. One note, although it is most probably completely unrelated to the error: I do not have an NVIDIA GPU, which is why in my case device = torch.device('cpu'). Where is my mistake, and how can I fix it?
The error is in the forward function, at x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x), 2))). Here, the dropout layer is being called with two arguments (the output of the conv layer and 2) due to a misplaced parenthesis: the 2 that is meant for F.max_pool2d ends up inside the call to self.conv2_drop.
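nn.Dropout2d.forward() accepts a single input tensor, so any extra positional argument triggers exactly this TypeError. A minimal sketch reproducing it in isolation (the tensor shape here is just an arbitrary example):

import torch
import torch.nn as nn

drop = nn.Dropout2d()
x = torch.randn(100, 20, 8, 8)  # arbitrary example shape

drop(x)     # fine: forward() receives exactly one input tensor
drop(x, 2)  # TypeError: forward() takes 2 positional arguments but 3 were given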
Here's the corrected version:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # The line below is wrong in the original code: the dropout layer
        # takes only one argument, but the misplaced parenthesis passes it two.
        # x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x), 2)))
        # Corrected line: the 2 now belongs to F.max_pool2d
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax internally,
        # so an explicit F.softmax here would be applied on top of it
        return x
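Unrelated to the exception, but worth fixing while you are at it: nn.CrossEntropyLoss combines log-softmax and negative log-likelihood, so it expects raw logits. The original return F.softmax(x) (which also lacks a dim argument and triggers a deprecation warning) would effectively apply softmax twice during training and slow down learning, which is why the corrected forward returns the logits directly. A minimal sketch of the intended pairing:

output = model(data)              # raw logits from fc2
loss = loss_fn(output, target)    # nn.CrossEntropyLoss handles the softmax
pred = output.argmax(dim=1)       # argmax is the same on logits or probabilities
probs = F.softmax(output, dim=1)  # only if you actually need probabilities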