I am running a simple 1d CNN model for MNIST dataset and doing it for research purpose so have to stick with using conv1d layer. My CNN model is as:
class net_mnist(nn.Module):
def __init__(self, input_size, output_size):
super(net_mnist, self).__init__()
self.conv1 = nn.Conv1d(1, 1, kernel_size=1, stride=1, padding=1)
self.pool = nn.MaxPool1d(kernel_size=1, stride=2)
self.fc1 = nn.Linear(input_size, 4096,bias = True)
self.fc2 = nn.Linear(4096, 4096,bias = True)
self.fc3 = nn.Linear(4096,4096,bias = True)
self.fc4 = nn.Linear(4096,4096,bias = True)
self.fc5 = nn.Linear(4096,output_size,bias = True)
self.relu = nn.ReLU()
def forward(self, x):
x = self.conv1(x)
x = self.relu(x)
x = self.pool(x)
x = self.conv2(x)
x = self.relu(x)
x = self.pool(x)
x = x.view(x.size(0), -1)
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
return x
return x.squeeze()
And the function for loading dataset is:
transforms = Compose([
ToTensor(),
# Normalize(
# mean=[0.1307],
# std=[0.3081],
# )
])
trainset = MNIST(root = './mnist_data',train = True, download = True,transform=transforms)
testset = MNIST(root = './mnist_data',train = False,download = True,transform=transforms)
trainloader = DataLoader(trainset,batch_size=96,shuffle=True)
testloader = DataLoader(testset ,batch_size=96,shuffle=True)
And cell where error is occuring is:
net = net_mnist(28*28, 10)
net.to(device)
criterion = nn.CrossEntropyLoss().to(device)
writer = SummaryWriter()
net = train_mnist(net,trainloader,testloader,criterion,lrate=0.03,max_epochs=6)
And following is the error:
RuntimeError Traceback (most recent call last)
<ipython-input-21-6bc9f6dcde7f> in <cell line: 5>()
3 criterion = nn.CrossEntropyLoss().to(device)
4 writer = SummaryWriter()
----> 5 net = train_mnist(net,trainloader,testloader,criterion,lrate=0.03,max_epochs=6)
5 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
307 weight, bias, self.stride,
308 _single(0), self.dilation, self.groups)
--> 309 return F.conv1d(input, weight, bias, self.stride,
310 self.padding, self.dilation, self.groups)
311
RuntimeError: Expected 2D (unbatched) or 3D (batched) input to conv1d, but got input of size: [96, 1, 28, 28]
I get it that to remove above error I need to flatten the images and then pass it as [96,1,784] vector but I can't understand how and where in the code I should do it to remove the error. I tried the solution given by Chatgpt but it didn't worked. And on internet I could not find the way to flatten the MNIST images for CNN structure.
You just reshape the tensor.
bs, c, h, w = image_tensor.shape
flattened_tensor = image_tensor.reshape(bs, c, h*w)
Also doing a 1d conv on a 1 channel input is kinda pointless. This is why most MNIST models just drop the channel and send the flattened input to a MLP.