I am trying to build a custom model for image classification. Here is the code:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomModel(nn.Module):
    def __init__(self, input_channels, hidden_units, output_classes):
        super(CustomModel, self).__init__()
        # Global feature extraction layers
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))  # Global average pooling
        # Local feature extraction layers
        self.local_conv = nn.Conv2d(input_channels, hidden_units, kernel_size=3, padding=1)
        # Convolutional neural network
        self.cnn = nn.Sequential(
            nn.Conv2d(19, hidden_units, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(19, hidden_units * 2, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Fully connected layer
        self.fc = nn.Linear(19 * 2 * 56 * 56, output_classes)

    def forward(self, x):
        # Global feature extraction
        global_features = self.global_pool(x)
        global_features = global_features.view(global_features.size(0), -1)
        global_features = global_features.unsqueeze(-1).unsqueeze(-1)  # Expand dimensions to match local features
        global_features = global_features.expand(-1, -1, x.size(2), x.size(3))  # Expand to match spatial dimensions
        # Local feature extraction
        local_features = self.local_conv(x)
        local_features = F.relu(local_features)
        # Concatenate global and local features
        combined_features = torch.cat((global_features, local_features), dim=1)
        # CNN processing
        cnn_output = self.cnn(combined_features)
        # Flatten for fully connected layer
        cnn_output = cnn_output.view(cnn_output.size(0), -1)
        # Fully connected layer
        output = self.fc(cnn_output)
        return output
#custom_model_1 = CustomModel(input_channels = 3 , hidden_units = 16 , output_classes = len(class_names) )
input_channels = 3
hidden_units = 16
output_classes = len(class_names) # this is 75 btw.
custom_model = CustomModel(input_channels, hidden_units, output_classes)
When I try to train the model, I get this error:
Given groups=1, weight of size [16, 32, 3, 3], expected input[42, 19, 224, 224] to have 32 channels, but got 19 channels instead
Why is this happening? I have a hunch that the input dimensions are not matching the conv layers, but I'm not sure what I'm missing. Please help!
Your conv dimensions don't line up. Here is your cnn block with hidden_units replaced by your value of 16 for clarity:
self.cnn = nn.Sequential(
    nn.Conv2d(19, 16, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(19, 32, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2)
)
The first conv takes a 19-channel input and produces a 16-channel output. Your second conv then expects a 19-channel input but receives that 16-channel output instead, which throws the error.
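You can reproduce the same mismatch in isolation; this is just a sketch with made-up tensor sizes:

import torch
import torch.nn as nn

conv = nn.Conv2d(19, 32, kernel_size=3, padding=1)  # expects a 19-channel input
x = torch.randn(1, 16, 112, 112)                    # a 16-channel feature map, like your first conv's output
conv(x)  # RuntimeError: expected input to have 19 channels, but got 16 channels instead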
You probably want something like this instead:
self.cnn = nn.Sequential(
    nn.Conv2d(19, hidden_units, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(hidden_units, hidden_units * 2, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2)
)
Also, a couple of notes on CNN design:
MaxPool layers have fallen out of style in favor of adding stride to the convolution (a conv with stride 2 produces the same size reduction as a conv with stride 1 followed by a max pool). There is nothing strictly wrong with explicit max pool layers, but it's worth noting.
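For example, a sketch of the same downsampling written with stride-2 convs instead of explicit pooling (keeping your 19-channel combined input):

self.cnn = nn.Sequential(
    nn.Conv2d(19, hidden_units, kernel_size=3, stride=2, padding=1),                # 224 -> 112
    nn.ReLU(inplace=True),
    nn.Conv2d(hidden_units, hidden_units * 2, kernel_size=3, stride=2, padding=1),  # 112 -> 56
    nn.ReLU(inplace=True)
)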
You also definitely want an adaptive max pool layer after your conv layers. Without adaptive pooling, your model only works for one fixed input size (whatever size the final linear layer was built for). Adding an adaptive pooling layer before the final linear layer makes your model compatible with most image sizes.
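As a sketch of what that could look like in your model (assuming the corrected cnn block above, which ends with hidden_units * 2 channels):

# in __init__
self.adaptive_pool = nn.AdaptiveMaxPool2d((1, 1))       # or nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(hidden_units * 2, output_classes)   # no longer depends on the image size

# in forward, after the cnn block
cnn_output = self.cnn(combined_features)
cnn_output = self.adaptive_pool(cnn_output)
cnn_output = cnn_output.view(cnn_output.size(0), -1)    # shape: (batch, hidden_units * 2)
output = self.fc(cnn_output)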