I want to do binary classification with an FCN (fully convolutional network). This is the code I implemented.
import monai
import torch
import torch.nn as nn

# FCN + DenseNet: DenseNet121 encoder with a 1x1x1 convolution as the classification head.
# Note: with class_layers.out replaced by Identity, the encoder output keeps DenseNet121's
# 1024 feature channels, so out_channels must be 1024 for the classifier Conv3d to match.
class FCNDenseNet121_1(nn.Module):
    def __init__(self, spatial_dims: int, in_channels: int, out_channels: int):
        super().__init__()
        self.encoder = monai.networks.nets.DenseNet121(
            spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels
        )
        # Replace the global pooling, flatten, and linear head so the encoder
        # returns the full spatial feature map.
        self.encoder.class_layers.pool = nn.AvgPool3d(kernel_size=1, stride=1)
        self.encoder.class_layers.flatten = nn.Identity()
        self.encoder.class_layers.out = nn.Identity()
        self.classifier = nn.Conv3d(out_channels, 1, kernel_size=1, stride=1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.classifier(x)
        return x


class FCNDenseNet121_2(nn.Module):
    def __init__(self, spatial_dims: int, in_channels: int, out_channels: int):
        super().__init__()
        self.encoder = monai.networks.nets.DenseNet121(
            spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels
        )
        # Keep the encoder's global average pooling; only drop the flatten and linear head.
        #self.encoder.class_layers.pool = nn.AvgPool3d(kernel_size=1, stride=1)
        self.encoder.class_layers.flatten = nn.Identity()
        self.encoder.class_layers.out = nn.Identity()
        self.classifier = nn.Conv3d(out_channels, 1, kernel_size=1, stride=1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.classifier(x)
        x = x.view(x.size(0), -1)  # flatten the 1x1x1 logit map to [batch, 1]
        return x
# Build the models; my images are 3-dimensional, single-channel volumes
model_1 = FCNDenseNet121_1(spatial_dims=3, in_channels=1, out_channels=1024)
model_2 = FCNDenseNet121_2(spatial_dims=3, in_channels=1, out_channels=1024)
loss_function = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_2.parameters(), lr=0.001)
# Sanity check with a random 96x96x96 volume
x = torch.randn(1, 96, 96, 96)
print(x.shape)  # torch.Size([1, 96, 96, 96])
out_put_1 = model_1(x.unsqueeze(0))  # unsqueeze adds the batch dimension
out_put_2 = model_2(x.unsqueeze(0))
print(out_put_1.shape)  # torch.Size([1, 1, 3, 3, 3])
print(out_put_2.shape)  # torch.Size([1, 1])
I want to ask how to change out_put_1's shape to [1, 1] so that I can calculate the loss. First of all, is this the correct way to do it? Although out_put_2's shape is already [1, 1], I don't know whether that approach is good either. Which one is correct?
If your goal is binary classification and you want the output to have shape [1, 1], use Model 2 (FCNDenseNet121_2): it keeps the encoder's global average pooling, so the 1x1x1 classifier convolution already produces a tensor of the desired shape.
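If you do want to keep Model 1 and collapse its [1, 1, 3, 3, 3] logit map to [1, 1], a minimal sketch (assuming you simply average the per-location logits over the spatial dimensions) would be:

out_put_1 = model_1(x.unsqueeze(0))      # [1, 1, 3, 3, 3]
pooled = out_put_1.mean(dim=(2, 3, 4))   # [1, 1], mean over D, H, W
# equivalently: torch.nn.functional.adaptive_avg_pool3d(out_put_1, 1).flatten(1)

Because the 1x1x1 convolution is linear, averaging its outputs is equivalent to averaging the features first, so this should match what Model 2 computes.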
However, to calculate the loss you need to make sure the target tensor also has shape [1, 1] to match the output, and that it is a floating-point tensor, since BCEWithLogitsLoss expects float targets. For instance, if your target tensor is y:
y = torch.tensor([[1.0]])  # example target with shape [1, 1] and float dtype
loss = loss_function(out_put_2, y)
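Note that BCEWithLogitsLoss applies the sigmoid internally, so the model should output raw logits as it does here; at inference time you apply the sigmoid yourself to turn the logit into a probability (a small sketch, using 0.5 as an assumed decision threshold):

prob = torch.sigmoid(out_put_2)  # probability of the positive class, shape [1, 1]
pred = (prob > 0.5).long()       # hypothetical 0.5 threshold for the binary decision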
Keep in mind that you are currently training with a batch size of 1. This is generally not recommended: single-sample batches give noisy gradients (and unreliable batch-norm statistics in the DenseNet encoder), so increase the batch size if memory allows.
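For example, a minimal sketch of one training step with a larger batch (assuming a hypothetical mini-batch of 4 random volumes and labels in place of your real data loader):

batch = torch.randn(4, 1, 96, 96, 96)          # 4 single-channel 96x96x96 volumes
labels = torch.randint(0, 2, (4, 1)).float()   # shape [4, 1], float for BCEWithLogitsLoss

optimizer.zero_grad()
logits = model_2(batch)                        # [4, 1]
loss = loss_function(logits, labels)
loss.backward()
optimizer.step()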