python pytorch image-segmentation attributeerror

AttributeError: 'tuple' object has no attribute 'size' when using smp.Unet with aux_params in segmentation_models_pytorch

I am working on a project that involves semantic segmentation using the smp (segmentation_models_pytorch) library in Python. I am trying to train a UNet model with auxiliary parameters using the smp.Unet class. However, when I add the aux_params argument to the smp.Unet constructor, I encounter an error:

File .../python3.11/site-packages/segmentation_models_pytorch/utils/train.py:51, in Epoch.run(self, dataloader)
     49 for x, y in iterator:
     50     x, y = x.to(self.device), y.to(self.device)
---> 51     loss, y_pred = self.batch_update(x, y)
     53     # update loss logs
     54     loss_value = loss.cpu().detach().numpy()
...
-> 3162 if not (target.size() == input.size()):
   3163     raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
   3165 return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
File ".../train_model.py", line 153, in train
    train_logs = self.train_epoch.run(self.train_loader)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File ".../train_model.py", line 173, in main
    water_seg_model.train(epoch_number=100)
  File ".../train_model.py", line 176, in <module>
    main()
AttributeError: 'tuple' object has no attribute 'size'

Here's a simplified version of my code:

# --- Encoder / backbone -----------------------------------------------------
ENCODER = 'resnet34'
ENCODER_WEIGHTS = 'imagenet'

# --- Segmentation targets and head options ----------------------------------
CLASSES = ['cats']
ACTIVATION = None
DROPOUT = 0.5
POOLING = 'avg'

# --- Runtime / training settings --------------------------------------------
# Run on the GPU when one is visible to torch, otherwise on the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
THRESHOLD = 0.9
LEARNING_SPEED = 0.001

# Settings for the auxiliary classification head handed to smp.Unet through
# its ``aux_params`` argument.
AUX_PARAMS = {
    'classes': len(CLASSES),
    'dropout': DROPOUT,
    'activation': ACTIVATION,
    'pooling': POOLING,
}

class SegmentationModel():
    """Bundle an ``smp.Unet`` with its loss, metric, optimizer and train loop.

    NOTE(review): the model is built with ``aux_params``, so calling it yields
    a ``(mask, label)`` pair instead of a single tensor. The stock
    ``smp.utils.train.TrainEpoch`` used below appears to pass the model output
    straight to the loss, which matches the reported
    ``'tuple' object has no attribute 'size'`` error -- confirm against the
    installed smp version.
    """

    def __init__(self):
        """Create the model, loss, metrics, optimizer, epoch runner and loader."""
        self.model = smp.Unet(
            encoder_name=ENCODER,
            encoder_weights=ENCODER_WEIGHTS,
            in_channels=3,
            classes=len(CLASSES),
            aux_params=AUX_PARAMS
        )
        self.preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

        self.loss = smp.losses.SoftBCEWithLogitsLoss()
        # The loss object gets an explicit __name__; presumably the epoch
        # runner uses it as the key when logging -- TODO confirm.
        self.loss.__name__ = 'SoftBCEWithLogitsLoss'

        self.metrics = [
            smp.utils.metrics.IoU(threshold=THRESHOLD),
        ]

        self.optimizer = torch.optim.Adam([
            dict(params=self.model.parameters(), lr=0.0001),
        ])

        self.train_epoch = smp.utils.train.TrainEpoch(
            self.model,
            loss=self.loss,
            metrics=self.metrics,
            optimizer=self.optimizer,
            device=DEVICE,
            verbose=True,
        )
        # NOTE(review): self.images_train_dir and self.masks_train_dir are not
        # assigned anywhere in the visible code; they must be set before this
        # point for the constructor to succeed.
        self.dataset = Dataset(
            self.images_train_dir,
            self.masks_train_dir,
            augmentation=get_training_augmentation(),
            preprocessing=get_preprocessing(self.preprocessing_fn),
            classes=['cats'],
        )
        # Feed the dataset created above to the loader (the attribute is
        # ``self.dataset``; ``self.train_dataset`` was never defined).
        self.train_loader = DataLoader(self.dataset, batch_size=16, shuffle=True, num_workers=6)

    def train(self, epoch_number: int = 10):
        """Run ``epoch_number`` training epochs over ``self.train_loader``.

        Args:
            epoch_number: How many epochs to run; defaults to 10.
        """
        for i in range(epoch_number):
            print(f'\nEpoch: {i}')
            self.train_epoch.run(self.train_loader)
def main():
    """Build the segmentation model wrapper and train it for 100 epochs."""
    SegmentationModel().train(epoch_number=100)

What could be causing the 'tuple' object has no attribute 'size' error when using the aux_params argument in smp.Unet? How can I properly initialise the smp.Unet model with the aux_params dictionary to avoid this error?

Any help or insights into this issue would be greatly appreciated. Thank you!

Solution

From the smp docs: all models support the aux_params parameter, which defaults to None. If aux_params = None, no auxiliary classification output is created; otherwise the model produces not only a mask but also a label output with shape (N, C). The classification head consists of GlobalPooling -> Dropout (optional) -> Linear -> Activation (optional) layers, which can be configured by aux_params as follows:

# Example quoted from the smp docs: configure the auxiliary classification head.
aux_params=dict(
    pooling='avg',             # one of 'avg', 'max'
    dropout=0.5,               # dropout ratio, default is None
    activation='sigmoid',      # activation function, default is None
    classes=4,                 # define number of output labels
)
model = smp.Unet('resnet34', classes=4, aux_params=aux_params)
# With aux_params set, calling the model yields two outputs (mask and label),
# so the result has to be unpacked rather than used as a single tensor.
mask, label = model(x)

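Because the model now returns a (mask, label) tuple, the stock TrainEpoch.batch_update passes that whole tuple to the loss function, which then fails with "'tuple' object has no attribute 'size'". A possible solution, or at least a workaround, is to create a new Epoch subclass that unpacks the label before computing the loss: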

class TrainEpochWithAUX(train.Epoch):
    """Training epoch for models that may return ``(mask, label)``.

    When the model is built with ``aux_params`` its output is a pair of
    segmentation mask and auxiliary label. This epoch computes the loss on
    the mask only; the auxiliary label is ignored.

    NOTE(review): a validation epoch run against the same model presumably
    needs the same unpacking -- confirm against the installed smp version.

    Args:
        model: Model to train.
        loss: Callable taking ``(prediction, target)`` and returning the loss.
        metrics: Metrics forwarded to the base ``Epoch``.
        optimizer: Optimizer stepped once per batch.
        device: Device name forwarded to the base ``Epoch``.
        verbose: Verbosity flag forwarded to the base ``Epoch``.
    """

    def __init__(self, model, loss, metrics, optimizer, device="cpu", verbose=True):
        super().__init__(
            model=model,
            loss=loss,
            metrics=metrics,
            stage_name="train",
            device=device,
            verbose=verbose,
        )
        self.optimizer = optimizer

    def on_epoch_start(self):
        """Put the model into training mode before the epoch starts."""
        self.model.train()

    def batch_update(self, x, y):
        """Run one optimization step and return ``(loss, mask_prediction)``."""
        self.optimizer.zero_grad()
        # Call the module itself rather than ``.forward`` so that any
        # registered forward hooks are executed.
        output = self.model(x)
        # With an auxiliary head the output is (mask, label); without one it
        # is the mask tensor itself. Only the mask is used for the loss.
        prediction = output[0] if isinstance(output, (tuple, list)) else output
        loss = self.loss(prediction, y)
        loss.backward()
        self.optimizer.step()
        return loss, prediction