This project is an implementation of the pytorch maskrcnn model for instance segmentation of cells.
The base model runs fine, but in order to increase the training sample, I attempted to implement the albumentation library.
Python: 3.10.6 Torch: 2.0.1+cu118 Numpy: 1.22.4 PIL: 9.4..0 Albumentation: 1.2.1
The following is placed under a Dataset Class, after the image is loaded from the computer's file directory as a numpy array:
class CellDataset(Dataset):
# images are jpgs while masks are pngs
def __init__(self, image_dir, mask_dir, height = 250, width = 250, transform = None):
self.image_dir = image_dir
self.mask_dir = mask_dir
self.transform = transform
self.images = os.listdir(image_dir)
self.masks = os.listdir(mask_dir)
def __len__(self):
return len(self.images)
def __getitem__(self,index):
assert len(os.listdir(image_dir))==len(os.listdir(mask_dir)), "Images and Masks folder size do not match"
img_path = os.path.join(self.image_dir, self.images[index])
mask_path = os.path.join(self.mask_dir, self.masks[index])
if img_path[0].endswith(".npy"):
image = np.load(img_path)
else:
image = np.array(Image.open(img_path).convert("RGB"))
if mask_path[0].endswith(".npy"):
mask = np.load(mask_path)
else:
mask = np.array(Image.open(mask_path))
obj_ids = np.unique(mask)
obj_ids = obj_ids[1:] # removing first item of list, which represents background
num_objs = len(obj_ids) # finding number of objects (i.e. number of cells)
# creates boolean mask of all objects together
masks = np.zeros((num_objs, mask.shape[0], mask.shape[1]))
for i in range(num_objs):
masks[i][mask==i+1] = True
# creates segmentation boxes based on rect coordinates
boxes = []
for i in range(num_objs):
pos = np.where(masks[i]) # create new array with only data points of the object
xmin = np.min(pos[1]) # find max/min of x & y axes
xmax = np.max(pos[1])
ymin = np.min(pos[0])
ymax = np.max(pos[0])
boxes.append([xmin, ymin, xmax, ymax])
boxes = list(boxes) # boxes need to be in a list, according to article
## creating class labels
class_labels = torch.ones((num_objs,),dtype=torch.int64)
# Albumentation transformations
if self.transform is not None:
augmentations = self.transform(image=image, mask=mask, bboxes=boxes, class_labels=class_labels)
image = augmentations["image"]
mask = augmentations["mask"]
boxes = augmentations["bboxes"]
class_labels = augmentations["class_labels"]
boxes = torch.as_tensor(boxes, dtype = torch.float32)
masks = torch.as_tensor(masks, dtype=torch.uint8)
target = {}
target["boxes"] = boxes
target["masks"] = masks
target["class_labels"] = class_labels
return image, target
A file is made to generate the train_loader and val_loader from a directory containing both training and validation data:
def get_loaders(
train_img_dir,
train_mask_dir,
val_img_dir,
val_mask_dir,
batch_size,
train_transform,
val_transform,
num_workers=4,
pin_memory=True,
):
train_ds = CellDataset(
image_dir=train_img_dir,
mask_dir=train_mask_dir,
transform=train_transform,
)
train_loader = DataLoader(
train_ds,
batch_size=batch_size,
num_workers=num_workers,
pin_memory=pin_memory,
shuffle=True,
collate_fn = collate_fn
)
val_ds = CellDataset(
image_dir=val_img_dir,
mask_dir=val_mask_dir,
transform=val_transform,
)
val_loader = DataLoader(
val_ds,
batch_size=batch_size,
num_workers=num_workers,
pin_memory=pin_memory,
shuffle=False,
collate_fn = collate_fn
)
return train_loader, val_loader
A seperate file loads the Dataset and runs the training loop:
model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2()
bbox_params = A.BboxParams(format='pascal_voc',label_fields = [])
def main():
# defining transformations. Will be accessed in dataloader
train_transform = A.Compose([
# A.Rotate(limit=35, p=1.0),
A.RandomSizedBBoxSafeCrop(height=image_height, width=image_width,erosion_rate=0.2),
A.Resize(height=image_height, width=image_width),
A.Normalize(
mean=[0.0, 0.0, 0.0],
std=[1.0, 1.0, 1.0],
max_pixel_value=255.0,
),
ToTensorV2()
],bbox_params=bbox_params, p=1)
val_transforms = A.Compose([
A.RandomSizedBBoxSafeCrop(height=image_height, width=image_width,erosion_rate=0.2),
A.Resize(height=image_height, width=image_width),
A.Normalize(
mean=[0.0, 0.0, 0.0],
std=[1.0, 1.0, 1.0],
max_pixel_value=255.0,
),
ToTensorV2()
],bbox_params=bbox_params, p=1)
loss_fn = nn.BCEWithLogitsLoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum=momentum, weight_decay=weight_decay)
train_loader, val_loader = get_loaders(
train_img_dir,
train_mask_dir,
val_img_dir,
val_mask_dir,
batch_size,
train_transform,
val_transforms,
num_workers,
pin_memory,
)
scaler = torch.cuda.amp.GradScaler()
model.to(device)
for epoch in range(num_epochs):
train_epoch_loss = 0
val_epoch_loss = 0
model.train()
for i, dt in enumerate(train_loader):
img = [dt[0][0].to(device), dt[1][0].to(device)]
targ = [dt[0][1], dt[1][1]]
targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
# targets = [{k: v for k, v in t.items()} for t in targ]
loss = model(img, targets)
print(loss)
losses = sum([l for l in loss.values()])
train_epoch_loss += losses.cpu().detach().numpy()
optimizer.zero_grad()
losses.backward()
optimizer.step()
all_train_losses.append(train_epoch_loss)
with torch.no_grad():
for j, dt in enumerate(val_loader):
img = [dt[0][0].to(device), dt[1][0].to(device)]
targ = [dt[0][1], dt[1][1]]
targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
# targets = [{k: v for k, v in t.items()} for t in targ]
loss = model(img, targets)
losses = sum([l for l in loss.values()])
val_epoch_loss += losses.cpu().detach().numpy()
all_val_losses.append(val_epoch_loss)
print(epoch, " ", train_epoch_loss, " ", val_epoch_loss)
Resulting Error Message:
/usr/local/lib/python3.10/dist-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.
warnings.warn("torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.")
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-8-692fc61b0d19> in <cell line: 162>()
161
162 if __name__ == "__main__":
--> 163 main()
4 frames
<ipython-input-8-692fc61b0d19> in main()
121 targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
122 # targets = [{k: v for k, v in t.items()} for t in targ]
--> 123 loss = model(img, targets)
124 print(loss)
125 losses = sum([l for l in loss.values()])
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.10/dist-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
103 features = OrderedDict([("0", features)])
104 proposals, proposal_losses = self.rpn(images, features, targets)
--> 105 detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
106 detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes) # type: ignore[operator]
107
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.10/dist-packages/torchvision/models/detection/roi_heads.py in forward(self, features, proposals, image_shapes, targets)
746 if not t["boxes"].dtype in floating_point_types:
747 raise TypeError(f"target boxes must of float type, instead got {t['boxes'].dtype}")
--> 748 if not t["labels"].dtype == torch.int64:
749 raise TypeError(f"target labels must of int64 type, instead got {t['labels'].dtype}")
750 if self.has_keypoint():
KeyError: 'labels'
The particular line that seeems to cause the error checks if the t["labels"].dtype is a torch.int64, yet the labels are formatted as such in the DataLoader class.
First be aware that your error is not that the datatype isn't int64; it's that the field 'labels'
doesn't even exist at all. It doesn't get to the actual comparison.
Apart from knowing that your targets clearly are missing this required field, you don't actually show enough of your code, as nowhere in the code is train_loader
actually defined.
Based on context, my most educated guess is that
target = {}
target["boxes"] = boxes
target["masks"] = masks
target["class_labels"] = class_labels
is the targets part of train_loader
, and if so then clearly the field "class_labels"
is wrong; this model expects the field to be named exactly just "labels"
.