I have this code:
import logging
import os
import sys
import tempfile
from glob import glob
import torch
from torch.cuda.amp import autocast, GradScaler
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
import monai
from monai.data import create_test_image_2d, list_data_collate, decollate_batch, DataLoader
from monai.inferers import sliding_window_inference
from monai.metrics import DiceMetric
from monai.transforms import (
    Activations,
    AsDiscrete,
    Compose,
    DivisiblePadd,
    EnsureChannelFirstd,
    ScaleIntensityd,
    ToTensor,
)
from monai.visualize import plot_2d_or_3d_image
def main(tempdir):
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # Check and convert data format only once
    converted_raw_dict, converted_analyzed_dict = check_and_convert_format(updated_raw_dict, new_analyzed_dict)

    # The two dictionaries must pair up one-to-one before building the train/val/test split
    if len(converted_raw_dict) != len(converted_analyzed_dict):
        raise ValueError("The lengths of converted_raw_dict and converted_analyzed_dict do not match.")

    raw_image_list = list(converted_raw_dict.items())

    # Calculate the number of images for training and validation (80-10-10 split)
    train_percentage = 0.8
    val_percentage = 0.1
    num_images = len(raw_image_list)
    num_train_images = int(num_images * train_percentage)
    num_val_images = int(num_images * val_percentage)

    # Select images for training, validation, and test
    train_files = [{"img": image, "seg": converted_analyzed_dict[name]} for name, image in raw_image_list[:num_train_images]]
    val_files = [{"img": image, "seg": converted_analyzed_dict[name]} for name, image in raw_image_list[num_train_images:num_train_images + num_val_images]]
    test_files = [{"img": image, "seg": converted_analyzed_dict[name]} for name, image in raw_image_list[num_train_images + num_val_images:]]
    # define transforms for image and segmentation
    train_transforms = Compose(
        [
            ToTensor(),
            EnsureChannelFirstd(keys=["img", "seg"], channel_dim=-1),  # channel_dim=-1 for channel-last NumPy arrays
            ScaleIntensityd(keys=["img", "seg"]),
            DivisiblePadd(keys=["img", "seg"], k=16),
        ]
    )
    val_transforms = Compose(
        [
            ToTensor(),
            EnsureChannelFirstd(keys=["img", "seg"], channel_dim=-1),  # channel_dim=-1 for channel-last NumPy arrays
            ScaleIntensityd(keys=["img", "seg"]),
            DivisiblePadd(keys=["img", "seg"], k=16),
        ]
    )
    # define dataset and a quick sanity-check data loader
    check_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
    check_loader = DataLoader(check_ds, batch_size=1, num_workers=1, collate_fn=list_data_collate)
    check_data = monai.utils.misc.first(check_loader)
    print(check_data["img"].shape, check_data["seg"].shape)

    # create a training data loader (batch_size=1, whole images, no random cropping)
    train_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
    train_loader = DataLoader(
        train_ds,
        batch_size=1,
        shuffle=True,
        num_workers=1,
        collate_fn=list_data_collate,
        pin_memory=torch.cuda.is_available(),
    )
    # create a validation data loader
    val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
    val_loader = DataLoader(val_ds, batch_size=1, num_workers=1, collate_fn=list_data_collate)

    dice_metric = DiceMetric(include_background=True, reduction="mean", get_not_nans=False)
    post_trans = Compose([Activations(sigmoid=True), AsDiscrete(threshold=0.5)])
    # create UNet, DiceLoss and Adam optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Print the shape of your input image for debugging
    # input_image_shape = train_files[0]["img"].shape
    # print("Input image shape:", input_image_shape)
    model = monai.networks.nets.UNet(
        spatial_dims=2,
        in_channels=1,
        out_channels=1,
        channels=(16, 32, 64, 128, 256),
        strides=(2, 2, 2, 2),
        num_res_units=2,
    ).to(device)
    loss_function = monai.losses.DiceLoss(sigmoid=True)
    optimizer = torch.optim.Adam(model.parameters(), 1e-3)

    # start a typical PyTorch training
    val_interval = 5
    best_metric = -1
    best_metric_epoch = -1
    epoch_loss_values = list()
    metric_values = list()
    writer = SummaryWriter()

    # Number of mini-batches to accumulate gradients over (adjust to your GPU memory capacity)
    accumulation_steps = 4
    scaler = GradScaler()
    for epoch in range(10):
        print("-" * 10)
        print(f"epoch {epoch + 1}/{10}")
        model.train()
        epoch_loss = 0
        step = 0
        accumulated_loss = 0.0  # loss accumulated since the last optimizer step, for logging
        optimizer.zero_grad()  # with gradient accumulation, only zero again after each optimizer step
        for batch_data in train_loader:
            step += 1
            inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device)
            with autocast():
                outputs = model(inputs)
                loss = loss_function(outputs, labels)
                loss /= accumulation_steps
            scaler.scale(loss).backward()
            accumulated_loss += loss.item()
            if step % accumulation_steps == 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
                epoch_loss += accumulated_loss  # flush the accumulated loss into the epoch total
                accumulated_loss = 0.0
            epoch_len = len(train_ds) // train_loader.batch_size
            print(f"{step}/{epoch_len}, train_loss: {loss.item():.4f}")
            writer.add_scalar("train_loss", loss.item(), epoch_len * epoch + step)
        # Handle any leftover gradients from an incomplete accumulation window
        if step % accumulation_steps != 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            epoch_loss += accumulated_loss
        epoch_loss /= step
        epoch_loss_values.append(epoch_loss)
        print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}")
        if (epoch + 1) % val_interval == 0:
            model.eval()
            with torch.no_grad():
                val_images = None
                val_labels = None
                val_outputs = None
                for val_data in val_loader:
                    val_images, val_labels = val_data["img"].to(device), val_data["seg"].to(device)
                    roi_size = (64, 64)
                    sw_batch_size = 4
                    val_outputs = sliding_window_inference(val_images, roi_size, sw_batch_size, model)
                    val_outputs = [post_trans(i) for i in decollate_batch(val_outputs)]
                    # compute metric for current iteration
                    dice_metric(y_pred=val_outputs, y=val_labels)
                # aggregate the final mean dice result
                metric = dice_metric.aggregate().item()
                # reset the status for next validation round
                dice_metric.reset()
                metric_values.append(metric)
                if metric > best_metric:
                    best_metric = metric
                    best_metric_epoch = epoch + 1
                    torch.save(model.state_dict(), "best_metric_model_segmentation2d_dict.pth")
                    print("saved new best metric model")
                print(
                    "current epoch: {} current mean dice: {:.4f} best mean dice: {:.4f} at epoch {}".format(
                        epoch + 1, metric, best_metric, best_metric_epoch
                    )
                )
                writer.add_scalar("val_mean_dice", metric, epoch + 1)
                # Print the shapes of the last training batch for debugging
                print("Input shape:", inputs.shape)
                print("Label shape:", labels.shape)
                print("Output shape:", outputs.shape)
                # plot the last model output as GIF image in TensorBoard with the corresponding image and label
                plot_2d_or_3d_image(val_images, epoch + 1, writer, index=0, tag="image")
                plot_2d_or_3d_image(val_labels, epoch + 1, writer, index=0, tag="label")
                plot_2d_or_3d_image(val_outputs, epoch + 1, writer, index=0, tag="output")

    print(f"train completed, best_metric: {best_metric:.4f} at epoch: {best_metric_epoch}")
    writer.close()
if __name__ == "__main__":
with tempfile.TemporaryDirectory() as tempdir:
main(tempdir)
These are the tensors going into the network: torch.Size([1, 1536, 1152]) and torch.Size([1, 1536, 1152]).
But I always get this runtime error:

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 128 but got size 256 for tensor number 1 in the list.

If I change my channel sizes, say by multiplying them by 2, I get "Expected size 256 but got size 512" instead. I am pretty sure I am making an obvious mistake, but I could not find what it is. I have been trying to test a simple UNet with MONAI on Google Colab, deliberately overfitting on a few images just to confirm I am on the right track, but I keep hitting this tensor shape mismatch and cannot tell whether it is a syntax issue or a network architecture issue. Changing channels=(16, 32, 64, 128, 256) to channels=(32, 64, 128, 256, 512), or any other multiple, produces the same RuntimeError, just with different values for the expected and actual sizes.
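A minimal sketch that should reproduce the same failure with just the model and a dummy tensor of my input shape (the zeros tensor is a stand-in for the real data); depending on the PyTorch version it fails either at the first convolution or, as in my case, inside the UNet's skip connections:

import torch
import monai

model = monai.networks.nets.UNet(
    spatial_dims=2,
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
)
x = torch.zeros(1, 1536, 1152)  # 3-D, exactly the shape my loader produces
model(x)  # RuntimeError: Sizes of tensors must match except in dimension 1 ...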
It is a dimensionality problem with the input data: inputs and labels should not have three dimensions.
The shapes should be torch.Size([1, 1, 1536, 1152]) and torch.Size([1, 1, 1536, 1152]) if the batch size is 1,
or torch.Size([10, 1, 1536, 1152]) and torch.Size([10, 1, 1536, 1152]) if the batch size is 10.
You forgot the batch dimension in the first position. A 2D UNet expects (batch, channel, height, width) input; with only three dimensions, the skip connections' torch.cat along dim=1 lines up against the wrong axis, which is why the mismatched sizes in the error message track your channel settings (128 vs. 256, then 256 vs. 512 after doubling) no matter how you change them.
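As a quick check you can unsqueeze the tensors to 4-D right before the forward pass. A minimal sketch (whichever axis the leading 1 in your printed shapes actually is, batch or channel, the result is the (1, 1, 1536, 1152) the network expects):

inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device)
if inputs.dim() == 3:
    # (1, 1536, 1152) -> (1, 1, 1536, 1152): add the missing leading axis
    inputs = inputs.unsqueeze(0)
    labels = labels.unsqueeze(0)

The cleaner fix is in the transform pipeline; see the sketch after the script below.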
For reference, the standalone script below runs fine with correctly shaped 4-D dummy tensors:
import logging
import os
import sys
import tempfile
from glob import glob
import torch
from torch.cuda.amp import autocast, GradScaler
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
import monai

def main(tempdir):
    monai.config.print_config()
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = monai.networks.nets.UNet(
        spatial_dims=2,
        in_channels=1,
        out_channels=1,
        channels=(16, 32, 64, 128, 256),
        strides=(2, 2, 2, 2),
        num_res_units=2,
    ).to(device)
    loss_function = monai.losses.DiceLoss(sigmoid=True)
    optimizer = torch.optim.Adam(model.parameters(), 1e-3)
    # start a typical PyTorch training
    val_interval = 5
    best_metric = -1
    best_metric_epoch = -1
    epoch_loss_values = list()
    metric_values = list()
    writer = SummaryWriter()
    # Number of mini-batches to accumulate gradients over (adjust to your GPU memory capacity)
    accumulation_steps = 4
    scaler = GradScaler()
    for epoch in range(10):
        print("-" * 10)
        print(f"epoch {epoch + 1}/{10}")
        model.train()
        epoch_loss = 0
        step = 0
        accumulated_loss = 0  # Initialize accumulated loss
        # for batch_data in train_loader:
        if True:  # stand-in for the data-loader loop, using dummy 4-D tensors
            step += 1
            # inputs, labels = batch_data["img"].to(device), batch_data["seg"].to(device)
            inputs = torch.zeros((1, 1, 1536, 1152)).to(device)
            labels = torch.zeros((1, 1, 1536, 1152)).to(device)
            optimizer.zero_grad()
            with autocast():
                outputs = model(inputs)
                loss = loss_function(outputs, labels)
                loss /= accumulation_steps
            exit(0)  # forward pass and loss succeeded with 4-D inputs; stop here

if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as tempdir:
        main(tempdir)
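If you want to fix it in your original pipeline rather than by unsqueezing, the transform chain is the right place. A sketch, assuming your "img"/"seg" values are plain 2-D (H, W) NumPy arrays with no channel axis, and a MONAI version that accepts channel_dim="no_channel"; it also drops the array-style ToTensor(), which does not belong in a dictionary-transform chain (it receives the whole dict), in favor of ToTensord:

from monai.transforms import Compose, DivisiblePadd, EnsureChannelFirstd, ScaleIntensityd, ToTensord

train_transforms = Compose(
    [
        # (H, W) -> (1, H, W): "no_channel" says the array has no channel axis yet
        EnsureChannelFirstd(keys=["img", "seg"], channel_dim="no_channel"),
        ScaleIntensityd(keys=["img", "seg"]),
        DivisiblePadd(keys=["img", "seg"], k=16),
        ToTensord(keys=["img", "seg"]),  # dictionary version, converts each key to a tensor
    ]
)
# DataLoader collation then stacks samples: (1, H, W) -> (batch, 1, H, W)

With that in place the loader yields the (1, 1, 1536, 1152) batches the UNet expects, and the skip connections concatenate along the real channel axis.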