I wrote a training script with the PyTorch library, and I am training on .png images that normally have a colour depth of 24-32 bits. To reduce RAM and GPU usage, I converted the images to 1-bit (keeping their size fixed at 512x512). However, the training time did not change. I have included the code below; please review it and provide feedback.
Even though the images were converted to 1-bit, the training time did not change. I do not want to change the size of the images; I only want to reduce them to 1-bit to speed up training. I am not sure how to overcome the bottleneck. It seems the code might not be converting the images to 1-bit correctly.
# -*- coding: utf-8 -*-
# setxkbmap tr
### Import all the Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from PIL import Image
### Set all the Constants
# Number of samples per batch; passed to the train, validation and test DataLoaders.
BATCH_SIZE = 32
# Side length in pixels: images are resized to IMAGE_SIZE x IMAGE_SIZE, and the
# first fully connected layer of the model is sized from this value.
IMAGE_SIZE = 512
# Channel count used for the grayscale transform and the first convolution.
CHANNELS = 1 # 1-channel for binary images
# Upper bound on training epochs; early stopping may end training sooner.
EPOCHS = 500
# Number of output units of the final classification layer.
n_classes = 2
# Passed to train_model as `patience`.
EARLY_STOPPING_PATIENCE = 5 # Number of epochs to wait for improvement before stopping
### Data Transformations
# Preprocessing steps, applied to every loaded image in list order.
preprocessing_steps = [
    # Bring every image to IMAGE_SIZE x IMAGE_SIZE
    transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
    # Collapse colour images to CHANNELS grayscale channel(s)
    transforms.Grayscale(num_output_channels=CHANNELS),
    # PIL image -> tensor
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)
### Custom Dataset Class for Binary Images
class BinaryImageDataset(datasets.ImageFolder):
    """ImageFolder whose samples are reduced to pure black/white pixels.

    The image produced by the parent class (after ``transform``) is converted
    to PIL mode ``'1'`` and then back to a tensor.

    NOTE: this does not make the tensors smaller or training faster.
    ``ToTensor`` returns a floating-point tensor, so every pixel still occupies
    a full float; only the pixel *values* are restricted to 0.0 and 1.0.
    NOTE: Pillow's ``convert('1')`` applies Floyd-Steinberg dithering by
    default, so gray input pixels become a dither pattern rather than being
    thresholded.
    """

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index`` with a binarised image.

        Args:
            index: Position of the sample in the dataset.

        Returns:
            Tuple of the binarised image tensor and the integer class label.
        """
        img, label = super().__getitem__(index)
        # The parent returns whatever ``transform`` produced. When the pipeline
        # does not end in ToTensor (or no transform is set) that is typically
        # still a PIL image, which ToPILImage would reject with a TypeError.
        if not isinstance(img, Image.Image):
            img = transforms.ToPILImage()(img)  # Convert Tensor to PIL image
        img = img.convert('1')  # Convert image to 1-bit depth
        img = transforms.ToTensor()(img)  # Convert back to Tensor
        return img, label
# Build the PyTorch dataset object from the image folder
dataset = BinaryImageDataset(
    root="/home/han/Documents/04.09.2024/500_TrainAB_Binary",
    transform=transform,
)
# Report how many images were found and which classes they belong to
print(
    f"Number of images in dataset: {len(dataset)}",
    f"Classes: {dataset.classes}",
    sep="\n",
)
# Load the first sample and report its tensor size ([C, H, W])
first_image, first_label = dataset[0]
print(f"Size of the first image (Tensor): {first_image.size()}")
# Break the size into its individual dimensions and report them with the label
channels, height, width = first_image.shape
print(
    f"Channels: {channels}",
    f"Height: {height}",
    f"Width: {width}",
    f"Label of the first image: {dataset.classes[first_label]}",
    sep="\n",
)
### Train, Test, Validation Data Split
def get_dataset_partitions(dataset, train_split=0.8, val_split=0.1, test_split=0.1):
    """Randomly split ``dataset`` into train, validation and test subsets.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        train_split: Fraction of samples for training.
        val_split: Fraction of samples for validation.
        test_split: Fraction of samples for testing. The test subset receives
            every sample not assigned to the other two, so rounding never
            drops a sample.

    Returns:
        Tuple ``(train_ds, val_ds, test_ds)`` of disjoint subsets that
        together cover the whole dataset.

    Raises:
        ValueError: If the three fractions do not sum to 1.
    """
    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 are not exactly
    # 1.0 in floating point and would be rejected by an equality test.
    total_fraction = train_split + val_split + test_split
    if abs(total_fraction - 1.0) > 1e-9:
        raise ValueError(
            f"train_split + val_split + test_split must equal 1, got {total_fraction}"
        )
    ds_size = len(dataset)
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    test_size = ds_size - train_size - val_size
    # A single three-way split keeps each subset a direct view of ``dataset``
    # instead of a subset of a subset.
    train_ds, val_ds, test_ds = random_split(dataset, [train_size, val_size, test_size])
    return train_ds, val_ds, test_ds
# Partition the dataset, then report the size of the whole and of each part
train_ds, val_ds, test_ds = get_dataset_partitions(dataset)
print(
    f"Size of Data is: {len(dataset)}",
    f"Size of Training Data: {len(train_ds)}",
    f"Size of Validation Data: {len(val_ds)}",
    f"Size of Testing Data: {len(test_ds)}",
    sep="\n",
)
### Data Loaders
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    dataset=val_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)
### Building the Model
class CNNModel(nn.Module):
    """Five conv/ReLU/max-pool stages followed by a two-layer classifier.

    Each stage halves the spatial size, so after five stages an
    IMAGE_SIZE x IMAGE_SIZE input is IMAGE_SIZE // 32 pixels per side with
    512 channels. That flattened map feeds a 512-unit hidden layer, dropout,
    and a final layer with ``n_classes`` outputs.
    """

    def __init__(self):
        super().__init__()
        stages = []
        in_ch = CHANNELS
        # Same layer order as writing the stages out by hand, so the indices
        # inside the Sequential (and thus the state_dict keys) are unchanged.
        for out_ch in (32, 64, 128, 256, 512):
            stages.extend(
                [
                    nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=3, stride=1, padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(kernel_size=2, stride=2),
                ]
            )
            in_ch = out_ch
        side = IMAGE_SIZE // 32
        classifier = [
            nn.Flatten(),
            nn.Linear(512 * side * side, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, n_classes),
        ]
        self.model = nn.Sequential(*stages, *classifier)

    def forward(self, x):
        """Run ``x`` through the network and return the class scores."""
        return self.model(x)
# Instantiate the network
model = CNNModel()
### Loss and Optimizer
# Cross-entropy over the class scores, optimised with Adam at a fixed rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
### Training the Model with Early Stopping
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, patience):
    """Train ``model`` with early stopping and restore its best weights.

    After every epoch the model is scored on ``val_loader``. Whenever the
    validation accuracy improves, a copy of the weights is kept; when it has
    not improved for ``patience`` consecutive epochs, training stops. The
    kept copy is loaded back into the model before it is returned.

    Args:
        model: Network to train; moved to CUDA when available, else CPU.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Object providing ``zero_grad()`` and ``step()``.
        num_epochs: Maximum number of epochs to run.
        patience: Consecutive non-improving epochs tolerated before stopping.

    Returns:
        The same ``model`` object, holding the weights from the epoch with
        the highest validation accuracy (the initial weights if no epoch
        scored above 0).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    def snapshot_weights():
        # state_dict() returns references to the live parameter tensors, so
        # they must be cloned; otherwise the "best" weights would silently
        # follow every later optimizer step.
        return {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}

    best_model_wts = snapshot_weights()
    best_acc = 0.0
    epochs_without_improvement = 0
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        corrects = torch.zeros((), dtype=torch.long, device=device)
        total = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Weight the batch-mean loss by batch size to get a per-sample mean.
            running_loss += loss.item() * inputs.size(0)
            corrects += (outputs.argmax(dim=1) == labels).sum()
            total += labels.size(0)
        # An empty loader yields 0.0 instead of a division error.
        epoch_loss = running_loss / total if total else 0.0
        epoch_acc = corrects.item() / total if total else 0.0
        print(f"Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
        # Validation phase
        model.eval()
        val_corrects = torch.zeros((), dtype=torch.long, device=device)
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                val_corrects += (outputs.argmax(dim=1) == labels).sum()
                val_total += labels.size(0)
        val_acc = val_corrects.item() / val_total if val_total else 0.0
        print(f"Validation Accuracy: {val_acc:.4f}")
        # Check for improvement
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = snapshot_weights()
            epochs_without_improvement = 0  # Reset the counter
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(
                    f"Early stopping triggered at epoch {epoch} after "
                    f"{epochs_without_improvement} epochs without improvement."
                )
                break
    print(f"Best Validation Accuracy: {best_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return model
# Run training; the returned model carries the weights train_model restored.
model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=EPOCHS,
    patience=EARLY_STOPPING_PATIENCE,
)
### Testing the Model
def test_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and report its accuracy.

    Args:
        model: Network to evaluate; moved to CUDA when available, else CPU,
            and switched to evaluation mode.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        Fraction of correctly classified samples as a float, or 0.0 when the
        loader yields no samples. The same value is printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    # Tensor accumulator: stays on the device until the single read below.
    corrects = torch.zeros((), dtype=torch.long, device=device)
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            corrects += (outputs.argmax(dim=1) == labels).sum()
            total += labels.size(0)
    # Guard the empty-loader case instead of failing on the division.
    test_acc = corrects.item() / total if total else 0.0
    print(f"Test Accuracy: {test_acc:.4f}")
    return test_acc
# Score the trained model on the held-out test split.
test_model(model=model, test_loader=test_loader)
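As far as I know, PyTorch does not support 1-bit tensors. The smallest data types it offers, such as torch.uint8, store 8 bits per value. This is why the tensors your code produces are not actually 1-bit, even though the PIL image is.
You can try storing the images as torch.uint8, which still reduces memory and GPU usage compared to torch.float32 (convert them back to float before passing them to the model, since the convolution layers compute in floating point).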
To check the data type after the transformation, try
print(img.dtype)
which will most likely show torch.float32, the default floating-point type that ToTensor produces.