I am new to pytorch as I have just started today, I saw few examples in kaggle and try to adapt my cnn from tensorflow in torch to enable better gpu allocation.
I want to balance the data in train and test dataset so I have used train_test_split. I have two problems:
I could not evaluate the model before training the model. I want to just see the val_loss and val_accuracy without training the model. But that is not possible
I was just testing for 2 epoch but the validation accuracy shows abnormal value, why is it so?
Could someone help please?
I have defined the customdataset in other python file and importing it in the main module
import os
import cv2
import random
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision
from torchsummary import summary
class CustomDataset(Dataset):
def __init__(self, root_folder_path):
self.root_folder_path = root_folder_path
self.image_files = []
self.labels = []
# Collect image paths and corresponding labels
folders = sorted([f for f in os.listdir(root_folder_path) if os.path.isdir(os.path.join(root_folder_path, f))])
self.label_dict = {folder: i for i, folder in enumerate(folders)}
for folder in folders:
folder_path = os.path.join(root_folder_path, folder)
image_files = sorted([f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f)) and f.endswith('.jpg')])
self.image_files.extend([os.path.join(folder_path, img) for img in image_files])
self.labels.extend([self.label_dict[folder]] * len(image_files))
self.transform = transforms.Compose([
transforms.Resize((900, 300)),
transforms.Normalize(mean=[0.5], std=[0.5])
def __len__(self):
return len(self.image_files)
def __getitem__(self, idx):
image_path = self.image_files[idx]
label = self.labels[idx]
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
image = self.transform(image)
#print("Image shape:", image.shape) # Print the shape of the image
one_hot_label = torch.zeros(len(self.label_dict))
one_hot_label[label] = 1
return image, one_hot_label
this is my main script
if __name__ == '__main__':
# Instantiate your custom dataset and dataloaders
root_folder_path = r'W:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\image_dataset_300_900_10_classes'
dataset = CustomDataset(root_folder_path)
print("Labels:", sorted(dataset.label_dict.keys()))
print("Total number of labels:", len(dataset.label_dict))
# Display some images from each folder
n_images_to_display = 4
n_folders = len(dataset.label_dict)
fig, ax = plt.subplots(n_images_to_display, n_folders, figsize=(n_folders * 4, n_images_to_display * 4))
for i, (folder, label) in enumerate(dataset.label_dict.items()):
folder_images = [dataset[i][0] for i, lbl in enumerate(dataset.labels) if lbl == label]
indices_to_display = random.sample(range(len(folder_images)), min(n_images_to_display, len(folder_images)))
for j, ind in enumerate(indices_to_display):
ax[j, i].imshow(folder_images[ind].squeeze(), cmap='gray') # Squeeze to remove the channel dimension for grayscale images
ax[j, i].axis('off')
ax[0, i].set_title(folder, fontsize=30)
fig.tight_layout(pad=0, w_pad=0, h_pad=0)
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
SEED = 42
# Get the labels from the dataset
labels = np.array([label for _, label in dataset])
# generate indices: instead of the actual data we pass in integers instead
train_indices, test_indices, _, _ = train_test_split(
# generate subset based on indices
train_split = Subset(dataset, train_indices)
test_split = Subset(dataset, test_indices)
print('Length of train_batch:',len(train_split))
print('Length of test_batch:',len(test_split))
# create batches
train_loader = DataLoader(train_split, batch_size=BATCH_SIZE, num_workers=6,shuffle=True,pin_memory=True)
test_loader = DataLoader(test_split, batch_size=BATCH_SIZE,num_workers=6,pin_memory=True)
class ImageClassificationBase(nn.Module):
def training_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
return loss
def accuracy(self,outputs, labels):
#_, preds = torch.max(outputs, dim=1)
preds = torch.argmax(outputs, dim=1)
preds_one_hot = F.one_hot(preds, num_classes=labels.shape[1]) # Convert predictions to one-hot encoding
print("Shape of preds:", preds_one_hot.shape) # Check the shape of preds
return torch.sum(preds_one_hot == labels).float().mean()
def validation_step(self, batch):
images, labels = batch
out = self(images) # Generate predictions
loss = F.cross_entropy(out, labels) # Calculate loss
acc = self.accuracy(out, labels) # Calculate accuracy
return {'val_loss': loss.detach(), 'val_acc': acc}
def validation_epoch_end(self, outputs):
batch_losses = [x['val_loss'] for x in outputs]
epoch_loss = torch.stack(batch_losses).mean() # Combine losses
batch_accs = [x['val_acc'] for x in outputs]
epoch_acc = torch.stack(batch_accs).mean() # Combine accuracies
return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
def epoch_end(self, epoch, result):
print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
epoch, result['train_loss'], result['val_loss'], result['val_acc']))
import torch.nn.init as init
class ImageClassification(ImageClassificationBase):
def __init__(self):
self.network = nn.Sequential(
#image size is [1,900,300] as [channel, height,width]
nn.Conv2d(1, 32, kernel_size = 3, padding = 1),
nn.AvgPool2d(kernel_size=2, stride=2),
nn.Conv2d(32,32, kernel_size = 3, padding = 1),
nn.AvgPool2d(kernel_size=2, stride=2),
nn.Conv2d(32, 64, kernel_size = 3, padding = 1),
nn.AvgPool2d(kernel_size=2, stride=2),
nn.Conv2d(64 ,64, kernel_size = 3, padding = 1),
nn.AvgPool2d(kernel_size=2, stride=2),
nn.Linear(64 * 56 * 18, 64), # Assuming input size after convolutional layers is 64 * 56 * 18
nn.Linear(64, 64),
nn.Linear(64, 10) # Output layer
# Initialize the weights of convolutional layers
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
def forward(self, xb):
return self.network(xb)
def get_default_device():
""" Set Device to GPU or CPU"""
if torch.cuda.is_available():
return torch.device('cuda')
return torch.device('cpu')
def to_device(data, device):
"Move data to the device"
if isinstance(data,(list,tuple)):
return [to_device(x,device) for x in data]
return data.to(device,non_blocking = True)
class DeviceDataLoader():
""" Wrap a dataloader to move data to a device """
def __init__(self, dl, device):
self.dl = dl
self.device = device
def __iter__(self):
""" Yield a batch of data after moving it to device"""
for b in self.dl:
yield to_device(b,self.device)
def __len__(self):
""" Number of batches """
return len(self.dl)
device = get_default_device()
model = ImageClassification()
random_seed = 99
train_loader = DeviceDataLoader(train_loader, device)
test_loader = DeviceDataLoader(test_loader, device)
to_device(model, device)
def evaluate(model, val_loader):
outputs = [model.validation_step(batch) for batch in val_loader]
return model.validation_epoch_end(outputs)
def accuracy(outputs, labels):
_, preds = torch.max(outputs, dim=1)
return torch.tensor(torch.sum(preds == labels).item() / len(preds))
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.RMSprop):
history = []
optimizer = opt_func(model.parameters(), lr)
for epoch in range(epochs):
# Training Phase
train_losses = []
for batch in train_loader:
loss = model.training_step(batch)
# Validation phase
result = evaluate(model, val_loader)
result['train_loss'] = torch.stack(train_losses).mean().item()
model.epoch_end(epoch, result)
return history
#initial evaluation of the model
#set the no. of epochs, optimizer funtion and learning rate
num_epochs = 2
opt_func = torch.optim.RMSprop
lr = 0.0001
#fitting the model on training data and record the result after each epoch
history = fit(num_epochs, lr, model, train_loader, test_loader, opt_func)
data looks like this and my size of the image in the model is [1,900,300] where 1 stands for graychannel, 900 - height of the image in pixels, 300 - width of the image in pixels
and the output is:
Labels: ['120', '144', '168', '192', '216', '24', '240', '48', '72', '96']
Total number of labels: 10
Length of train_batch: 1835
Length of test_batch: 459
Train batch size: torch.Size([32, 1, 900, 300])
Shape of labels array: torch.Size([32, 10])
Train batch size: torch.Size([32, 1, 900, 300])
Shape of labels array: torch.Size([32, 10])
Test batch size: torch.Size([32, 1, 900, 300])
Shape of labels array: torch.Size([32, 10])
Epoch [0], train_loss: 1.5568, val_loss: 1.6037, val_acc: 267.2000
Epoch [1], train_loss: 0.9959, val_loss: 1.6849, val_acc: 273.4667
Here is the drive link for the dataset: https://drive.google.com/drive/folders/1PsT9_HWX4snfgnhlwC6xM4rNjcoqXdk5?usp=drive_link
As @Ivan suggested, I have tried but error is still there output of the code
PS C:\Users\smjobagc> & C:/Users/smjobagc/AppData/Local/miniconda3/envs/FSV/python.exe "w:/MASTER_BAGCHI_SCHALDACH/THESIS/code and dataset/10 class cropped 300_900 runs/10_class_torch.py"
Labels: ['120', '144', '168', '192', '216', '24', '240', '48', '72', '96']
Total number of labels: 10
Length of train_batch: 1835
Length of test_batch: 459
Traceback (most recent call last):
File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 236, in <module>
File "C:\Users\smjobagc\AppData\Local\miniconda3\envs\FSV\lib\site-packages\torch\autograd\grad_mode.py", line 28, in decorate_context
return func(*args, **kwargs)
File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 206, in evaluate
outputs = [model.validation_step(batch) for batch in val_loader]
File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 206, in <listcomp>
outputs = [model.validation_step(batch) for batch in val_loader]
File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 92, in validation_step
acc = accuracy(out, labels) # Calculate accuracy
File "w:\MASTER_BAGCHI_SCHALDACH\THESIS\code and dataset\10 class cropped 300_900 runs\10_class_torch.py", line 212, in accuracy
return torch.sum(preds.argmax(1) == labels).float().mean()
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Laying out the situation may help you:
(64, 1, 900, 300)
, ie. on 900 by 300 grayscale image per batch element;(64,)
;(64, 10)
, ie. 10
logits per batch element: q1
, q2
, ..., q10
;preds == labels
, preds is (64, 10)
while labels
is (64,)
.Hence the error, which you can rewrite as
The size of tensor
) must match the size of tensorpreds
) at non-singleton dimension1
You're trying to compare the estimated probability of the image with an integer class, how can you expect this to work? You must first get the arg max of the estimated logits (ie. the class number associated with the highest scoring logit), only then will it make sense to compare with the ground truth to measure the accuracy:
(preds.argmax(1) == labels).float().mean()