RuntimeError: Input type (torch.cuda.DoubleTensor) and weight type (torch.cuda.FloatTensor) should be the same

I am training a CNN to classify some images. The objective is to classify them into two classes. I already executed the same code on Windows with a RTX3070, now I am trying to do the exact same on Ubuntu with a Nvidea A100-40Gb. The code I am using is this one:

import warnings
warnings.filterwarnings('ignore')

import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import plotly
import plotly.graph_objects as go
%matplotlib inline

import os

from sklearn.calibration import calibration_curve
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import lr_scheduler

if torch.cuda.is_available():
    print("CUDA available. Using GPU acceleration.")
    device = "cuda"
else:
    print("CUDA is NOT available. Using CPU for training.")
    device = "cpu"

import pickle

def save_var(var,filename):
    with open(filename, 'wb') as f:
        pickle.dump(var, f)

def recover_var(filename):
    with open(filename, 'rb') as f:
        var = pickle.load(f)    
    return var

    
df = recover_var('dataframe_cnn.pickle') #my dataset
df = df.sample(frac=1).reset_index(drop=True)

df.columns = ['label'] + list(range(1,27649))

train =df[:int(0.7*len(df))]
test = df[int(0.7*len(df)):]

def preprocessing(train, test, split_train_size = 0.2):
    
    # Split data into features(pixels) and labels(numbers from 0 to 9)
    targets = train.label.values
    features = train.drop(["label"], axis = 1).values
    
    # Normalization
    features = features/255.
    X_test = test.values/255.
    
    # Train test split. Size of train data is (1-split_train_size)*100% and size of test data is split_train_size%. 
    X_train, X_val, y_train, y_val = train_test_split(features,
                                                      targets,
                                                      test_size = split_train_size,
                                                      random_state = 42) 
    
    # Create feature and targets tensor for train set. I need variable to accumulate gradients. Therefore first I create tensor, then I will create variable
    X_train = torch.from_numpy(X_train)
    y_train = torch.from_numpy(y_train).type(torch.LongTensor) # data type is long

    # Create feature and targets tensor for test set.
    X_val = torch.from_numpy(X_val)
    y_val = torch.from_numpy(y_val).type(torch.LongTensor) # data type is long
    
    # Create feature tensor for train set.
    X_test = torch.from_numpy(X_test)
    
    return X_train, y_train, X_val, y_val, X_test

X_train, y_train, X_val, y_val, X_test = preprocessing(train, test)

print(f'Shape of training data: {X_train.shape}')
print(f'Shape training labels: {y_train.shape}')
print(f'Shape of validation data: {X_val.shape}')
print(f'Shape of valiation labels: {y_val.shape}')
print(f'Shape of testing data: {X_test.shape}')

# batch_size, epoch and iteration
BATCH_SIZE = 100
N_ITER = 2500
EPOCHS = 5
# I will be trainin the model on another 10 epochs to show flexibility of pytorch
EXTRA_EPOCHS = 10

# Pytorch train and test sets
train_tensor = torch.utils.data.TensorDataset(X_train, y_train)
val_tensor = torch.utils.data.TensorDataset(X_val, y_val)
test_tensor = torch.utils.data.TensorDataset(X_test)

# data loader
train_loader = torch.utils.data.DataLoader(train_tensor, 
                                           batch_size = BATCH_SIZE,
                                           shuffle = True)
val_loader = torch.utils.data.DataLoader(val_tensor, 
                                         batch_size = BATCH_SIZE, 
                                         shuffle = False)
test_loader = torch.utils.data.DataLoader(test_tensor, 
                                          batch_size = BATCH_SIZE,
                                          shuffle = False)



class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        
        # convolution 1
        self.c1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(5,5), stride=1, padding=0)
        self.relu1 = nn.ReLU()
        
        # maxpool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2,2))
        
        # dropout 1
        self.dropout1 = nn.Dropout(0.25)
        
        # convolution 2
        self.c2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3,3), stride=1, padding=0)
        self.relu2 = nn.ReLU()
        
        # maxpool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2,2))

        # dropout 2
        self.dropout2 = nn.Dropout(0.25)
        
        # linear 1
        self.fc1 = nn.Linear(32*5*5, 256)
        
        # dropout 3
        self.dropout3 = nn.Dropout(0.25)
        
        # linear 2
        self.fc2 = nn.Linear(256, 10)
        
    def forward(self, x):
        
        out = self.c1(x) # [BATCH_SIZE, 16, 24, 24]
        out = self.relu1(out) 
        out = self.maxpool1(out) # [BATCH_SIZE, 16, 12, 12]
        out = self.dropout1(out) 
        
        out = self.c2(out) # [BATCH_SIZE, 32, 10, 10]
        out = self.relu2(out) 
        out = self.maxpool2(out) # [BATCH_SIZE, 32, 5, 5]
        out = self.dropout2(out) 
        
        out = out.view(out.size(0), -1) # [BATCH_SIZE, 32*5*5=800]
        out = self.fc1(out) # [BATCH_SIZE, 256]
        out = self.dropout3(out)
        out = self.fc2(out) # [BATCH_SIZE, 10]
        
        return out
    
    
# Create CNN
model = CNNModel()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

# Cross Entropy Loss 
criterion = nn.CrossEntropyLoss()

# LR scheduler
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.5)

# On GPU if possible
if torch.cuda.is_available():
    print("Model will be training on GPU")
    model = model.cuda()
    criterion = criterion.cuda()
else:
    print("Model will be training on CPU")


def fit(epoch):
    
    print("Training...")
    # Set model on training mode
    model.train()
    
    # Update lr parameter
    exp_lr_scheduler.step()
    
    # Initialize train loss and train accuracy
    train_running_loss = 0.0
    train_running_correct = 0
    train_running_lr = optimizer.param_groups[0]['lr']
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data.view(BATCH_SIZE,1,144,192)), Variable(target)
        
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        
        train_running_loss += loss.item()
        _, preds = torch.max(output.data, 1)
        train_running_correct += (preds == target).sum().item()
        
        loss.backward()
        optimizer.step()
        
        if (batch_idx + 1)% 50 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                 epoch+1, 
                 (batch_idx + 1) * len(data), 
                 len(train_loader.dataset),
                 BATCH_SIZE * (batch_idx + 1) / len(train_loader), 
                 loss.cpu().detach().numpy())
                 )
            
    train_loss = train_running_loss/len(train_loader.dataset)
    train_accuracy = 100. * train_running_correct/len(train_loader.dataset)    
    
    return train_loss, train_accuracy, train_running_lr


def validate(data_loader):
    
    print("Validating...")
    # Set model on validating mode
    model.eval()
    val_preds = torch.LongTensor().cuda()
    val_proba = torch.LongTensor().cuda()
    
    # Initialize validation loss and validation accuracy
    val_running_loss = 0.0
    val_running_correct = 0
    
    for data, target in data_loader:
        # Regarding volatile argument, check the note below
        data, target = Variable(data.view(BATCH_SIZE,1,144,192), volatile=True), Variable(target)
        
        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()
        
        output = model(data)
        loss = criterion(output, target)
        
        val_running_loss += loss.item()
        pred = output.data.max(1, keepdim=True)[1]
        proba = torch.nn.functional.softmax(output.data)

        val_running_correct += pred.eq(target.data.view_as(pred)).cpu().sum() 
        
        # Store val_predictions with probas for confusion matrix calculations & best errors made
        val_preds = torch.cat((val_preds.float(), pred), dim=0).float()
        val_proba = torch.cat((val_proba.float(), proba)).float()

    val_loss = val_running_loss/len(data_loader.dataset)
    val_accuracy = 100. * val_running_correct/len(data_loader.dataset) 
    
    return val_loss, val_accuracy, val_preds, val_proba

train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []
val_preds, val_proba = [], []
train_lr = []

for epoch in range(EPOCHS):
    
    print(f"Epoch {epoch+1} of {EPOCHS}\n")
    
    train_epoch_loss, train_epoch_accuracy, train_epoch_lr = fit(epoch)
    val_epoch_loss, val_epoch_accuracy, val_epoch_preds, val_epoch_proba = validate(val_loader)
    
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    train_lr.append(train_epoch_lr)
    
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
    val_preds.append(val_epoch_preds)
    val_proba.append(val_epoch_proba)
    
    print(f"Train Loss: {train_epoch_loss:.4f}, Train Acc: {train_epoch_accuracy:.2f}")
    print(f'Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_accuracy:.2f}\n')

However, it is returning the following error:

Cell In[149], line 285
    281 for epoch in range(EPOCHS):
    283     print(f"Epoch {epoch+1} of {EPOCHS}\n")
--> 285     train_epoch_loss, train_epoch_accuracy, train_epoch_lr = fit(epoch)
    286     val_epoch_loss, val_epoch_accuracy, val_epoch_preds, val_epoch_proba = validate(val_loader)
    288     train_loss.append(train_epoch_loss)

Cell In[149], line 213, in fit(epoch)
    210     target = target.cuda()
    212 optimizer.zero_grad()
--> 213 output = model(data)
    214 loss = criterion(output, target)
    216 train_running_loss += loss.item()

File /opt/miniconda3/envs/mlgpu/lib/python3.9/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[149], line 152, in CNNModel.forward(self, x)
    150 def forward(self, x):
--> 152     out = self.c1(x) # [BATCH_SIZE, 16, 24, 24]
    153     out = self.relu1(out) 
    154     out = self.maxpool1(out) # [BATCH_SIZE, 16, 12, 12]

File /opt/miniconda3/envs/mlgpu/lib/python3.9/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/miniconda3/envs/mlgpu/lib/python3.9/site-packages/torch/nn/modules/conv.py:457, in Conv2d.forward(self, input)
    456 def forward(self, input: Tensor) -> Tensor:
--> 457     return self._conv_forward(input, self.weight, self.bias)

File /opt/miniconda3/envs/mlgpu/lib/python3.9/site-packages/torch/nn/modules/conv.py:453, in Conv2d._conv_forward(self, input, weight, bias)
    449 if self.padding_mode != 'zeros':
    450     return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
    451                     weight, bias, self.stride,
    452                     _pair(0), self.dilation, self.groups)
--> 453 return F.conv2d(input, weight, bias, self.stride,
    454                 self.padding, self.dilation, self.groups)

RuntimeError: Input type (torch.cuda.DoubleTensor) and weight type (torch.cuda.FloatTensor) should be the same```

I already tried to typecast the output to float, torch.float, np.float32, but it still returned the same error. Moreover, I tried to change the type of the variable x on line 152, without results. How can I solve it?

Solution

Looks like your model parameters are in Float but your data is in Double datatype. I'm not sure exactly how you attempted to cast your tensor but the following should work:

optimizer.zero_grad()
output = model(data.float())

Alternatively, you can convert the model parameters to Double by the following:

# Create CNN
model = CNNModel()
model.double()

Try either of them and it should tackle the tensor type mismatch issue.

NOTE: Do not use Variable(tensor) as you did in the following:

for batch_idx, (data, target) in enumerate(train_loader):
    data, target = Variable(data.view(BATCH_SIZE,1,144,192)), Variable(target)

as well as here:

for data, target in data_loader:
    # Regarding volatile argument, check the note below
    data, target = Variable(data.view(BATCH_SIZE,1,144,192), volatile=True), Variable(target)

The Variable API has been deprecated by PyTorch.