Tensor for argument #2 'mat1' is on CPU, but expected it to be on GPU (while checking arguments for addmm

So I have received this error when running my simple regression code:

class linear_regression(torch.nn.Module):
def __init__(self, inputSize, outputSize):
    super(linear_regression, self).__init__()
    self.linear = torch.nn.Linear(inputSize, outputSize)
    
def forward(self, X):
    out = self.linear(X)
    return out

#fit_linear_reg(train_ds, train_X_ds, train_y_ds, test_X_ds, which_case, fold_no, p_t)
def fit_linear_reg(train_X_torch, train_y_torch, test_X_torch, case_type, fold_no, p_t):
    size_input = train_X_torch.shape[1]
    size_output = train_y_torch.shape[1]

model = linear_regression(size_input, size_output)
model.to(torch.device(device_name))
learningRate = 0.01 
epochs = 1
criterion = torch.nn.MSELoss() 
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)
training_loss_list = []

for epoch in range(epochs):
    # Converting inputs and labels to Variable
    if torch.cuda.is_available():
        inputs = Variable(train_X_torch.cuda().float())
        labels = Variable(train_y_torch.cuda().float())
    else:
        inputs = Variable(train_X_torch.float())
        labels = Variable(train_y_torch.float())

    # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
    optimizer.zero_grad()

    # get output from the model, given the inputs
    outputs = 0
    if torch.cuda.is_available():
        outputs = model(inputs.to(device))
    else:
        outputs = model(inputs)

    # get loss for the predicted output
    loss = criterion(outputs, labels.)
    print(loss)
    # get gradients w.r.t to parameters
    loss.backward()

    # update parameters
    optimizer.step()

    training_loss_list.append(loss.item())
    #print('epoch {}, loss {}'.format(epoch, loss.item()))
    
torch.save(model.state_dict(), './results/weights_' + case_type + '_' +  str(fold_no) + '_' + p_t)

return (model(test_X_torch.float()), training_loss_list)

I have tried to pass my variabls to cuda, however, I am still receiving this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-48-ccf21586daaf> in <module>
     40 
     41         #print("khodesh shekl", test_X_ds.shape)
---> 42         preds, training_loss_list_pt = fit_general(train_X_ds, train_y_ds, test_X_ds, test_y_ds, which_case, k_no, p_t)
     43         k_no += 1
     44         #print("final shape", preds.shape)

<ipython-input-46-abaeea73fcec> in fit_general(train_X_ds, train_y_ds, test_X_ds, test_y_torch, which_case, fold_no, p_t)
      2 
      3     if(which_case == "reg_simple"): #ok
----> 4         a, b = fit_linear_reg(train_X_ds, train_y_ds, test_X_ds, which_case, fold_no, p_t)
      5         return a, b
      6 

<ipython-input-45-35542c8a0f30> in fit_linear_reg(train_X_torch, train_y_torch, test_X_torch, case_type, fold_no, p_t)
     54     torch.save(model.state_dict(), './results/weights_' + case_type + '_' +  str(fold_no) + '_' + p_t)
     55 
---> 56     return (model(test_X_torch.float()), training_loss_list)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

<ipython-input-45-35542c8a0f30> in forward(self, X)
      5 
      6     def forward(self, X):
----> 7         out = self.linear(X)
      8         return out
      9 

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py in forward(self, input)
     92 
     93     def forward(self, input: Tensor) -> Tensor:
---> 94         return F.linear(input, self.weight, self.bias)
     95 
     96     def extra_repr(self) -> str:

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1751     if has_torch_function_variadic(input, weight):
   1752         return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753     return torch._C._nn.linear(input, weight, bias)
   1754 
   1755 

RuntimeError: Tensor for argument #2 'mat1' is on CPU, but expected it to be on GPU (while checking arguments for addmm)

Probably I have missed a variable to pass it to CUDA, but what can it be here?

Here is the code that I pass my data to the fit_linear_reg function:

#simple model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device_name = device
print("running device: ", device)

#fixme: k-fold cross validation
n_crossVal = 2
kf = KFold(n_splits = n_crossVal) #, random_state=1, shuffle=True fixme



for p_t in key_set_1:
    print(p_t)
    cur_ds = []
    for i, roi in enumerate(key_set_2):
        if(i==0):
            cur_ds = brain_ds[p_t + '_' + roi]
        else:
            cur_ds = np.hstack((cur_ds, brain_ds[p_t + '_' + roi]))
    
    print(cur_ds.shape)
    print(n_train)
    size_input = cur_ds.shape[1]
    
    preds_case = np.zeros(glove_ds.shape)
    k_no = 0
    training_loss_list_pt = []
    
    # Linear regression
    for k_train_index, k_test_index in kf.split(cur_ds):
        
        train_X_ds = torch.from_numpy(cur_ds[k_train_index, :])
        train_y_ds = torch.from_numpy(glove_ds[k_train_index, :])
        train_ds   = TensorDataset(train_X_ds, train_y_ds)
        
        test_X_ds  = torch.from_numpy(cur_ds[k_test_index, :])
        test_y_ds = torch.from_numpy(glove_ds[k_test_index, :])
        test_ds   =  TensorDataset(test_X_ds, test_y_ds)
        
        #print("khodesh shekl", test_X_ds.shape)
        preds, training_loss_list_pt = fit_linear_reg(train_X_ds, train_y_ds, test_X_ds, test_y_ds, which_case, k_no, p_t)
        k_no += 1
        #print("final shape", preds.shape)
        preds_case[k_test_index, :] = preds.detach().numpy()
        
        plot_the_training_loss_in_each_fold(training_loss_list_pt, which_case, k_no, p_t) #todo
        print(training_loss_list_pt)
    
    print("prediction results (correlation) for " + p_t + " ")
    print(np.corrcoef(preds_case, torch.from_numpy(glove_ds))) #todo
    
    #calculate the shuffled data
    #compare the shuffled with normal ones
    
    #statistical test
        
    
    #Saving the results #todo
    
    #if(which_case == "MM")
    ## For MM:
    # Model ke train shod, create the updated glove using each fold, 
    # do linear prediction using the new updated gloves
    # Compare
    # Put ifs for cases
    """
    train_X_ds = torch.from_numpy(cur_ds[:n_train, :])
    train_y_ds = torch.from_numpy(glove_ds[:n_train, :])
    train_ds   = TensorDataset(train_X_ds, train_y_ds)
    
    
    valid_X_ds = torch.from_numpy(cur_ds[n_train:n_train+n_val, :])
    valid_y_ds = torch.from_numpy(glove_ds[n_train:n_train+n_val, :])
    valid_ds   = TensorDataset(valid_X_ds, valid_y_ds)
    
    test_X_ds  = torch.from_numpy(cur_ds[n_train+n_val:, :])
    test_y_ds = torch.from_numpy(glove_ds[n_train+n_val:, :])
    test_ds   =  TensorDataset(test_X_ds, test_y_ds)
    
    print("analyzing", p_t, ' ', roi, ' :')

    fit_reg(train_ds, train_X_ds, train_y_ds)
    #model = MM_Net(train_X_ds, train_y_ds)

    
    
    print(train_X_ds.shape, valid_X_ds.shape, test_X_ds.shape)
    
    """

Solution

You haven't transferred your test data on the GPU:

model(test_X_torch.float().cuda())