I trained a model for sequence classification using transformers (BertForSequenceClassification) and I get the error:
Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper__index_select)
I don't really understand where the problem is: in my model, in how I tokenize the data, or somewhere else.
Here is my code:
LOADING THE PRETRAINED MODEL
import torch
from transformers import BertForSequenceClassification, BertTokenizer

model_state_dict = torch.load("../MODELOS/TRANSFORMERS/TransformersNormal", map_location='cpu')  # doesn't work with map_location='cuda:0' either
model = BertForSequenceClassification.from_pretrained(pretrained_model_name_or_path="bert-base-uncased", state_dict=model_state_dict, cache_dir='./data')
CREATING THE DATALOADER
def crearDataLoad(dfv, tokenizer):
    dft = dfv  # we use the validation set to get the results without having to change much code
    #validation = dfv['text']
    validation = dfv['text'].str.lower()  # lowercasing for uncased models; the file we called "test" is the one fed to the neural network
    validation_labels = dfv['label']
    validation_inputs = crearinputs(validation, tokenizer)
    validation_masks = crearmask(validation_inputs)
    validation_inputs = torch.tensor(validation_inputs)
    validation_labels = torch.tensor(validation_labels.values)
    validation_masks = torch.tensor(validation_masks)
    from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
    # The DataLoader needs to know our batch size, so we specify it
    # Colab
    batch_size = 32
    # local
    #batch_size = 15
    validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
    validation_sampler = SequentialSampler(validation_data)
    validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)
    return validation_dataloader
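The helpers crearinputs and crearmask are not shown above; roughly, they do something like the following. This is a minimal sketch assuming standard BERT tokenization with zero-padding to a fixed max_len (the max_len parameter and both bodies are assumptions, not the exact code):

# Sketch (assumption): tokenize each text and zero-pad to a fixed length
def crearinputs(texts, tokenizer, max_len=128):
    input_ids = []
    for text in texts:
        encoded = tokenizer.encode(text, add_special_tokens=True,
                                   max_length=max_len, truncation=True)
        encoded = encoded + [0] * (max_len - len(encoded))  # pad with zeros
        input_ids.append(encoded)
    return input_ids

# Sketch (assumption): attention mask is 1 for real tokens, 0 for padding
def crearmask(input_ids):
    return [[int(token_id != 0) for token_id in seq] for seq in input_ids]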
SHOWING RESULTS
def resultados(validation_dataloader, model, tokenizer):
    import numpy as np
    from collections import Counter
    model.eval()
    # Tracking variables
    predictions, true_labels = [], []
    pred = []
    t_label = []
    # Predict
    for batch in validation_dataloader:
        # Add batch to GPU (since I don't have one, I leave it here)
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        # Telling the model not to compute or store gradients, saving memory and
        # speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            outputs = model(b_input_ids,  # token_type_ids=None,
                            attention_mask=b_input_mask)  # I GET THE ERROR HERE
        logits = outputs[0]
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        # Store predictions and true labels
        predictions.append(logits)
        true_labels.append(label_ids)
        for l in logits:
            # for each row of logits, select 0 or 1 depending on which
            # value is larger (argmax)
            pred_labels_i = np.argmax(l).item()
            pred.append(pred_labels_i)
    # If I'm not mistaken, pred holds the predictions made by the model
    pred = np.asarray(pred).tolist()
    t_label = [val for sublist in true_labels for val in sublist]  # flatten the list of labels
    #print('predictions', pred)
    #print('t_labels', t_label)
    #print('validation_labels', validation_labels)
    print("K-FOLD CROSS-VALIDATION RESULTS")
    from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score
    print(classification_report(t_label, pred))
    print("Distribution test {}".format(Counter(t_label)))
    print(confusion_matrix(t_label, pred))
    print('AUC ROC:')
    print(roc_auc_score(t_label, pred))
    result = f1_score(t_label, pred, average='binary', labels=[0, 1], pos_label=1, zero_division=0)
    print('f1-score (binary):')
    print(result)
    print("****************************************************************")
    return result
I get the error at this line in the function resultados:
with torch.no_grad():
    # Forward pass, calculate logit predictions
    outputs = model(b_input_ids,  # token_type_ids=None,
                    attention_mask=b_input_mask)  # This fails
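A quick check just before that line shows where each tensor lives; if the two prints disagree (e.g. cpu vs cuda:0), that is exactly what the error message is complaining about:

# Debugging aid: compare the device of the model weights and of the inputs
print('model is on:', next(model.parameters()).device)
print('inputs are on:', b_input_ids.device)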
MAIN PROGRAM
import pandas as pd
import tensorflow as tf
import torch

trial_data = pd.DataFrame(trial_dataset)

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    print('no GPU found')
print('Found GPU at: {}'.format(device_name))

# If there's a GPU available...
if torch.cuda.is_available():
    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    # If not...
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
validation_dataloader = crearDataLoad(trial_data, tokenizer)
# obtaining metrics for the model generated in the previous step
model.eval()
result = resultados(validation_dataloader, model, tokenizer)
You did not move your model to device, only the data. You need to call model.to(device) before using it with data located on device.
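Using the variables from the question, that is one extra line right after the model is created, for example:

model = BertForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
    state_dict=model_state_dict, cache_dir='./data')
model.to(device)  # move the weights to the same device as the input batches

Note that device must already be defined at this point; in the question it is only set later, in the main program, so either load the model after choosing the device or move the device selection up.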