I trained a model for sequence classification using transformers (BertForSequenceClassification) and I get the error:
Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper__index_select)
I don't really understand where the problem is: in my model, in how I tokenize the data, or somewhere else.
Here is my code:
LOADING THE PRETRAINED MODEL
import torch
from transformers import BertForSequenceClassification, BertTokenizer

model_state_dict = torch.load("../MODELOS/TRANSFORMERS/TransformersNormal", map_location='cpu')  # doesn't work with map_location='cuda:0' either
model = BertForSequenceClassification.from_pretrained(pretrained_model_name_or_path="bert-base-uncased", state_dict=model_state_dict, cache_dir='./data')
CREATING THE DATALOADER
def crearDataLoad(dfv, tokenizer):
    dft = dfv  # we use the validation set to get the results without having to change much code
    #validation = dfv['text']
    validation = dfv['text'].str.lower()  # lowercasing for uncased models; the file we called "test" is the one fed to the neural network
    validation_labels = dfv['label']
    validation_inputs = crearinputs(validation, tokenizer)
    validation_masks = crearmask(validation_inputs)
    validation_inputs = torch.tensor(validation_inputs)
    validation_labels = torch.tensor(validation_labels.values)
    validation_masks = torch.tensor(validation_masks)
    from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
    # The DataLoader needs to know our batch size, so we specify it
    # Colab
    batch_size = 32
    # local
    #batch_size = 15
    validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
    validation_sampler = SequentialSampler(validation_data)
    validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)
    return validation_dataloader
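The helpers crearinputs and crearmask are not shown above; roughly, they do something like the following. This is a minimal sketch assuming standard BERT tokenization with zero-padding to a fixed max_len (the max_len parameter and both bodies are assumptions, not the exact code):

# Sketch (assumption): tokenize each text and zero-pad to a fixed length
def crearinputs(texts, tokenizer, max_len=128):
    input_ids = []
    for text in texts:
        encoded = tokenizer.encode(text, add_special_tokens=True,
                                   max_length=max_len, truncation=True)
        encoded = encoded + [0] * (max_len - len(encoded))  # pad with zeros
        input_ids.append(encoded)
    return input_ids

# Sketch (assumption): attention mask is 1 for real tokens, 0 for padding
def crearmask(input_ids):
    return [[int(token_id != 0) for token_id in seq] for seq in input_ids]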
SHOWING RESULTS
def resultados(validation_dataloader, model, tokenizer):
    import numpy as np
    from collections import Counter
    model.eval()
    # Tracking variables
    predictions, true_labels = [], []
    pred = []
    t_label = []
    # Predict
    for batch in validation_dataloader:
        # Add batch to GPU (since I don't have one, I leave it here)
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        # Telling the model not to compute or store gradients, saving memory and
        # speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            outputs = model(b_input_ids,  # token_type_ids=None,
                            attention_mask=b_input_mask)  # I GET THE ERROR HERE
        logits = outputs[0]
        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        # Store predictions and true labels
        predictions.append(logits)
        true_labels.append(label_ids)
        for l in logits:
            # for each row of logits, select 0 or 1 depending on which
            # value is larger (argmax)
            pred_labels_i = np.argmax(l).item()
            pred.append(pred_labels_i)
    # If I'm not mistaken, pred holds the predictions made by the model
    pred = np.asarray(pred).tolist()
    t_label = [val for sublist in true_labels for val in sublist]  # flatten the list of labels
    #print('predictions', pred)
    #print('t_labels', t_label)
    #print('validation_labels', validation_labels)
    print("K-FOLD CROSS-VALIDATION RESULTS")
    from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, f1_score
    print(classification_report(t_label, pred))
    print("Distribution test {}".format(Counter(t_label)))
    print(confusion_matrix(t_label, pred))
    print('AUC ROC:')
    print(roc_auc_score(t_label, pred))
    result = f1_score(t_label, pred, average='binary', labels=[0, 1], pos_label=1, zero_division=0)
    print('f1-score (binary):')
    print(result)
    print("****************************************************************")
    return result
I get the error at this line in the function resultados:
with torch.no_grad():
    # Forward pass, calculate logit predictions
    outputs = model(b_input_ids,  # token_type_ids=None,
                    attention_mask=b_input_mask)  # This fails
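A quick check just before that line shows where each tensor lives; if the two prints disagree (e.g. cpu vs cuda:0), that is exactly what the error message is complaining about:

# Debugging aid: compare the device of the model weights and of the inputs
print('model is on:', next(model.parameters()).device)
print('inputs are on:', b_input_ids.device)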
MAIN PROGRAM
import pandas as pd
import tensorflow as tf
import torch

trial_data = pd.DataFrame(trial_dataset)

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    print('no GPU found')
print('Found GPU at: {}'.format(device_name))

# If there's a GPU available...
if torch.cuda.is_available():
    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    # If not...
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
validation_dataloader = crearDataLoad(trial_data, tokenizer)
# obtaining metrics for the model generated in the previous step
model.eval()
result = resultados(validation_dataloader, model, tokenizer)
You did not move your model to device, only the data. You need to call model.to(device) before using it with data located on device.
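Using the variables from the question, that is one extra line right after the model is created, for example:

model = BertForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
    state_dict=model_state_dict, cache_dir='./data')
model.to(device)  # move the weights to the same device as the input batches

Note that device must already be defined at this point; in the question it is only set later, in the main program, so either load the model after choosing the device or move the device selection up.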