Tags: nlp, pytorch, huggingface-transformers, bert-language-model, attention-model

Getting random output every time when running Next Sentence Prediction code using BERT


Based on the code provided below, I am trying to run NSP (Next Sentence Prediction) on a custom dataset. The loss after training the model is different every time, and the model gives different accuracies on every run. What am I missing or doing wrong?

# pip install transformers[torch]
from transformers import BertTokenizer, BertForNextSentencePrediction 
import torch  
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') 
model = BertForNextSentencePrediction.from_pretrained('bert-base-uncased')
with open('clean.txt', 'r') as fp:
    text = fp.read().split('\n')
bag = [sentence for paragraph in text for sentence in paragraph.split('.') if sentence != '']
bag_size = len(bag)
import random
 
sentence_a = []
sentence_b = []
label = []
 
for paragraph in text:
    sentences = [
        sentence for sentence in paragraph.split('.') if sentence != ''
    ]
    num_sentences = len(sentences)
    if num_sentences > 1:
        start = random.randint(0, num_sentences-2)
        # 50/50 whether this pair is IsNextSentence or NotNextSentence
        if random.random() >= 0.5:
            # this is IsNextSentence
            sentence_a.append(sentences[start])
            sentence_b.append(sentences[start+1])
            label.append(0)
        else:
            index = random.randint(0, bag_size-1)
            # this is NotNextSentence
            sentence_a.append(sentences[start])
            sentence_b.append(bag[index])
            label.append(1)
 
inputs = tokenizer(sentence_a, sentence_b, return_tensors='pt', max_length=512, truncation=True, padding='max_length')
inputs['labels'] = torch.LongTensor([label]).T
class MeditationsDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        # the tokenizer already returned tensors, so index into them directly
        return {key: val[idx] for key, val in self.encodings.items()}
    def __len__(self):
        return len(self.encodings.input_ids)
 
dataset = MeditationsDataset(inputs)
loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
from transformers import AdamW  # note: deprecated in recent transformers releases; torch.optim.AdamW is the drop-in replacement
 
# activate training mode
model.train()
# initialize optimizer
optim = AdamW(model.parameters(), lr=5e-6)
 
from tqdm import tqdm  # for our progress bar
 
epochs = 2
 
for epoch in range(epochs):
    # setup loop with TQDM and dataloader
    loop = tqdm(loader, leave=True)
    for batch in loop:
        # initialize calculated gradients (from prev step)
        optim.zero_grad()
        # pull all tensor batches required for training
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        labels = batch['labels'].to(device)
        # process
        outputs = model(input_ids, attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        labels=labels)
        # extract loss
        loss = outputs.loss
        # backpropagate to compute gradients for every parameter that requires grad
        loss.backward()
        # update parameters
        optim.step()
        # print relevant info to progress bar
        loop.set_description(f'Epoch {epoch}')
        loop.set_postfix(loss=loss.item())

In the code below I am testing the model on unseen data:

from torch.nn.functional import softmax

prompt = "sentence 1 text"
prompt2 = "sentence 2 text"
encoding = tokenizer.encode_plus(prompt, prompt2, return_tensors="pt")
# move the inputs to the same device as the model
encoding = {key: val.to(device) for key, val in encoding.items()}
logits = model(**encoding)[0]
prob = softmax(logits, dim=1)
print(prob)

So the values of prob and loss are different every single time for the same unseen data, which to the best of my knowledge should not be the case.


Solution

  • You need to put the model in evaluation mode: layers such as dropout behave randomly during training and must be turned off while testing the model. You can do this with

    model.eval()
    

    If you skip this, you will get a different output and loss value each time, because the dropout layers in your model will randomly drop different neurons on every forward pass.
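
    As a concrete illustration, here is the inference snippet from the question with evaluation mode enabled. This is a minimal sketch; wrapping the forward pass in torch.no_grad() is an optional addition that simply skips gradient bookkeeping:

    model.eval()  # put dropout (and similar) layers into deterministic inference mode
    encoding = tokenizer.encode_plus("sentence 1 text", "sentence 2 text", return_tensors="pt")
    encoding = {key: val.to(device) for key, val in encoding.items()}
    with torch.no_grad():  # optional: disables gradient tracking during inference
        logits = model(**encoding)[0]
    prob = softmax(logits, dim=1)
    print(prob)  # identical on every run for the same input and the same trained weights

    Note that the loss printed during training will still vary from run to run, because the sentence pairs are sampled randomly, the DataLoader shuffles, and dropout is active in train mode; seeding the generators (random.seed(...), torch.manual_seed(...)) makes those runs reproducible as well.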