I don't understand why running my model which is a single transformer encoder layer is deterministic but after I train it, it becomes non deterministic?
import torch
from torch import nn
import torch.optim as optim
class MyModel(nn.Module):
def __init__(self):
super().__init__()
self.encoder_layer = nn.TransformerEncoderLayer(d_model=1, nhead=1, dim_feedforward=2)
self.trans = nn.TransformerEncoder(self.encoder_layer, num_layers=1)
def forward(self, x):
y = self.trans(x)
y = torch.flatten(y)
y = nn.Linear(2,1)(y)
return y
model = MyModel()
model.train()
optimizer = optim.Adam(model.parameters(), lr=0.01)
for epoch in range(1):
training = torch.rand(size=(2,1))
labels = training[0] + 1
optimizer.zero_grad()
outputs = model(training)
loss = nn.MSELoss()(outputs, labels)
# print(loss)
loss.backward()
optimizer.step()
model.eval()
x = torch.tensor([[1.],[1.]])
print(model(x))
If I run the last line multiple times I get different output. Why?
The problem is with the my linear layer being reinitialized everytime inference is run. Has nothing to do with the encoder.