Tags: pytorch, mathematical-optimization, torch

PyTorch: why is the loss unchanging, in this simple example?


I'm writing a code example to do a simple linear projection (like PCA) in PyTorch. Everything appears to be OK except that the loss does not change as training progresses. Changing the learning rate doesn't affect this, and since the problem has only a single parameter (the rotation angle) the loss should certainly be changing. What am I missing here?

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as nnF


class PCArot2D(nn.Module):
    "2D PCA rotation, expressed as a gradient-descent problem"
    def __init__(self):
        super(PCArot2D, self).__init__()
        self.theta = nn.Parameter(torch.tensor(np.random.random() * 2 * np.pi))

    def getrotation(self):
        sintheta = torch.sin(self.theta)
        costheta = torch.cos(self.theta)
        return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)

    def forward(self, x):
        xmeans = torch.mean(x, dim=1, keepdim=True)
        rot = self.getrotation()

        return torch.mm(rot, x - xmeans)

def covariance(y):
    "Calculates the covariance matrix of its input (as torch variables)"
    ymeans = torch.mean(y, dim=1, keepdim=True)
    ycentred = y - ymeans
    return torch.mm(ycentred, ycentred.T) / ycentred.shape[1]


net = PCArot2D()

example2 = torch.tensor(np.random.randn(2, 33))


# define a loss function and an optimiser
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.1)

# train the network
num_epochs = 1000
for epoch in range(num_epochs):
    optimizer.zero_grad()
    # forward + backward + optimize
    outputs = net(torch.DoubleTensor(example2))
    # the covariance between output channels is the measure that we wish to minimise
    covariance = (outputs[0, :] * outputs[1, :]).mean()
    loss = criterion(covariance, torch.tensor(0, dtype=torch.double))

    loss.backward()
    optimizer.step()

    running_loss = loss.item()
    if ((epoch & (epoch - 1)) == 0) or epoch==(num_epochs-1): # don't print on all epochs
        # print statistics
        print('[%d] loss: %.8f' %
            (epoch, running_loss))

print('Finished Training')

Output:

[0] loss: 0.00629047
[1] loss: 0.00629047
[2] loss: 0.00629047
[4] loss: 0.00629047
[8] loss: 0.00629047
etc

Solution

  • It seems the problem is in your getrotation function. When you build a new tensor from other tensors with torch.tensor(...), the values are copied and the result is detached from the computation graph, so gradients can no longer be back-propagated through it to self.theta:

    def getrotation(self):
        sintheta = torch.sin(self.theta)
        costheta = torch.cos(self.theta)
        return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
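
    A quick way to see this (my own check, not part of your code) is to backpropagate through a matrix built this way; the gradient never reaches theta:

    theta = torch.tensor(0.3, requires_grad=True)
    rot = torch.tensor([[torch.cos(theta), -torch.sin(theta)],
                        [torch.sin(theta),  torch.cos(theta)]], requires_grad=True)
    rot.sum().backward()
    print(theta.grad)   # None: rot is a brand-new leaf tensor, detached from theta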
    

    So you need to find some other way to construct your return tensor.

    Here is one suggestion that seems to work using torch.cat:

    def getrotation(self):
        sintheta = torch.sin(self.theta)
        costheta = torch.cos(self.theta)
        #return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
        A = torch.cat([costheta.unsqueeze(0), -sintheta.unsqueeze(0)], dim=0)
        B = torch.cat([sintheta.unsqueeze(0), costheta.unsqueeze(0)], dim=0)
        return torch.cat([A.unsqueeze(0), B.unsqueeze(0)], dim=0).double()
    

    After implementing this change, the loss decreases as training progresses:

    [0] loss: 0.00765365
    [1] loss: 0.00764726
    [2] loss: 0.00764023
    [4] loss: 0.00762607
    [8] loss: 0.00759777
    [16] loss: 0.00754148
    [32] loss: 0.00742997
    [64] loss: 0.00721117
    [128] loss: 0.00679025
    [256] loss: 0.00601233
    [512] loss: 0.00469085
    [999] loss: 0.00288501
    Finished Training
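
    As a quick sanity check (my own addition, assuming the getrotation above is in place), you can also confirm that the gradient now reaches the parameter after a backward pass:

    outputs = net(torch.DoubleTensor(example2))
    cov = (outputs[0, :] * outputs[1, :]).mean()
    loss = criterion(cov, torch.tensor(0, dtype=torch.double))
    loss.backward()
    print(net.theta.grad)   # a number now, rather than None as with the original getrotation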
    

    I hope this helps!


    Edit: A simpler and prettier version by @DanStowell:

    def getrotation(self):
        # use self.theta (not the global net.theta) so the method works inside the module
        sintheta = torch.sin(self.theta).double().unsqueeze(0)
        costheta = torch.cos(self.theta).double().unsqueeze(0)
        return torch.cat([costheta, -sintheta, sintheta, costheta]).reshape((2, 2))
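
    A small usage sketch for this version (my own, assuming it replaces the original method inside PCArot2D): after retraining, the cross-channel covariance that the loop minimises should shrink towards zero, which gives an easy end-to-end check:

    with torch.no_grad():
        out = net(torch.DoubleTensor(example2))
        # the quantity the training loop drives towards zero
        print((out[0, :] * out[1, :]).mean())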