python, torch, pytorch, loss-function

Exploding loss in PyTorch


I am trying to train a latent space model in PyTorch. The model is relatively simple and just requires me to minimize my loss function, but I am seeing odd behaviour: after training for a short while, the loss suddenly explodes upwards.

import numpy as np
import scipy.sparse.csgraph as csg
import torch
from torch.autograd import Variable
import torch.autograd as autograd
import matplotlib.pyplot as plt
%matplotlib inline

def cmdscale(D):
    # Number of points                                                                        
    n = len(D)

    # Centering matrix                                                                        
    H = np.eye(n) - np.ones((n, n))/n

    # YY^T                                                                                    
    B = -H.dot(D**2).dot(H)/2

    # Diagonalize                                                                             
    evals, evecs = np.linalg.eigh(B)

    # Sort by eigenvalue in descending order                                                  
    idx   = np.argsort(evals)[::-1]
    evals = evals[idx]
    evecs = evecs[:,idx]

    # Compute the coordinates using positive-eigenvalued components only                      
    w, = np.where(evals > 0)
    L  = np.diag(np.sqrt(evals[w]))
    V  = evecs[:,w]
    Y  = V.dot(L)

    return Y, evals

Y = np.array([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
              [0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
              [0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
              [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.],
              [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.],
              [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
              [0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1.],
              [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
              [1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1.],
              [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
              [0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
              [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1.],
              [0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0.],
              [0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.],
              [0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0.]])

temp = Y[~np.all(Y == 0, axis=1)]
temp = temp[:,~np.all(Y == 0, axis=1)]
Y = temp

n = np.shape(Y)[0]
k = 2

D = csg.shortest_path(Y, directed=True)
Z = cmdscale(D)[0][:,0:k]
Z = Z - Z.mean(axis=0, keepdims=True)

tZ = autograd.Variable(torch.Tensor(Z), requires_grad=True)
B = autograd.Variable(torch.Tensor([0]), requires_grad=True)
tY = torch.autograd.Variable(torch.Tensor(Y), requires_grad=False)

# pairwise Euclidean distances between the rows of m
def distMatrix(m):
    n = m.size(0)
    d = m.size(1)
    x = m.unsqueeze(1).expand(n, n, d)
    y = m.unsqueeze(0).expand(n, n, d)
    # small epsilon avoids sqrt(0), whose gradient is infinite
    return torch.sqrt(torch.pow(x - y, 2).sum(2) + 1e-4)

def loss(tY):
    d = -distMatrix(tZ)+B
    sigmoidD = torch.sigmoid(d)
    reduce = tY*torch.log(sigmoidD)+(1-tY)*torch.log(1-sigmoidD)
    # zero out the diagonal so self-comparisons don't contribute to the loss
    reduce[torch.eye(n).byte()] = 0
    return -reduce.sum()

losses = []
learning_rate = 1e-4
l = loss(tY)
stepSize = 1000

for i in range(stepSize):
    l.backward(retain_graph=True)
    losses.append(float(loss(tY)))
    tZ.data = tZ.data - learning_rate * tZ.grad.data
    B.data = B.data - learning_rate * B.grad.data

    tZ.grad.data.zero_()
    B.grad.data.zero_()

plt.subplot(122)
plt.plot(losses)
plt.title('Loss')
plt.xlabel('Iteration')
plt.ylabel('loss')

plt.show()

[plot: training loss vs. iteration — the loss decreases briefly, then suddenly explodes upwards]

Shouldn't the loss keep going down, or at least converge to some point? I must have done something wrong. I am new to PyTorch, so any hints or nudges in the right direction would be highly appreciated!


Solution

  • The issue was that I defined my loss

    l = loss(tY)
    

    outside of the loop that ran and updated my gradients. I am not entirely sure why it had the effect it did, but presumably, because backward() was called on the same stale computation graph every iteration, each update used the gradient at the initial values of tZ and B rather than at the current ones. Moving the loss computation inside the loop solved the problem, resulting in this loss:

    [plot: training loss vs. iteration after the fix — the loss decreases instead of exploding]
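
    For reference, here is a minimal sketch of how the corrected loop could look, assuming everything else above stays the same; the only change is that the loss is rebuilt from the current tZ and B on every iteration, so retain_graph=True is no longer needed:

    losses = []
    learning_rate = 1e-4
    stepSize = 1000

    for i in range(stepSize):
        # recompute the loss from the current values of tZ and B
        l = loss(tY)
        losses.append(float(l))
        l.backward()

        # plain gradient-descent update on the latent coordinates and bias
        tZ.data = tZ.data - learning_rate * tZ.grad.data
        B.data = B.data - learning_rate * B.grad.data

        # clear accumulated gradients before the next iteration
        tZ.grad.data.zero_()
        B.grad.data.zero_()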