Tags: python, pytorch, autograd, gaussian-mixture-model

Parametric estimation of a Gaussian Mixture Model


I am trying to train a model to estimate a GMM. However, the means of the GMM are recalculated on each iteration from a mean_placement parameter. I am following the solution provided here; I'll copy and paste the original code:

import numpy as np

import torch
from torch import optim
import torch.distributions as D

num_layers = 8
weights = torch.ones(num_layers, requires_grad=True)
means = torch.tensor(np.random.randn(num_layers, 2), requires_grad=True)
stdevs = torch.tensor(np.abs(np.random.randn(num_layers, 2)), requires_grad=True)

parameters = [weights, means, stdevs]
optimizer1 = optim.SGD(parameters, lr=0.001, momentum=0.9)

num_iter = 10001
for i in range(num_iter):
    # Rebuild the mixture from the current parameter values.
    mix = D.Categorical(weights)
    comp = D.Independent(D.Normal(means, stdevs), 1)
    gmm = D.MixtureSameFamily(mix, comp)

    optimizer1.zero_grad()
    x = torch.randn(5000, 2)  # x can be an arbitrary batch of samples
    loss2 = -gmm.log_prob(x).mean()  # negative log-likelihood
    loss2.backward()
    optimizer1.step()

    print(i, loss2)

What I would like to do is this:

num_layers = 8
weights = torch.ones(num_layers, requires_grad=True)
means_coef = torch.tensor(10., requires_grad=True)
stdevs = torch.tensor(np.abs(np.random.randn(num_layers, 2)), requires_grad=True)
parameters = [means_coef]
optimizer1 = optim.SGD(parameters, lr=0.001, momentum=0.9)

num_iter = 10001
for i in range(num_iter):
    # Recompute the means from means_coef on every iteration.
    means = torch.tensor(
        torch.dstack([torch.linspace(1, means_coef.detach().item(), num_layers)] * 2).squeeze(),
        requires_grad=True,
    )

    mix = D.Categorical(weights)
    comp = D.Independent(D.Normal(means, stdevs), 1)
    gmm = D.MixtureSameFamily(mix, comp)

    optimizer1.zero_grad()
    x = torch.randn(5000, 2)  # x can be an arbitrary batch of samples
    loss2 = -gmm.log_prob(x).mean()
    loss2.backward()
    optimizer1.step()

    print(i, means_coef)


However, in this case the parameter is never updated and its grad value is always None. Any ideas how to fix this?
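
Here is a minimal sketch that reproduces the symptom outside the training loop:

import torch

means_coef = torch.tensor(10., requires_grad=True)
# Same construction as in the loop above.
means = torch.tensor(
    torch.dstack([torch.linspace(1, means_coef.detach().item(), 8)] * 2).squeeze(),
    requires_grad=True,
)
loss = means.sum()  # any scalar function of means
loss.backward()
print(means.grad is None)       # False: means itself receives a gradient
print(means_coef.grad is None)  # True: nothing flows back to means_coef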


Solution

  • Following your instructions, I have rewritten your model. If you run it, you can see that all of the parameters change as the model is optimized. I have also provided the computation graph of the model at the end. You can simply modify the GMM class as needed if you want to build a new one.

    import numpy as np

    import torch
    from torch import nn
    from torch import optim
    import torch.distributions as D

    class GMM(nn.Module):

        def __init__(self, weights, base, scale, n_cell=8, shift=0, dim=2):
            super().__init__()
            self.weight = nn.Parameter(weights)
            self.base = nn.Parameter(base)
            self.scale = nn.Parameter(scale)
            self.grid = torch.arange(1, n_cell + 1)
            self.shift = shift
            self.n_cell = n_cell
            self.dim = dim

        def trsf_grid(self):
            # Component means: log_base(scale * i + shift) for i = 1..n_cell,
            # repeated across all `dim` coordinates. The expression is
            # differentiable in `base` and `scale`, so gradients reach both.
            trsf = (
                torch.log(self.scale * self.grid + self.shift)
                / torch.log(self.base)
                ).reshape(-1, 1)
            return trsf.expand(self.n_cell, self.dim)

        def forward(self, x, std):
            # Rebuild the mixture from the current parameters and return
            # the negative log-likelihood of x.
            means = self.trsf_grid()
            mix = D.Categorical(self.weight)
            comp = D.Independent(D.Normal(means, std), 1)
            gmm = D.MixtureSameFamily(mix, comp)
            return -gmm.log_prob(x).mean()

    if __name__ == "__main__":
        weight = torch.ones(8)
        base = torch.tensor(3.)
        scale = torch.tensor(1.)
        stds = torch.tensor(np.abs(np.random.randn(8, 2)), requires_grad=False)
        model = GMM(weight, base, scale)
        print(list(model.parameters()))

        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
        for i in range(1000):
            optimizer.zero_grad()
            x = torch.randn(5000, 2)
            loss = model(x, stds)
            loss.backward()
            optimizer.step()

        print(list(model.parameters()))

    In my case it returned the following parameters:

    [Parameter containing:
    tensor([1., 1., 1., 1., 1., 1., 1., 1.], requires_grad=True), Parameter containing:
    tensor(3., requires_grad=True), Parameter containing:
    tensor(1., requires_grad=True)]
    
    [Parameter containing:
    tensor([0.7872, 1.1010, 1.3390, 1.3757, 0.5122, 0.2884, 1.2597, 0.7597],
           requires_grad=True), Parameter containing:
    tensor(3.3207, requires_grad=True), Parameter containing:
    tensor(0.2814, requires_grad=True)]
    

    which indeed shows that the parameters are updating. Also you can see the computation graph below:

    [Image: the computation graph]
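
    A graph like this can be generated with, for example, the torchviz package (assumed here; any similar visualization tool works):

    # Sketch: render the computation graph of the loss with torchviz
    # (requires `pip install torchviz` plus a system Graphviz install).
    from torchviz import make_dot

    model = GMM(torch.ones(8), torch.tensor(3.), torch.tensor(1.))
    stds = torch.rand(8, 2) + 0.5  # any positive stds work for the sketch
    loss = model(torch.randn(5000, 2), stds)
    make_dot(loss, params=dict(model.named_parameters())).render("gmm_graph", format="png")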