Search code examples
machine-learningpytorchode

Parameters not changing while applying Pytorch Minimization fucntion


Code for getting the data:

import pandas as pd
import torch

dataset = pd.read_csv('/kaggle/input/fish-bear/population_data.csv')
years = torch.tensor(dataset['year'], dtype = torch.float64)
fish_pop = torch.tensor(dataset['fish_hundreds'], dtype = torch.float64)
bears_pop = torch.tensor(dataset['bears_hundreds'], dtype = torch.float64)
pop = torch.cat((fish_pop.reshape((51, 1)), bears_pop.reshape((51, 1))), 1)

Ordinary Differential Equation Solver

from typing import List, Callable, Sequence, NamedTuple, Union

class _Tableau(NamedTuple):

    c: List[float]
    b: List[float]
    a: List[List[float]]


rk4_tableau = _Tableau(c=[0.0, 0.5, 0.5, 1.0],
                       b=[1 / 6., 1 / 3., 1 / 3., 1 / 6.],
                       a=[[0.0, 0.0, 0.0, 0.0], [0.5, 0.0, 0.0, 0.0],
                          [0.0, 0.5, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0]])


def explicit_rk(tableau: _Tableau, fcn: Callable[..., torch.Tensor],
                y0: torch.Tensor, t: torch.Tensor,
                params: Sequence[torch.Tensor]):
    c = tableau.c
    a = tableau.a
    b = tableau.b
    s = len(c)
    nt = len(t)

    # set up the results list
    yt_lst: List[torch.Tensor] = []
    yt_lst.append(y0)
    y = yt_lst[-1]
    for i in range(nt - 1):
        t0 = t[i]
        t1 = t[i + 1]
        h = t1 - t0
        ks: List[torch.Tensor] = []
        ksum: Union[float, torch.Tensor] = 0.0
        for j in range(s):
            if j == 0:
                k = fcn(y, t0, params)
            else:
                ak: Union[float, torch.Tensor] = 0.0
                aj = a[j]
                for m in range(j):
                    ak = aj[m] * ks[m] + ak
                k = fcn(h * ak + y, t0 + c[j] * h, params)
            ks.append(k)
            ksum = ksum + b[j] * k
        y = h * ksum + y
        yt_lst.append(y)
    yt = torch.stack(yt_lst, dim=0)
    return yt

def rk4_ivp(fcn: Callable[..., torch.Tensor], y0: torch.Tensor, t: torch.Tensor,
            params: Sequence[torch.Tensor], **kwargs):
    return explicit_rk(rk4_tableau, fcn, y0, t, params)

Minimization Code:

import torch

def lotka_volterra(y, t, params):
    y1, y2 = y
    a, b, c, d = params

    return torch.tensor([a * y1 - b * y1 * y2, c * y2 * y1 - d * y2])

def loss_function(params):

    y0 = torch.tensor([fish_pop[0], bears_pop[0]], dtype = torch.float64)

    t = torch.linspace(years[0], years[-1], len(years), dtype = torch.float64)

    output = rk4_ivp(lotka_volterra, y0, t, params)

    loss = torch.sum((output - pop)**2)
    loss.requires_grad = True
    return loss

def minimize(loss_function, initial_parameters: torch.Tensor):
    list_params = []
    params = initial_parameters
    params.requires_grad = True
    optimizer = torch.optim.SGD([params], lr=0.5)

    for i in range(5):
        optimizer.zero_grad()
        loss: torch.Tensor = loss_function(params)
        loss.backward()
        optimizer.step()
        list_params.append(params.detach().clone())

    return params, list_params

starting_point = torch.nn.Parameter(torch.tensor([1.1, .4, .1, .4], dtype = torch.float64))
minimized_params, list_of_params = minimize(loss_function, starting_point)

loss_function(minimized_params), minimized_params

At the end of iteration the parameters do not get optimised and return as it is.

Result:

(tensor(118.6865, dtype=torch.float64, requires_grad=True),
 Parameter containing:
 tensor([1.1000, 0.4000, 0.1000, 0.4000], dtype=torch.float64,
        requires_grad=True))

Kaggle Notebook Link: https://www.kaggle.com/code/rakshitsingh421/parameter-estimation/edit

I tried to change requires_grad attributes but it didn't worked.


Solution

  • The problem is the line return torch.tensor([a * y1 - b * y1 * y2, c * y2 * y1 - d * y2])

    When you pass those values to torch.tensor, you are constructing an entirely new tensor object that has no relationship to the input values. This means you can't backprop through the new tensor to your params.

    You need to compute the output with torch operations.

    import torch
    import torch.nn as nn
    
    params = nn.Parameter(torch.tensor([1.1, .4, .1, .4], dtype = torch.float64))
    
    y0 = torch.tensor([1., 2.], dtype = torch.float64)
    
    y1, y2 = y0
    a, b, c, d = params
    
    torch.tensor([a * y1 - b * y1 * y2, c * y2 * y1 - d * y2]).requires_grad
    > False
    
    torch.stack([a * y1 - b * y1 * y2, c * y2 * y1 - d * y2]).requires_grad
    > True