python, machine-learning, deep-learning, pytorch, neural-network

How to implement self-adaptive weights in a neural network in PyTorch


I want to develop a Physics-Informed Neural Network (PINN) model in PyTorch. My network should be trained on two losses: a boundary condition (BC) loss and a partial differential equation (PDE) loss. I am adding these two losses, but the problem is that the BC is controlling the main loss, as in the following figure:

[Figure: training loss curves, with one loss term dominating the total loss]

First, I make a simple finite difference calculation for my 1D heat conduction problem:

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from pyDOE import lhs

######### Finite difference solution
# geometry:
L = 1 # length of the rod
# mesh:
dx = 0.01
nx = int(L/dx) + 1
x = np.linspace(0, L, nx)
# temporal grid:
t_sim = 1
dt = 0.01
nt = int (t_sim/dt)
# parametrization
alpha = 0.14340344168260039
# IC
t_ic = 4
# BC
t_left = 5 # left side held at 5 °C
t_right = 3 # right side held at 3 °C
# Results
T = np.ones(nx) * t_ic
all_T = []
for i in range (0, nt):
    Tn = T.copy()
    T[1:-1] = Tn[1:-1] + dt/(dx**2) * alpha * (Tn[2:] - 2*Tn[1:-1] + Tn[0:-2])
    T[0] = t_left
    T[-1] = t_right
    all_T.append(Tn)
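Note that an explicit update like the one above is only stable when the mesh Fourier number alpha*dt/dx**2 is at most 0.5, so it is worth printing that number for the chosen dx and dt:

r = alpha * dt / dx**2 # mesh Fourier number of the explicit scheme
print(f"r = {r:.2f}; the explicit update is stable only for r <= 0.5")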

Then, the data is prepared for the PINN model in the next block of code:

x = torch.linspace(0, L, nx, dtype=torch.float32)
t = torch.linspace(0, t_sim, nt, dtype=torch.float32)
T, X = torch.meshgrid(t, x, indexing='ij') # T[i,j] = t[i], X[i,j] = x[j]
Temps = np.concatenate (all_T).reshape(nt,nx)
x_test = torch.hstack((X.transpose(1,0).flatten()[:,None], T.transpose(1,0).flatten()[:,None])) 
y_test = torch.from_numpy(Temps) # I suppose it is the ground truth
lb = x_test[0] # lower boundary
ub = x_test[-1] # upper boundary
left_x = torch.hstack((X[:,0][:,None], T[:,0][:,None])) # x and t of left boundary
left_y = torch.ones(left_x.shape[0], 1) * t_left # Temperature of left boundary
left_y[0,0] = t_ic
right_x = torch.hstack((X[:,-1][:,None], T[:,-1][:,None]))  # x and t of right boundary
right_y = torch.ones(right_x.shape[0], 1) * t_right # Temperature of right boundary
right_y[0,0] = t_ic
bottom_x = torch.hstack((X[0,1:-1][:,None], T[0,1:-1][:,None])) # x and t of IC
bottom_y = torch.ones(bottom_x.shape[0], 1) * t_ic # Temperature of IC
No_BC = 1 # fraction of the BC data used for training (1 = 100 percent)
No_IC = 1 # fraction of the IC data used for training (1 = 100 percent)
idx_l = np.random.choice(left_x.shape[0], int (left_x.shape[0]*No_BC), replace=False)
idx_r = np.random.choice(right_x.shape[0], int (right_x.shape[0]*No_BC), replace=False)
idx_b = np.random.choice(bottom_x.shape[0], int (bottom_x.shape[0]*No_IC), replace=False)
X_train_No = torch.vstack([left_x[idx_l,:], right_x[idx_r,:], bottom_x[idx_b,:]])
Y_train_No = torch.vstack([left_y[idx_l,:], right_y[idx_r,:], bottom_y[idx_b,:]])
N_f = 5000
X_train_Nf = lb + (ub - lb) * torch.from_numpy(lhs(2, N_f)).float() # Latin hypercube samples scaled to the domain
f_hat = torch.zeros(X_train_Nf.shape[0], 1, dtype=torch.float32) # zero array for loss of PDE
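To see how the collocation points and the BC/IC points cover the (x, t) domain, a quick scatter plot (using the matplotlib import from the first block) can help:

plt.figure(figsize=(6, 4))
plt.scatter(X_train_Nf[:, 0], X_train_Nf[:, 1], s=1, label='PDE collocation points')
plt.scatter(X_train_No[:, 0], X_train_No[:, 1], s=4, label='BC/IC training points')
plt.xlabel('x')
plt.ylabel('t')
plt.legend()
plt.show()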

This is my script for the PINN, and I would very much appreciate your help:

class FCN(nn.Module):
    ##Neural Network
    def __init__(self,layers):
        super().__init__() #call __init__ from parent class 
        self.activation = nn.Tanh()
        self.loss_function = nn.MSELoss(reduction ='mean')
        # Initialise the network layers as a list using nn.ModuleList
        self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)]) 
        self.iter = 0
        # Xavier normal initialization
        for i in range(len(layers)-1):         
            nn.init.xavier_normal_(self.linears[i].weight.data, gain=1.0)            
            nn.init.zeros_(self.linears[i].bias.data)   
    # forward pass
    def forward(self,x):
        if not torch.is_tensor(x):
            x = torch.from_numpy(x)
        a = x.float()
        for i in range(len(self.linears)-1): # hidden layers with tanh activation
            z = self.linears[i](a)              
            a = self.activation(z)    
        a = self.linears[-1](a)
        return a
    # Loss functions
    #Loss BC
    def lossBC(self, x_BC, y_BC):
        loss_BC = self.loss_function(self.forward(x_BC),y_BC)
        return loss_BC.float()
    #Loss PDE
    def lossPDE(self,x_PDE):
        g = x_PDE.clone()
        g.requires_grad = True # Enable differentiation
        f = self.forward(g)
        f_x_t = torch.autograd.grad(f, g, torch.ones_like(f), retain_graph=True, create_graph=True)[0] # first derivatives w.r.t. x and t
        f_xx_tt = torch.autograd.grad(f_x_t, g, torch.ones_like(f_x_t), create_graph=True)[0] # second derivatives w.r.t. x and t
        f_t = f_x_t[:,[1]] # du/dt (column 1 of the input is t)
        f_xx = f_xx_tt[:,[0]] # d2u/dx2 (column 0 of the input is x)
        f = f_t - alpha * f_xx # residual of the heat equation
        return self.loss_function(f,f_hat).float()
      
    def loss(self,x_BC,y_BC,x_PDE):
        loss_bc = self.lossBC(x_BC.float(),y_BC.float())
        loss_pde = self.lossPDE(x_PDE.float())
        return loss_bc.float() + loss_pde.float()
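As a quick sanity check of the column convention used in lossPDE (column 0 of the network input is x and column 1 is t, matching how x_test was built), the derivatives returned by autograd can be inspected on a small random batch; the tiny layer sizes below are just for illustration:

layers = np.array([2, 10, 1]) # tiny illustrative network: inputs (x, t), one output
model = FCN(layers)
g = torch.rand(5, 2, requires_grad=True) # 5 random (x, t) points
u = model(g)
du = torch.autograd.grad(u, g, torch.ones_like(u), create_graph=True)[0]
print(du[:, [0]].shape, du[:, [1]].shape) # du/dx and du/dt, each of shape (5, 1)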

And this is how I build the model, the arrays holding the losses, and finally the plot:

layers = np.array([2, 50, 50, 50, 50, 50, 1])
PINN = FCN(layers)
optimizer = torch.optim.Adam(PINN.parameters(), lr=0.001)
def closure():
    optimizer.zero_grad()
    loss_p = PINN.lossPDE(X_train_Nf)
    loss_p.backward()
    loss_b = PINN.lossBC(X_train_No, Y_train_No)
    loss_b.backward()
    return loss_b + loss_p
total_l = np.array([])
BC_l = np.array([])
PDE_l = np.array([])
test_BC_l = np.array([])
for i in range(10000):
    loss = optimizer.step(closure)
    total_l = np.append(total_l, loss.cpu().detach().numpy())
    PDE_l = np.append (PDE_l, PINN.lossPDE(X_train_Nf).cpu().detach().numpy())
    BC_l = np.append(BC_l, PINN.lossBC(X_train_No, Y_train_No).cpu().detach().numpy())
    with torch.no_grad():
        test_loss = PINN.lossBC(x_test, y_test.flatten().view(-1,1).float())
        test_BC_l = np.append(test_BC_l, test_loss.cpu().detach().numpy())
fig,ax=plt.subplots(1,1, figsize=(9,9))
ax.plot(PDE_l, c = 'g', lw=2, label='PDE loss in train')
ax.plot(BC_l, c = 'k', lw=2, label='BC loss in train')
ax.plot(test_BC_l, c = 'r', lw=2, label='BC loss in test')
ax.plot(total_l, c = 'b', lw=2, label='total loss in train')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.legend()
plt.show()

Solution

  • You should not add the boundary-based and PDE-based losses together when performing backpropagation. Instead, backpropagate iteratively on the PDE loss and on each of the boundary conditions used (Dirichlet or Neumann). When you add both of them, the network learns almost nothing about the BC, because the majority of the loss is generated by the PDE term; the network therefore fits the PDE-based loss and neglects the BC, as is clearly evident from your graph. The training step should look something like this: for each loss type, 1) compute the PDE loss and backpropagate on it, then 2) compute the BC loss and backpropagate on it, as in the sketch below.
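A minimal sketch of that training loop, assuming the model, optimizer, and training tensors defined in the question (PINN, optimizer, X_train_Nf, X_train_No, Y_train_No), could look like this:

for epoch in range(10000):
    # 1) PDE residual loss: back-propagate and step on it alone
    optimizer.zero_grad()
    loss_pde = PINN.lossPDE(X_train_Nf)
    loss_pde.backward()
    optimizer.step()

    # 2) Boundary/initial-condition loss: back-propagate and step on it alone
    optimizer.zero_grad()
    loss_bc = PINN.lossBC(X_train_No, Y_train_No)
    loss_bc.backward()
    optimizer.step()

Each term now produces its own gradient step, so the smaller loss is no longer drowned out by the larger one; explicit per-term weights can still be added on top of this if needed.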