Thanks for your attention. I can't get the parameters of my model to update: they stay the same even though I call backward() on my loss and step the optimiser. The code is below.
for k in range(100):
    c_train = cluster.forward(context)
    # pdb.set_trace()  # debug here
    loss_Cluster = cluster_L(train_num, args['lambda_c'], scalar_f, c_train)
    # loss_Cluster = F.cross_entropy(c_train, test)  # just for testing, proving that loss_Cluster is right
    optimiser_c.zero_grad()
    loss_Cluster.backward()
    grad_norm = th.nn.utils.clip_grad_norm_(c_param, 10)
    # for para in cluster.parameters():
    #     print(para)
    optimiser_c.step()
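(A quick check makes the problem visible: print the gradients rather than the parameters, right after backward(). With the forward pass shown further below, they come out as exactly zero, so step() has nothing to apply. The names match the snippet above.)

for name, para in cluster.named_parameters():
    print(name, para.grad.abs().sum().item())  # prints 0.0 for every layer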
Some configs and the cluster model:
import torch as th
from torch.optim import RMSprop

args = {'c_emb': 16, 'use_cuda': False, 'ally_type': [1, 0, 0, 0, 0, 0, 0, 0, 0],
        'enemy_type': [1, 0, 0, 0, 0, 0, 0, 0, 0], 'n_classes': 5, 'lambda_c': 2}
cluster = CLUSTER_L(args)
context = th.tensor(args['ally_type'] + args['enemy_type'], dtype=th.float)
train_num = args['n_classes'] * args['lambda_c']
scalar_f = (args['n_classes'] * (train_num - 1)) / (args['lambda_c'] * args['lambda_c'])
c_param = list(cluster.parameters())
optimiser_c = RMSprop(c_param, lr=0.1)
import torch.nn as nn
import torch.nn.functional as F
import torch
import numpy as np

class CLUSTER_L(nn.Module):
    def __init__(self, args):
        super(CLUSTER_L, self).__init__()
        self.args = args
        self.embed_dim = self.args['c_emb']
        if self.args['use_cuda']:
            torch.cuda.set_device(torch.device('cuda:0'))
        self.input_size = len(self.args['ally_type']) + len(self.args['enemy_type'])
        self.n = self.args['n_classes']
        self.lbd = self.args['lambda_c']
        # the input of cl should be 1*2T, output should be 1*(2T*self.n*self.lbd)
        self.cl = nn.Sequential(nn.Linear(self.input_size, self.embed_dim),
                                nn.ReLU(),
                                nn.Linear(self.embed_dim, self.embed_dim),
                                nn.ReLU(),
                                nn.Linear(self.embed_dim, self.n * self.lbd * self.input_size)
                                )
        self.af = nn.ReLU()

    # inputs should be 1*2T size
    def forward(self, inputs):
        d_outputs = self.cl(inputs).view(self.n * self.lbd, self.input_size)
        outputs = ((self.af(d_outputs) + 0.1) * 10).round()  # make sure the outputs are positive and >= 1
        print(outputs)
        outputs2 = outputs * inputs  # mask and output
        return outputs2
I've tried swapping the loss for standard functions from nn.functional such as cross_entropy, but the issue remains the same. Could anyone tell me how I can update the parameters of the model (cluster)? I appreciate your help.
Just remove the round(). torch.round is piecewise constant, so its gradient is zero almost everywhere: backward() runs without error, but every gradient reaching the linear layers is exactly zero, and optimiser_c.step() therefore changes nothing. You should also modify the view to keep the batch dim.
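Here is a minimal sketch of the fixed forward, assuming the rest of CLUSTER_L stays as in the question (with the batched view, context also needs a leading batch dimension, e.g. context.unsqueeze(0)):

# Sketch only: forward without .round(), keeping the batch dim.
# .round() is piecewise constant, so its gradient is zero almost
# everywhere and no signal reaches the linear layers in self.cl.
def forward(self, inputs):
    # inputs: (batch, 2T)
    d_outputs = self.cl(inputs).view(-1, self.n * self.lbd, self.input_size)
    # ReLU + 0.1, scaled by 10: positive (>= 1) and still differentiable
    outputs = (self.af(d_outputs) + 0.1) * 10
    outputs2 = outputs * inputs.unsqueeze(1)  # mask with the context vector
    return outputs2

If you need integer outputs at inference time, apply round() outside the training graph (or use a straight-through estimator); everything between the parameters and the loss must be differentiable for backward() to produce non-zero gradients.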