I am training a model to predict pose using a custom Pytorch
model. However, V1 below never learns (params don't change). The output is connected to the backdrop graph and grad_fn=MmBackward
I can't understand why V1 isn't learning but V2 is?
class cam_pose_transform_V1(torch.nn.Module):
def __init__(self):
super(cam_pose_transform, self).__init__()
self.elevation_x_rotation_radians = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))
self.azimuth_y_rotation_radians = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))
self.z_rotation_radians = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))
def forward(self, x):
exp_i = torch.zeros((4,4))
c1 = torch.cos(self.elevation_x_rotation_radians)
s1 = torch.sin(self.elevation_x_rotation_radians)
c2 = torch.cos(self.azimuth_y_rotation_radians)
s2 = torch.sin(self.azimuth_y_rotation_radians)
c3 = torch.cos(self.z_rotation_radians)
s3 = torch.sin(self.z_rotation_radians)
rotation_in_matrix = torch.tensor([
[c2, s2 * s3, c3 * s2],
[s1 * s2, c1 * c3 - c2 * s1 * s3, -c1 * s3 - c2 * c3 * s1],
[-c1 * s2, c3 * s1 + c1 * c2 * s3, c1 * c2 * c3 - s1 * s3]
], requires_grad=True)
exp_i[:3, :3] = rotation_in_matrix
exp_i[3, 3] = 1.
return torch.matmul(exp_i, x)
However, this version learns as expected (params and loss change) and also has grad_fn=MmBackward
on the output:
def vec2ss_matrix(vector): # vector to skewsym. matrix
ss_matrix = torch.zeros((3,3))
ss_matrix[0, 1] = -vector[2]
ss_matrix[0, 2] = vector[1]
ss_matrix[1, 0] = vector[2]
ss_matrix[1, 2] = -vector[0]
ss_matrix[2, 0] = -vector[1]
ss_matrix[2, 1] = vector[0]
return ss_matrix
class cam_pose_transform_V2(torch.nn.Module):
def __init__(self):
super(camera_transf, self).__init__()
self.w = torch.nn.Parameter(torch.normal(0., 1e-6, size=(3,)))
self.v = torch.nn.Parameter(torch.normal(0., 1e-6, size=(3,)))
self.theta = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))
def forward(self, x):
exp_i = torch.zeros((4,4))
w_skewsym = vec2ss_matrix(self.w)
v_skewsym = vec2ss_matrix(self.v)
exp_i[:3, :3] = torch.eye(3) + torch.sin(self.theta) * w_skewsym + (1 - torch.cos(self.theta)) * torch.matmul(w_skewsym, w_skewsym)
exp_i[:3, 3] = torch.matmul(torch.eye(3) * self.theta + (1 - torch.cos(self.theta)) * w_skewsym + (self.theta - torch.sin(self.theta)) * torch.matmul(w_skewsym, w_skewsym), self.v)
exp_i[3, 3] = 1.
return torch.matmul(exp_i, x)
In the training loop I printed the .grad
attributes using:
print([i.grad for i in list(cam_pose.parameters())])
print([i.grad for i in list(cam_pose.parameters())])
# V1
[None, None, None]
[None, None, None]
# V2
[None, None, None]
[tensor([-0.0032, 0.0025, -0.0053]), tensor([ 0.0016, -0.0013, 0.0054]), tensor(-0.0559)]
Nothing else in the code was changed, just swapped V1 model for V2.
this is your problem right here:
rotation_in_matrix = torch.tensor([
[c2, s2 * s3, c3 * s2],
[s1 * s2, c1 * c3 - c2 * s1 * s3, -c1 * s3 - c2 * c3 * s1],
[-c1 * s2, c3 * s1 + c1 * c2 * s3, c1 * c2 * c3 - s1 * s3]], requires_grad=True)
you are creating a tensor out of a list of tensors, which is not a differentiable operation -- i.e. there's no gradient flow from rotation_in_matrix
to its elements c1..c3
the solution would be to create the rotation_in_matrix
using tensor operations like stack and cat instead