I have the following ResNet prototype in PyTorch:
Resnet_Classifier(
(activation): ReLU()
(model): Sequential(
(0): Res_Block(
(mod): Sequential(
(0): Conv1d(1, 200, kernel_size=(5,), stride=(1,), padding=same)
(1): ReLU()
(2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(4): ReLU()
(5): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(7): ReLU()
(8): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Conv1d(1, 200, kernel_size=(1,), stride=(1,), padding=same)
)
(1): ReLU()
(2): Flatten(start_dim=1, end_dim=-1)
(3): Dropout(p=0.1, inplace=False)
(4): Linear(in_features=40000, out_features=2, bias=True)
(5): Softmax(dim=1)
)
)
Input sample shape is (1, 200).
It seems absolutely fine, but when I try to view the graph in TensorBoard, I get the following structure:

Somehow my residual block is connected to the Linear layer. Does this connection really correspond to my network structure?
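For reference, I export the graph with something like this (the log directory name is arbitrary; the dummy input shape follows the stated sample shape, with a leading batch dimension):

import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/resnet_proto')
dummy = torch.randn(1, 1, 200)  # (batch, channels, length)
writer.add_graph(R, dummy)      # R is the nn.Sequential defined below
writer.close()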
Model definition:
from copy import deepcopy

import torch.nn as nn

class Res_Block(nn.Module):
    def __init__(self, in_ch, out_ch, ks, stride, activation):
        super(Res_Block, self).__init__()
        self.mod = nn.Sequential(
            nn.Conv1d(in_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),  # fresh activation instance for each position
            nn.BatchNorm1d(out_ch),
            nn.Conv1d(out_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),
            nn.BatchNorm1d(out_ch),
            nn.Conv1d(out_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),
            nn.BatchNorm1d(out_ch)
        )
        # 1x1 conv so the skip connection matches the main path's channel count
        self.shortcut = nn.Conv1d(in_ch, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        # residual connection: main path plus projected input
        return self.mod(X) + self.shortcut(X)
layers = []
layers.append(Res_Block(1, 200, 5, 1, nn.ReLU()))
layers.append(nn.ReLU())
layers.append(nn.Flatten())
layers.append(nn.Dropout(0.2))
layers.append(nn.Linear(200 * 200, 2))
layers.append(nn.Softmax(dim=1))
R = nn.Sequential(*layers)
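As a sanity check that the shapes line up, a dummy batch of 8 samples should come out as (8, 2):

import torch

x = torch.randn(8, 1, 200)  # (batch, channels, length)
R.eval()                    # put BatchNorm/Dropout into eval mode for the check
with torch.no_grad():
    out = R(x)
print(out.shape)            # torch.Size([8, 2])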
I solved the problem by removing the nn.Sequential in Res_Block.__init__ and adding self.l1, self.l2, ... instead. (I also removed some layers and added max pooling, but only after I solved the problem.)
class Res_Block(nn.Module):
    def __init__(self, in_shape, out_ch, ks, stride, activation):
        super(Res_Block, self).__init__()
        self.l1 = nn.Conv1d(in_shape, out_ch, ks, stride, padding='same')
        self.l2 = deepcopy(activation)
        self.l3 = nn.BatchNorm1d(out_ch)
        self.l4 = nn.Conv1d(out_ch, out_ch, ks, stride, padding='same')
        self.l5 = nn.BatchNorm1d(out_ch)
        self.shortcut = nn.Conv1d(in_shape, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        return self.l5(self.l4(self.l3(self.l2(self.l1(X))))) + self.shortcut(X)
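A quick standalone check of the revised block (the dummy input is mine; each sample is one channel of length 200):

block = Res_Block(1, 200, 5, 1, nn.ReLU())
x = torch.randn(4, 1, 200)
print(block(x).shape)  # torch.Size([4, 200, 200])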
The corresponding TensorBoard structure is:

The only remaining question is why that change solved the problem.