I have the following ResNet prototype in PyTorch:
Resnet_Classifier(
(activation): ReLU()
(model): Sequential(
(0): Res_Block(
(mod): Sequential(
(0): Conv1d(1, 200, kernel_size=(5,), stride=(1,), padding=same)
(1): ReLU()
(2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(4): ReLU()
(5): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): Conv1d(200, 200, kernel_size=(5,), stride=(1,), padding=same)
(7): ReLU()
(8): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(shortcut): Conv1d(1, 200, kernel_size=(1,), stride=(1,), padding=same)
)
(1): ReLU()
(2): Flatten(start_dim=1, end_dim=-1)
(3): Dropout(p=0.1, inplace=False)
(4): Linear(in_features=40000, out_features=2, bias=True)
(5): Softmax(dim=1)
)
)
Input sample shape is (1, 200).
It seems absolutely fine, but when I try to view the graph in TensorBoard, I get the following structure:

Somehow my residual block is connected to the Linear layer. Does this connection really correspond to my network structure?
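For reference, I export the graph with something like this (the log directory name is arbitrary; the dummy input shape follows the stated sample shape, with a leading batch dimension):

import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/resnet_proto')
dummy = torch.randn(1, 1, 200)  # (batch, channels, length)
writer.add_graph(R, dummy)      # R is the nn.Sequential defined below
writer.close()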
Model definition:
from copy import deepcopy

import torch.nn as nn

class Res_Block(nn.Module):
    def __init__(self, in_ch, out_ch, ks, stride, activation):
        super(Res_Block, self).__init__()
        self.mod = nn.Sequential(
            nn.Conv1d(in_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),  # fresh activation instance for each position
            nn.BatchNorm1d(out_ch),
            nn.Conv1d(out_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),
            nn.BatchNorm1d(out_ch),
            nn.Conv1d(out_ch, out_ch, ks, stride, padding='same'),
            deepcopy(activation),
            nn.BatchNorm1d(out_ch)
        )
        # 1x1 conv so the skip connection matches the main path's channel count
        self.shortcut = nn.Conv1d(in_ch, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        # residual connection: main path plus projected input
        return self.mod(X) + self.shortcut(X)
layers = []
layers.append(Res_Block(1, 200, 5, 1, nn.ReLU()))
layers.append(nn.ReLU())
layers.append(nn.Flatten())
layers.append(nn.Dropout(0.2))
layers.append(nn.Linear(200 * 200, 2))
layers.append(nn.Softmax(dim=1))
R = nn.Sequential(*layers)
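As a sanity check that the shapes line up, a dummy batch of 8 samples should come out as (8, 2):

import torch

x = torch.randn(8, 1, 200)  # (batch, channels, length)
R.eval()                    # put BatchNorm/Dropout into eval mode for the check
with torch.no_grad():
    out = R(x)
print(out.shape)            # torch.Size([8, 2])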
I solved the problem by removing the nn.Sequential in Res_Block.__init__ and adding self.l1, self.l2, ... instead. (I also removed some layers and added max pooling, but only after I solved the problem.)
class Res_Block(nn.Module):
    def __init__(self, in_shape, out_ch, ks, stride, activation):
        super(Res_Block, self).__init__()
        self.l1 = nn.Conv1d(in_shape, out_ch, ks, stride, padding='same')
        self.l2 = deepcopy(activation)
        self.l3 = nn.BatchNorm1d(out_ch)
        self.l4 = nn.Conv1d(out_ch, out_ch, ks, stride, padding='same')
        self.l5 = nn.BatchNorm1d(out_ch)
        self.shortcut = nn.Conv1d(in_shape, out_ch, kernel_size=1, stride=1, padding='same')

    def forward(self, X):
        return self.l5(self.l4(self.l3(self.l2(self.l1(X))))) + self.shortcut(X)
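A quick standalone check of the revised block (the dummy input is mine; each sample is one channel of length 200):

block = Res_Block(1, 200, 5, 1, nn.ReLU())
x = torch.randn(4, 1, 200)
print(block(x).shape)  # torch.Size([4, 200, 200])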
The corresponding TensorBoard structure is:

The only remaining question is why that change solved the problem.