Tags: python, deep-learning, model, pytorch, pre-trained-model

Printing the size of the input and output of all the layers of a pretrained model


I want to print the sizes of the inputs and outputs of all the layers of a pretrained model. I use this pretrained model as self.feature in my class.

Printing this pretrained model gives the following:

TimeSformer(
  (model): VisionTransformer(
    (dropout): Dropout(p=0.0, inplace=False)
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (time_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (temporal_attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_fc): Linear(in_features=768, out_features=768, bias=True)
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (temporal_attn): Attention(  # *********
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True) # @@@@@@@
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_fc): Linear(in_features=768, out_features=768, bias=True)
        (drop_path): DropPath()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      ...
      (11): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (temporal_attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
        )
        (temporal_fc): Linear(in_features=768, out_features=768, bias=True)
        (drop_path): DropPath()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
    (head): Linear(in_features=768, out_features=400, bias=True)
  )
)

This is the code of my class and the method I use for printing the layer sizes:

import torch.nn as nn
from timesformer.models.vit import TimeSformer


class Class(nn.Module):
    def __init__(self, pretrained=False):
        super(Class, self).__init__()

        self.feature = TimeSformer(img_size=224, num_classes=400, num_frames=8,
                                   attention_type='divided_space_time',
                                   pretrained_model='path/to/the/weight.pyth')

    def forward(self, x):
        for layer in self.feature:
            x = layer(x)
            print(x.size())
        return x

But with this approach I am facing this error:

TypeError: 'TimeSformer' object is not iterable

How can I print the sizes of all the layers?

Update:

Using the following code instead, I receive the error mentioned in the comment:

def forward(self, x, out_consp=False):
    layers = list(self.featureExtractor.children())
    for layer in layers:
        x = layer(x)
        print(x.size())
    return x
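
For context, here is a minimal, hypothetical sketch (a toy module, not TimeSformer) of what is going on: a plain nn.Module is not iterable, and looping over .children() only visits submodules in definition order, so it cannot reproduce everything the real forward() does (functional calls, reshaping, residual connections, and so on):

import torch
import torch.nn as nn

# Toy stand-in for the pretrained backbone (hypothetical, for illustration only)
class ToyBackbone(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(8, 16)
        self.fc2 = nn.Linear(16, 4)

    def forward(self, x):
        # the activation is a functional call, so it is invisible to .children()
        return self.fc2(torch.relu(self.fc1(x)))

net = ToyBackbone()
x = torch.randn(2, 8)

# for layer in net:            # TypeError: 'ToyBackbone' object is not iterable
for layer in net.children():   # visits fc1, then fc2, in definition order only
    x = layer(x)
    print(x.size())            # torch.Size([2, 16]), then torch.Size([2, 4])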

Solution

  • You can use forward hooks to print the shape of the input and the output of each layer. The following code does that:

    import torch

    def hook_function(module, input, output):
        # Forward hook: called with the layer's inputs (a tuple) and its output
        print(f'{module.name} :')
        print(module)
        if isinstance(input[0], tuple):
            print('input shapes:')
            for elem in input[0]:
                print(elem.shape)
        else:
            print(f'input shape: {input[0].shape}')
        if isinstance(output, tuple):
            print('output shapes:')
            for elem in output:
                print(elem.shape)
        else:
            print(f'output shape: {output.shape}')
        print('')

    def set_names(net):
        # Attach a readable, fully qualified name to every submodule
        def recurs(net, parent_name=None):
            for name, mod in net.named_children():
                if parent_name is not None:
                    name = '_'.join([parent_name, name])
                recurs(mod, name)
                setattr(mod, 'name', name)

        recurs(net)

    def print_shapes(network, dummy_input_shape, device='cuda', eval=True):
        network = network.to(device)
        if eval:
            network.eval()
        else:
            network.train()
            # layers such as BatchNorm need more than one sample in training mode
            assert dummy_input_shape[0] > 1
        dummy = torch.randn(dummy_input_shape, device=device)
        set_names(network)
        handles = []

        def attach_hooks(net):
            # register the hook only on leaf modules (modules without children)
            leaf_layers = 0
            for mod in net.children():
                leaf_layers += 1
                attach_hooks(mod)
            if leaf_layers == 0:
                handles.append(net.register_forward_hook(hook_function))

        attach_hooks(network)
        network(dummy)
        # remove the hooks afterwards if needed
        for handle in handles:
            handle.remove()

    Example:

    network = TimeSformer(img_size=224, num_classes=400, num_frames=8,
                          attention_type='divided_space_time',
                          pretrained_model='path/to/the/weight.pyth')
    # The behaviour of a forward function could be different during training
    print_shapes(network, (1, 3, 224, 224), 'cpu', eval=True)
    print_shapes(network, (2, 3, 224, 224), 'cpu', eval=False)
    
    

    The snippet of the output below shows that 'norm1', although it is defined before the 'temporal_norm1' layer in the 'Block' module, is actually executed later during the forward pass:

    model_blocks_11_temporal_fc :
    Linear(in_features=768, out_features=768, bias=True)
    input shape: torch.Size([2, 1568, 768])
    output shape: torch.Size([2, 1568, 768])
    
    model_blocks_11_norm1 :
    LayerNorm((768,), eps=1e-06, elementwise_affine=True)
    input shape: torch.Size([16, 197, 768])
    output shape: torch.Size([16, 197, 768])
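
    As an aside, if a ready-made tool is acceptable, the torchinfo package gives a similar per-layer summary out of the box. This is only a sketch, assuming torchinfo is installed and that the dummy input shape used above is valid for the model:

    from timesformer.models.vit import TimeSformer
    from torchinfo import summary

    network = TimeSformer(img_size=224, num_classes=400, num_frames=8,
                          attention_type='divided_space_time',
                          pretrained_model='path/to/the/weight.pyth')
    # torchinfo runs its own forward pass on a dummy tensor of this size;
    # col_names selects which columns appear in the printed table
    summary(network, input_size=(1, 3, 224, 224), device='cpu',
            col_names=('input_size', 'output_size'))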