Search code examples
pythonpython-3.xkerasdeep-learningconv-neural-network

Translation CNN from keras code to pytorch


I'm trying to translate the below 3layer CNN architecture from keras to pytorch. The usage of the model is to predict expression value(input_shape_val) from dna sequence(input_shape_hot). The sequence is one hot encoded. The architecture orignally meant to train the model consecutively CNN (3 layers)-FC (2 layers) with batch normalization and weight dropout were applied after all layers and max-pooling after CNN layers( ref-paper, ref-code).

def POC_model2(input_shape_hot,input_shape_val,DR):

    X_input1 = Input(shape = input_shape_hot)
    X_input2 = Input(shape = input_shape_val)
    # L 1: CONV
    X1 = Conv1D(filters=32, kernel_size=10, strides=1, activation='relu')(X_input1) 
    X1 = BatchNormalization()(X1)
    X1 = Dropout(DR)(X1)
    X1 = MaxPooling1D(pool_size=4, strides=4)(X1)
    # L 3: CONV
    X1 = Conv1D(filters=64, kernel_size=10, strides=1, activation='relu')(X_input1) 
    X1 = BatchNormalization()(X1)
    X1 = Dropout(DR)(X1)
    X1 = MaxPooling1D(pool_size=4, strides=4)(X1)
    # L 2: CONV
    X1 = Conv1D(filters=128, kernel_size=10, strides=1, activation='relu')(X_input1) 
    X1 = BatchNormalization()(X1)
    X1 = Dropout(DR)(X1)
    X1 = MaxPooling1D(pool_size=4, strides=4)(X1)

    X1 = Flatten()(X1)

    X1 = Concatenate(axis=1)([X1,X_input2])
    # fully connected
    X = Dense(64, activation='relu')(X1)
    X = BatchNormalization()(X)
    X = Dropout(DR)(X)

    X = Dense(1)(X)

    model = Model(inputs = [X_input1,X_input2], outputs = X)

    return model

I tried from this code :

from typing import List
class DNA_CNN_test2(nn.Module):
    def __init__(self,
                 seq_len: int,
                 num_filters: List[int] = [32, 64,128],
                 kernel_size: int = 3,
                 p = 0.2):
        super().__init__()
        self.seq_len = seq_len
        # CNN module
        self.conv_net = nn.Sequential()
        num_filters = [4] + num_filters
        for idx in range(len(num_filters) - 1):
            self.conv_net.add_module(
                f"conv_{idx}",
                nn.Conv1d(num_filters[idx], num_filters[idx + 1],
                          kernel_size=kernel_size, padding='same')
            )
            self.conv_net.add_module(f"relu_{idx}", nn.ReLU(inplace=True))
            self.conv_net.add_module(f"batchNor_{idx}",nn.BatchNorm1d(num_filters[idx + 1]))
            self.conv_net.add_module(f"dropout_{idx}",nn.Dropout(0.2))
            self.conv_net.add_module(f"MaxP_{idx}",nn.MaxPool1d(4,stride= 4))
        self.conv_net.add_module("flatten", nn.Flatten())
        self.conv_net.add_module("linear",nn.Linear(num_filters[-1]*seq_len, 1))
        
    def forward(self, xb: torch.Tensor):
        """Forward pass."""
        xb = xb.permute(0, 2, 1) 
        out = self.conv_net(xb)
        return out        

And got error message:

 mat1 and mat2 shapes cannot be multiplied (2048x1920 and 128000x1)

Solution

  • You forgot the add_module function for BN and dropout:

    self.conv_net.add_module(
      f'bn_{idx}',
      nn.BatchNorm1d(num_filters[idx + 1]),
    )
    self.conv_net.add_module(f'dropout_{idx}', nn.Dropout(p))