Search code examples
python, tensorflow, neural-network, nlp, conv-neural-network

Adding Convolutional Layer to CNN for NLP analysis


I am having trouble adding convolutional layers to a CNN. With the help of various blog posts, I rebuilt a CNN for my purposes. My code currently looks like this:

def ConvNet(embeddings, max_sequence_length, num_words, embedding_dim, trainable=False, extra_conv=True):
    """Build and compile a 1D-convolutional regression model over word embeddings.

    Args:
        embeddings: Pre-trained embedding matrix handed to the ``Embedding``
            layer as its initial weights (presumably shaped
            ``(num_words, embedding_dim)`` -- confirm against the caller).
        max_sequence_length: Length of every input token-id sequence.
        num_words: Vocabulary size of the embedding layer.
        embedding_dim: Size of each embedding vector.
        trainable: Whether the embedding weights are updated during training.
        extra_conv: If truthy, use the multi-filter branch (kernel sizes 3, 4
            and 5, pooled and concatenated along axis 1); otherwise use a
            single Conv1D + MaxPooling1D branch with kernel size 3.

    Returns:
        The compiled Keras ``Model``. ``model.summary()`` is printed as a
        side effect.
    """
    embedding_layer = Embedding(num_words,
                                embedding_dim,
                                weights=[embeddings],
                                input_length=max_sequence_length,
                                trainable=trainable)

    sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)

    # Only the selected branch is built, so no unused layers are created.
    if extra_conv:
        filter_sizes = [3, 4, 5]
        convs = []
        for filter_size in filter_sizes:
            l_conv = Conv1D(filters=128, kernel_size=filter_size, activation='relu')(embedded_sequences)
            l_pool = MaxPooling1D(pool_size=3)(l_conv)
            convs.append(l_pool)
        # Merge every branch; passing the whole list keeps this correct if
        # filter_sizes is edited to hold more or fewer entries.
        features = concatenate(convs, axis=1)
    else:
        # Single Conv1D + max-pooling branch instead of the multi-filter
        # (Yoon Kim style) model.
        conv = Conv1D(filters=128, kernel_size=3, activation='relu')(embedded_sequences)
        features = MaxPooling1D(pool_size=3)(conv)

    x = Dropout(0.5)(features)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    # Single linear unit: the model predicts one real value (regression).
    preds = Dense(1, activation='linear')(x)

    model = Model(sequence_input, preds)
    model.compile(loss='mean_squared_error',
                  optimizer='adadelta',
                  metrics=['mean_squared_error'])
    model.summary()
    return model

The resulting model architecture is as follows:

Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, 1086)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1086, 300)    532500      input_1[0][0]                    
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 1084, 128)    115328      embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_2 (Conv1D)               (None, 1083, 128)    153728      embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_3 (Conv1D)               (None, 1082, 128)    192128      embedding_1[0][0]                
__________________________________________________________________________________________________
max_pooling1d_1 (MaxPooling1D)  (None, 361, 128)     0           conv1d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling1d_2 (MaxPooling1D)  (None, 361, 128)     0           conv1d_2[0][0]                   
__________________________________________________________________________________________________
max_pooling1d_3 (MaxPooling1D)  (None, 360, 128)     0           conv1d_3[0][0]                   
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 1082, 128)    0           max_pooling1d_1[0][0]            
                                                                 max_pooling1d_2[0][0]            
                                                                 max_pooling1d_3[0][0]            
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 1082, 128)    0           concatenate_1[0][0]              
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 138496)       0           dropout_2[0][0]                  
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 128)          17727616    flatten_1[0][0]                  
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 1)            129         dense_3[0][0]                    
==================================================================================================
Total params: 18,721,429
Trainable params: 18,188,929
Non-trainable params: 532,500

So basically, the model currently looks like this: [image: enter image description here]

So my question is: how can I add more convolutional layers to my CNN? I have already tried a lot, but somehow I am not able to do it. I would like to add a convolutional layer and a max-pooling layer before the concatenate layer.

I really appreciate any help,

Thanks in advance, Lukas


Solution

  • Just append a new conv and max-pooling layer inside your for loop, like this:

    from tensorflow.python.keras import Input, Model
    from tensorflow.python.keras.layers import Embedding, Conv1D, MaxPooling1D, concatenate, Dropout, Flatten, Dense
    
    
    def ConvNet(max_sequence_length, num_words, embedding_dim, trainable=False, extra_conv=True):
        """Build and compile a text CNN with two stacked conv/pool stages per branch.

        Args:
            max_sequence_length: Length of every input token-id sequence.
            num_words: Vocabulary size of the embedding layer.
            embedding_dim: Size of each embedding vector.
            trainable: Whether the embedding weights are updated during training.
            extra_conv: If truthy, use the multi-filter branches (kernel sizes
                3, 4 and 5), each followed by a second Conv1D + MaxPooling1D
                stage and then concatenated along axis 1; otherwise use a
                single Conv1D + MaxPooling1D branch with kernel size 3.

        Returns:
            The compiled Keras ``Model``. ``model.summary()`` is printed as a
            side effect.
        """
        # NOTE(review): no pre-trained weights are passed here, so with the
        # default trainable=False the embedding stays at its initial values --
        # confirm this is intended outside of a demo.
        embedding_layer = Embedding(num_words,
                                    embedding_dim,
                                    input_length=max_sequence_length,
                                    trainable=trainable)

        sequence_input = Input(shape=(max_sequence_length,), dtype='int32')
        embedded_sequences = embedding_layer(sequence_input)

        # Only the selected branch is built, so no unused layers are created.
        if extra_conv:
            filter_sizes = [3, 4, 5]
            convs = []
            for filter_size in filter_sizes:
                l_conv = Conv1D(filters=128, kernel_size=filter_size, activation='relu')(embedded_sequences)
                l_pool = MaxPooling1D(pool_size=3)(l_conv)
                # Second conv/pool stage stacked on the first, before the merge.
                # TODO edit parameters
                l_conv2 = Conv1D(filters=128, kernel_size=3, activation='relu')(l_pool)
                l_pool2 = MaxPooling1D(pool_size=3)(l_conv2)
                convs.append(l_pool2)
            features = concatenate(convs, axis=1)
        else:
            # Single Conv1D + max-pooling branch instead of the multi-filter
            # (Yoon Kim style) model.
            conv = Conv1D(filters=128, kernel_size=3, activation='relu')(embedded_sequences)
            features = MaxPooling1D(pool_size=3)(conv)

        x = Dropout(0.5)(features)
        x = Flatten()(x)
        x = Dense(128, activation='relu')(x)
        # Single linear unit: the model predicts one real value (regression).
        preds = Dense(1, activation='linear')(x)

        model = Model(sequence_input, preds)
        model.compile(loss='mean_squared_error',
                      optimizer='adadelta',
                      metrics=['mean_squared_error'])
        model.summary()
        return model
    
    if __name__ == "__main__":
        # Smoke run: build the model (and print its summary) for 20-token
        # sequences, a 10000-word vocabulary and 100-dimensional embeddings.
        ConvNet(max_sequence_length=20, num_words=10000, embedding_dim=100)