Tags: python, tensorflow, conv-neural-network, initializer

Using weights initializer with tf.nn.conv2d


When using tf.layers.conv2d, setting the initializer is easy: it can be done through its kernel_initializer parameter. But what if I use tf.nn.conv2d? I use the code below. Is this equivalent to setting the kernel_initializer parameter of tf.layers.conv2d? Although the program runs without errors, I don't know how to verify whether it does what it is expected to do.

    with tf.name_scope('conv1_2') as scope:
        kernel = tf.get_variable(initializer=tf.contrib.layers.xavier_initializer(),
                                 shape=[3, 3, 32, 32], name='weights')
        conv = tf.nn.conv2d(conv1_1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[32], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        self.conv1_2 = tf.nn.relu(out, name=scope)
        self.parameters += [kernel, biases]
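
For reference, here is a minimal sketch of the same block written with tf.layers.conv2d (TF 1.x API; the parameter values mirror the code above and are illustrative):

    # illustrative tf.layers.conv2d equivalent of the block above (TF 1.x);
    # the kernel_initializer parameter replaces the explicit tf.get_variable
    conv1_2 = tf.layers.conv2d(
        inputs=conv1_1,
        filters=32,
        kernel_size=3,
        padding='same',
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.zeros_initializer(),
        name='conv1_2')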

Solution

  • The underlying operation is the same (see here).

    As for the kernel and its initialization, I took a glance at the code and it looks the same: tf.layers.conv2d calls tf.get_variable at the end of the day.
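
    One quick way to see this is to build a tf.layers.conv2d in a fresh graph and list the variables it creates; the kernel shows up as a variable named 'conv2d/kernel', exactly what a tf.get_variable call under the default scope would produce (a short sketch, TF 1.x):

    import tensorflow as tf

    g = tf.Graph()
    with g.as_default():
        x = tf.constant(1.0, shape=[1, 64, 64, 32])
        tf.layers.conv2d(inputs=x, filters=32, kernel_size=3)
        # expected: ['conv2d/kernel:0', 'conv2d/bias:0']
        print([v.name for v in tf.global_variables()])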

    But I wanted to see it empirically, so here is a test script that declares a conv2d using each method (tf.layers.conv2d and tf.nn.conv2d), evaluates the initialized kernels, and compares them.

    I've fixed arbitrary values for everything that shouldn't affect the comparison, such as the input tensor and the strides.

    import tensorflow as tf
    import numpy as np
    
    
    # the way you described in your question
    def _nn(input_tensor, initializer, filters, size):
        kernel = tf.get_variable(
            initializer=initializer, 
            shape=[size, size, 32, filters],
            name='kernel')
    
        # build the conv op just as the question does; only the kernel
        # matters for the comparison (the conv output is never evaluated)
        conv = tf.nn.conv2d(
            input=input_tensor,
            filter=kernel,
            strides=[1, 1, 1, 1],
            padding='SAME')
    
        return kernel
    
    # the other way
    def _layer(input_tensor, initializer, filters, size):
        # the defaults (padding='valid', strides=1) differ slightly from _nn,
        # but they have no effect on how the kernel is initialized
        tf.layers.conv2d(
            inputs=input_tensor,
            filters=filters,
            kernel_size=size,
            kernel_initializer=initializer)
    
        # 'conv2d/kernel:0' is the name of the generated kernel
        return tf.get_default_graph().get_tensor_by_name('conv2d/kernel:0')
    
    def _get_kernel(method):
        # an isolated context for each conv2d
        graph = tf.Graph()
        sess = tf.Session(graph=graph)
    
        with graph.as_default(), sess.as_default():
            # fix the graph-level seed so both graphs draw the same random values
            tf.set_random_seed(42)
    
            # arbitrary input tensor with compatible shape
            input_tensor = tf.constant(1.0, shape=[1, 64, 64, 32])
    
            initializer = tf.contrib.layers.xavier_initializer()
    
            kernel = method(
                input_tensor=input_tensor,
                initializer=initializer,
                filters=32,
                size=3)
    
            sess.run(tf.global_variables_initializer())
            return sess.run(kernel)
    
    if __name__ == '__main__':
        kernel_nn = _get_kernel(_nn)
        kernel_layer = _get_kernel(_layer)
    
        print('kernels are ', end='')
        # compares shape and values
        if np.array_equal(kernel_layer, kernel_nn):
            print('exactly the same')
        else:
            print('not the same!')
    

    And the output is... kernels are exactly the same.
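
    As a sanity check that the values actually follow the Xavier scheme, you can also compare them against the expected uniform bound, limit = sqrt(6 / (fan_in + fan_out)) (a sketch that continues from the script above, where kernel_nn is available; tf.contrib.layers.xavier_initializer defaults to the uniform variant, and fan_in = fan_out = 3 * 3 * 32 for this kernel shape):

    # Xavier/Glorot uniform draws from [-limit, limit]
    limit = np.sqrt(6.0 / (3 * 3 * 32 + 3 * 3 * 32))
    assert kernel_nn.min() >= -limit and kernel_nn.max() <= limit
    print('all values lie within the Xavier-uniform bound', limit)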

    The docs, btw: tf.nn.conv2d and tf.layers.conv2d.