Tags: python, tensorflow, conv-neural-network, initializer

Using weights initializer with tf.nn.conv2d


When using tf.layers.conv2d, setting the initializer is easy: it can be done through its kernel_initializer parameter. But what if I use tf.nn.conv2d? I use the code below. Is this equivalent to setting the kernel_initializer parameter of tf.layers.conv2d? Although the program runs without errors, I don't know how to verify whether it does what it is expected to do.

    with tf.name_scope('conv1_2') as scope:
        kernel = tf.get_variable(initializer=tf.contrib.layers.xavier_initializer(),
                                 shape=[3, 3, 32, 32], name='weights')
        conv = tf.nn.conv2d(conv1_1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[32], dtype=tf.float32),
                             trainable=True, name='biases')
        out = tf.nn.bias_add(conv, biases)
        self.conv1_2 = tf.nn.relu(out, name=scope)
        self.parameters += [kernel, biases]
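
For reference, here is a minimal sketch of the same block written with tf.layers.conv2d (TF 1.x API; the parameter values mirror the code above and are illustrative):

    # illustrative tf.layers.conv2d equivalent of the block above (TF 1.x);
    # the kernel_initializer parameter replaces the explicit tf.get_variable
    conv1_2 = tf.layers.conv2d(
        inputs=conv1_1,
        filters=32,
        kernel_size=3,
        padding='same',
        activation=tf.nn.relu,
        kernel_initializer=tf.contrib.layers.xavier_initializer(),
        bias_initializer=tf.zeros_initializer(),
        name='conv1_2')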

Solution

  • The underlying operation is the same (see here).

    As for the kernel and its initialization, I took a glance at the code and it looks the same: tf.layers.conv2d calls tf.get_variable at the end of the day.
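
    One quick way to see this is to build a tf.layers.conv2d in a fresh graph and list the variables it creates; the kernel shows up as a variable named 'conv2d/kernel', exactly what a tf.get_variable call under the default scope would produce (a short sketch, TF 1.x):

    import tensorflow as tf

    g = tf.Graph()
    with g.as_default():
        x = tf.constant(1.0, shape=[1, 64, 64, 32])
        tf.layers.conv2d(inputs=x, filters=32, kernel_size=3)
        # expected: ['conv2d/kernel:0', 'conv2d/bias:0']
        print([v.name for v in tf.global_variables()])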

    But I wanted to see it empirically, so here is a test script that declares a conv2d using each method (tf.layers.conv2d and tf.nn.conv2d), evaluates the initialized kernels, and compares them.

    I've fixed arbitrary values for everything that shouldn't affect the comparison, such as the input tensor and the strides.

    import tensorflow as tf
    import numpy as np
    
    
    # the way you described in your question
    def _nn(input_tensor, initializer, filters, size):
        kernel = tf.get_variable(
            initializer=initializer, 
            shape=[size, size, 32, filters],
            name='kernel')
    
        # build the conv op just as the question does; only the kernel
        # matters for the comparison (the conv output is never evaluated)
        conv = tf.nn.conv2d(
            input=input_tensor,
            filter=kernel,
            strides=[1, 1, 1, 1],
            padding='SAME')
    
        return kernel
    
    # the other way
    def _layer(input_tensor, initializer, filters, size):
        # the defaults (padding='valid', strides=1) differ slightly from _nn,
        # but they have no effect on how the kernel is initialized
        tf.layers.conv2d(
            inputs=input_tensor,
            filters=filters,
            kernel_size=size,
            kernel_initializer=initializer)
    
        # 'conv2d/kernel:0' is the name of the generated kernel
        return tf.get_default_graph().get_tensor_by_name('conv2d/kernel:0')
    
    def _get_kernel(method):
        # an isolated context for each conv2d
        graph = tf.Graph()
        sess = tf.Session(graph=graph)
    
        with graph.as_default(), sess.as_default():
            # fix the graph-level seed so both graphs draw the same random values
            tf.set_random_seed(42)
    
            # arbitrary input tensor with compatible shape
            input_tensor = tf.constant(1.0, shape=[1, 64, 64, 32])
    
            initializer = tf.contrib.layers.xavier_initializer()
    
            kernel = method(
                input_tensor=input_tensor,
                initializer=initializer,
                filters=32,
                size=3)
    
            sess.run(tf.global_variables_initializer())
            return sess.run(kernel)
    
    if __name__ == '__main__':
        kernel_nn = _get_kernel(_nn)
        kernel_layer = _get_kernel(_layer)
    
        print('kernels are ', end='')
        # compares shape and values
        if np.array_equal(kernel_layer, kernel_nn):
            print('exactly the same')
        else:
            print('not the same!')
    

    And the output is... kernels are exactly the same.
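
    As a sanity check that the values actually follow the Xavier scheme, you can also compare them against the expected uniform bound, limit = sqrt(6 / (fan_in + fan_out)) (a sketch that continues from the script above, where kernel_nn is available; tf.contrib.layers.xavier_initializer defaults to the uniform variant, and fan_in = fan_out = 3 * 3 * 32 for this kernel shape):

    # Xavier/Glorot uniform draws from [-limit, limit]
    limit = np.sqrt(6.0 / (3 * 3 * 32 + 3 * 3 * 32))
    assert kernel_nn.min() >= -limit and kernel_nn.max() <= limit
    print('all values lie within the Xavier-uniform bound', limit)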

    The docs, btw: tf.nn.conv2d and tf.layers.conv2d.