Tags: tensorflow, deep-learning, generative-adversarial-network

Tensorflow GAN: "No gradients provided for any variable"


I'm trying to set up a GAN with TF, but I can't figure out what I'm doing wrong. I searched the web but couldn't find an answer.

When I run the code provided I get:

gen_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(gen_loss, var_list=gen_vars)

ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables

def generator(z, activation=tf.nn.relu, reuse=False):
    shape = z.get_shape().as_list()
    weight_init = tf.random_normal_initializer(stddev=0.1)
    bias_init = tf.constant_initializer(0.0)

    fc1_units = 256
    fc1_weights = tf.get_variable('gen_fc1_weights', (shape[1], fc1_units), dtype=tf.float32, initializer=weight_init)
    fc1_biases = tf.get_variable('gen_fc1_biases', (fc1_units), initializer=bias_init)
    fc1 = activation(tf.matmul(z, fc1_weights) + fc1_biases)

    fc2_units = 784
    fc2_weights = tf.get_variable('gen_fc2_weights', (fc1_units, fc2_units), dtype=tf.float32, initializer=weight_init)
    fc2_biases = tf.get_variable('gen_fc2_biases', (fc2_units), initializer=bias_init)
    fc2 = activation(tf.matmul(fc1, fc2_weights) + fc2_biases)

    output = tf.nn.sigmoid(fc2, name='gen_sigmoid_output')
    return output

def discriminator(X, activation=tf.nn.relu):
    shape = z.get_shape().as_list()
    weight_init = tf.random_normal_initializer(stddev=0.1)
    bias_init = tf.constant_initializer(0.0)

    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):

        fc1_units = 1024
        fc1_weights = tf.get_variable('dis_fc1_weights', (shape[1], fc1_units), dtype=tf.float32, initializer=weight_init)
        fc1_biases = tf.get_variable('dis_fc1_biases', (fc1_units), initializer=bias_init)
        fc1 = activation(tf.matmul(z, fc1_weights) + fc1_biases)

        fc2_units = 1
        fc2_weights = tf.get_variable('dis_fc2_weights', (fc1_units, fc2_units), dtype=tf.float32, initializer=weight_init)
        fc2_biases = tf.get_variable('dis_fc2_biases', (fc2_units), initializer=bias_init)
        fc2 = tf.matmul(fc1, fc2_weights) + fc2_biases

        sigmoid_out = tf.nn.sigmoid(fc2, name='dis_sigmoid_output')
    return sigmoid_out, fc2

X = tf.placeholder(tf.float32, shape=(real_batch_size, 28*28), name='X')
z = tf.placeholder(dtype=tf.float32, shape=(fake_batch_size, 100), name='z')

gen = generator(z)
dis_real, dis_real_logits = discriminator(X)
dis_fake, dis_fake_logits = discriminator(gen)


dis_real_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(dis_real), logits=dis_real_logits))
dis_fake_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(dis_fake), logits=dis_fake_logits))
dis_loss = dis_real_loss + dis_fake_loss
gen_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(dis_fake), logits=dis_fake_logits))

train_vars = tf.trainable_variables()
dis_vars = [var for var in train_vars if 'dis_' in var.name]
gen_vars = [var for var in train_vars if 'gen_' in var.name]

dis_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(dis_loss, var_list=dis_vars)
gen_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(gen_loss, var_list=gen_vars)

Solution

  • Your problem is in how you filter the variables:

    dis_vars = [var for var in train_vars if 'dis_' in var.name]
    gen_vars = [var for var in train_vars if 'gen_' in var.name]
    

    You defined the discriminator variables inside the discriminator variable scope, while the generator variables live in the root (no prefix) scope, hence your filters do not select the variables the way you expect.

    You can gather the variables by filtering on the scope name instead:

    dis_vars = [var for var in train_vars if 'discriminator' in var.name]
    gen_vars = [var for var in train_vars if 'discriminator' not in var.name]
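
    To double-check where each variable ended up, print the names of the trainable variables; discriminator variables carry the discriminator/ scope prefix while the generator ones do not (a quick sanity check against the graph above):

    for var in tf.trainable_variables():
        print(var.name)  # e.g. 'discriminator/dis_fc1_weights:0' vs. 'gen_fc1_weights:0'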
    

    Moreover, the error states "No gradients provided for any variable, check your graph for ops that do not support gradients, between variables [...] and loss Tensor("Mean_2:0", shape=(), dtype=float32)"
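
    You can confirm this directly by asking TensorFlow for the gradients yourself: tf.gradients returns None for every variable the loss does not depend on (a minimal sketch using the gen_loss and gen_vars defined above):

    grads = tf.gradients(gen_loss, gen_vars)
    for var, grad in zip(gen_vars, grads):
        print(var.name, 'no gradient' if grad is None else 'ok')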

    In fact, the problem is in the loss tensor.

    The loss tensor is the output of the discriminator evaluated on the generator output. The discriminator definition is wrong: inside the function you refer to z instead of the parameter X, so z silently resolves to the noise placeholder from the enclosing scope and the generator output never enters the graph of the loss. Hence, if you update your discriminator code to use X instead of z, it works:

    import tensorflow as tf
    
    
    def generator(z, activation=tf.nn.relu, reuse=False):
        with tf.variable_scope("generator"):
            shape = z.get_shape().as_list()
            weight_init = tf.random_normal_initializer(stddev=0.1)
            bias_init = tf.constant_initializer(0.0)
    
            fc1_units = 256
            fc1_weights = tf.get_variable(
                'gen_fc1_weights', (shape[1], fc1_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc1_biases = tf.get_variable(
                'gen_fc1_biases', (fc1_units), initializer=bias_init)
            fc1 = activation(tf.matmul(z, fc1_weights) + fc1_biases)
    
            fc2_units = 784
            fc2_weights = tf.get_variable(
                'gen_fc2_weights', (fc1_units, fc2_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc2_biases = tf.get_variable(
                'gen_fc2_biases', (fc2_units), initializer=bias_init)
            fc2 = activation(tf.matmul(fc1, fc2_weights) + fc2_biases)
    
            output = tf.nn.sigmoid(fc2, name='gen_sigmoid_output')
            return output
    
    
    def discriminator(X, activation=tf.nn.relu):
        with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
            shape = X.get_shape().as_list()
            weight_init = tf.random_normal_initializer(stddev=0.1)
            bias_init = tf.constant_initializer(0.0)
    
            fc1_units = 1024
            fc1_weights = tf.get_variable(
                'dis_fc1_weights', (shape[1], fc1_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc1_biases = tf.get_variable(
                'dis_fc1_biases', (fc1_units), initializer=bias_init)
            fc1 = activation(tf.matmul(X, fc1_weights) + fc1_biases)
    
            fc2_units = 1
            fc2_weights = tf.get_variable(
                'dis_fc2_weights', (fc1_units, fc2_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc2_biases = tf.get_variable(
                'dis_fc2_biases', (fc2_units), initializer=bias_init)
            fc2 = tf.matmul(fc1, fc2_weights) + fc2_biases
    
            return fc2
    
    
    ### ADDED TO TEST
    real_batch_size, fake_batch_size = 10, 10
    learning_rate = 1e-5
    beta1 = 0.5
    ###
    X = tf.placeholder(tf.float32, shape=(real_batch_size, 28 * 28), name='X')
    z = tf.placeholder(dtype=tf.float32, shape=(fake_batch_size, 100), name='z')
    
    gen = generator(z)
    dis_real_logits = discriminator(X)
    dis_fake_logits = discriminator(gen)
    
    dis_real_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(dis_real_logits), logits=dis_real_logits))
    dis_fake_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(dis_fake_logits), logits=dis_fake_logits))
    
    dis_loss = dis_real_loss + dis_fake_loss
    
    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(dis_fake_logits), logits=dis_fake_logits))
    
    train_vars = tf.trainable_variables()
    dis_vars = [var for var in train_vars if 'dis_' in var.name]
    gen_vars = [var for var in train_vars if 'gen_' in var.name]
    
    dis_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(
        dis_loss, var_list=dis_vars)
    gen_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(
        gen_loss, var_list=gen_vars)
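
    For completeness, here is a minimal training-loop sketch on top of the graph above; the random "real" batch and the step count are placeholders for illustration, not part of the original question (substitute actual MNIST batches in practice):

    import numpy as np

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(1000):
            # Stand-in for a batch of real images; replace with actual data.
            real_batch = np.random.uniform(0., 1., size=(real_batch_size, 28 * 28))
            noise = np.random.normal(0., 1., size=(fake_batch_size, 100))

            # Alternate one discriminator update and one generator update.
            _, d_loss = sess.run([dis_optimize, dis_loss],
                                 feed_dict={X: real_batch, z: noise})
            _, g_loss = sess.run([gen_optimize, gen_loss],
                                 feed_dict={z: noise})

            if step % 100 == 0:
                print('step %d: dis_loss=%.4f, gen_loss=%.4f' % (step, d_loss, g_loss))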