Tags: tensorflow, deep-learning, generative-adversarial-network

Tensorflow GAN: "No gradients provided for any variable"


I'm trying to set up a GAN with TF, but I can't figure out what I'm doing wrong. I searched the web but couldn't find an answer.

When I run the code provided I get:

gen_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(gen_loss, var_list=gen_vars)

ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables

def generator(z, activation=tf.nn.relu, reuse=False):
    shape = z.get_shape().as_list()
    weight_init = tf.random_normal_initializer(stddev=0.1)
    bias_init = tf.constant_initializer(0.0)

    fc1_units = 256
    fc1_weights = tf.get_variable('gen_fc1_weights', (shape[1], fc1_units), dtype=tf.float32, initializer=weight_init)
    fc1_biases = tf.get_variable('gen_fc1_biases', (fc1_units), initializer=bias_init)
    fc1 = activation(tf.matmul(z, fc1_weights) + fc1_biases)

    fc2_units = 784
    fc2_weights = tf.get_variable('gen_fc2_weights', (fc1_units, fc2_units), dtype=tf.float32, initializer=weight_init)
    fc2_biases = tf.get_variable('gen_fc2_biases', (fc2_units), initializer=bias_init)
    fc2 = activation(tf.matmul(fc1, fc2_weights) + fc2_biases)

    output = tf.nn.sigmoid(fc2, name='gen_sigmoid_output')
    return output

def discriminator(X, activation=tf.nn.relu):
    shape = z.get_shape().as_list()
    weight_init = tf.random_normal_initializer(stddev=0.1)
    bias_init = tf.constant_initializer(0.0)

    with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):

        fc1_units = 1024
        fc1_weights = tf.get_variable('dis_fc1_weights', (shape[1], fc1_units), dtype=tf.float32, initializer=weight_init)
        fc1_biases = tf.get_variable('dis_fc1_biases', (fc1_units), initializer=bias_init)
        fc1 = activation(tf.matmul(z, fc1_weights) + fc1_biases)

        fc2_units = 1
        fc2_weights = tf.get_variable('dis_fc2_weights', (fc1_units, fc2_units), dtype=tf.float32, initializer=weight_init)
        fc2_biases = tf.get_variable('dis_fc2_biases', (fc2_units), initializer=bias_init)
        fc2 = tf.matmul(fc1, fc2_weights) + fc2_biases

        sigmoid_out = tf.nn.sigmoid(fc2, name='dis_sigmoid_output')
    return sigmoid_out, fc2

X = tf.placeholder(tf.float32, shape=(real_batch_size, 28*28), name='X')
z = tf.placeholder(dtype=tf.float32, shape=(fake_batch_size, 100), name='z')

gen = generator(z)
dis_real, dis_real_logits = discriminator(X)
dis_fake, dis_fake_logits = discriminator(gen)


dis_real_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(dis_real), logits=dis_real_logits))
dis_fake_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(dis_fake), logits=dis_fake_logits))
dis_loss = dis_real_loss + dis_fake_loss
gen_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(dis_fake), logits=dis_fake_logits))

train_vars = tf.trainable_variables()
dis_vars = [var for var in train_vars if 'dis_' in var.name]
gen_vars = [var for var in train_vars if 'gen_' in var.name]

dis_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(dis_loss, var_list=dis_vars)
gen_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(gen_loss, var_list=gen_vars)

Solution

  • Your problem is in how you filter the variables:

    dis_vars = [var for var in train_vars if 'dis_' in var.name]
    gen_vars = [var for var in train_vars if 'gen_' in var.name]
    

    You defined the discriminator variables inside the discriminator variable scope, while the generator variables live in the root (no prefix) scope, hence your filters do not select the variables the way you expect.

    You can gather the variables by filtering on the scope name instead:

    dis_vars = [var for var in train_vars if 'discriminator' in var.name]
    gen_vars = [var for var in train_vars if 'discriminator' not in var.name]
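
    To double-check where each variable ended up, print the names of the trainable variables; discriminator variables carry the discriminator/ scope prefix while the generator ones do not (a quick sanity check against the graph above):

    for var in tf.trainable_variables():
        print(var.name)  # e.g. 'discriminator/dis_fc1_weights:0' vs. 'gen_fc1_weights:0'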
    

    Moreover, the error states "No gradients provided for any variable, check your graph for ops that do not support gradients, between variables [...] and loss Tensor("Mean_2:0", shape=(), dtype=float32)"
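
    You can confirm this directly by asking TensorFlow for the gradients yourself: tf.gradients returns None for every variable the loss does not depend on (a minimal sketch using the gen_loss and gen_vars defined above):

    grads = tf.gradients(gen_loss, gen_vars)
    for var, grad in zip(gen_vars, grads):
        print(var.name, 'no gradient' if grad is None else 'ok')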

    In fact, the problem is in the loss tensor.

    The loss tensor is the output of the discriminator evaluated on the generator output. The discriminator definition is wrong: inside the function you refer to z instead of the parameter X, so z silently resolves to the noise placeholder from the enclosing scope and the generator output never enters the graph of the loss. Hence, if you update your discriminator code to use X instead of z, it works:

    import tensorflow as tf
    
    
    def generator(z, activation=tf.nn.relu, reuse=False):
        with tf.variable_scope("generator"):
            shape = z.get_shape().as_list()
            weight_init = tf.random_normal_initializer(stddev=0.1)
            bias_init = tf.constant_initializer(0.0)
    
            fc1_units = 256
            fc1_weights = tf.get_variable(
                'gen_fc1_weights', (shape[1], fc1_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc1_biases = tf.get_variable(
                'gen_fc1_biases', (fc1_units), initializer=bias_init)
            fc1 = activation(tf.matmul(z, fc1_weights) + fc1_biases)
    
            fc2_units = 784
            fc2_weights = tf.get_variable(
                'gen_fc2_weights', (fc1_units, fc2_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc2_biases = tf.get_variable(
                'gen_fc2_biases', (fc2_units), initializer=bias_init)
            fc2 = activation(tf.matmul(fc1, fc2_weights) + fc2_biases)
    
            output = tf.nn.sigmoid(fc2, name='gen_sigmoid_output')
            return output
    
    
    def discriminator(X, activation=tf.nn.relu):
        with tf.variable_scope('discriminator', reuse=tf.AUTO_REUSE):
            shape = X.get_shape().as_list()
            weight_init = tf.random_normal_initializer(stddev=0.1)
            bias_init = tf.constant_initializer(0.0)
    
            fc1_units = 1024
            fc1_weights = tf.get_variable(
                'dis_fc1_weights', (shape[1], fc1_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc1_biases = tf.get_variable(
                'dis_fc1_biases', (fc1_units), initializer=bias_init)
            fc1 = activation(tf.matmul(X, fc1_weights) + fc1_biases)
    
            fc2_units = 1
            fc2_weights = tf.get_variable(
                'dis_fc2_weights', (fc1_units, fc2_units),
                dtype=tf.float32,
                initializer=weight_init)
            fc2_biases = tf.get_variable(
                'dis_fc2_biases', (fc2_units), initializer=bias_init)
            fc2 = tf.matmul(fc1, fc2_weights) + fc2_biases
    
            return fc2
    
    
    ### ADDED TO TEST
    real_batch_size, fake_batch_size = 10, 10
    learning_rate = 1e-5
    beta1 = 0.5
    ###
    X = tf.placeholder(tf.float32, shape=(real_batch_size, 28 * 28), name='X')
    z = tf.placeholder(dtype=tf.float32, shape=(fake_batch_size, 100), name='z')
    
    gen = generator(z)
    dis_real_logits = discriminator(X)
    dis_fake_logits = discriminator(gen)
    
    dis_real_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(dis_real_logits), logits=dis_real_logits))
    dis_fake_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(dis_fake_logits), logits=dis_fake_logits))
    
    dis_loss = dis_real_loss + dis_fake_loss
    
    gen_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(dis_fake_logits), logits=dis_fake_logits))
    
    train_vars = tf.trainable_variables()
    dis_vars = [var for var in train_vars if 'dis_' in var.name]
    gen_vars = [var for var in train_vars if 'gen_' in var.name]
    
    dis_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(
        dis_loss, var_list=dis_vars)
    gen_optimize = tf.train.AdamOptimizer(learning_rate, beta1).minimize(
        gen_loss, var_list=gen_vars)
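
    For completeness, here is a minimal training-loop sketch on top of the graph above; the random "real" batch and the step count are placeholders for illustration, not part of the original question (substitute actual MNIST batches in practice):

    import numpy as np

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(1000):
            # Stand-in for a batch of real images; replace with actual data.
            real_batch = np.random.uniform(0., 1., size=(real_batch_size, 28 * 28))
            noise = np.random.normal(0., 1., size=(fake_batch_size, 100))

            # Alternate one discriminator update and one generator update.
            _, d_loss = sess.run([dis_optimize, dis_loss],
                                 feed_dict={X: real_batch, z: noise})
            _, g_loss = sess.run([gen_optimize, gen_loss],
                                 feed_dict={z: noise})

            if step % 100 == 0:
                print('step %d: dis_loss=%.4f, gen_loss=%.4f' % (step, d_loss, g_loss))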