Search code examples
pythontensorflowdropout

How to implement dropout with tensorflow


I have applied dropout in the tensorflow 3-knn implementation. But I have got an error due to the placeholder's variable keep_prob.

TypeError: Cannot interpret feed_dict key as Tensor: Can not convert an int into a Tensor.

I have written 2 functions: forward_propagation (that implements the forward propagation) and model (that train the parameters on your model). Here is the short implementation of the 2 functions :

How could i affect keep_prob value from "model" function to the "forward_propagation" function in order to train the model?


def forward_propagation(X, parameters, keep_prob):

    # Retrieve the parameters from the dictionary "parameters" 
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    #keep_prob = tf.placeholder(dtype=tf.float64)
    ### with keep_drop
    Z1 = tf.add(tf.matmul(W1, X), b1)                 # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                               # A1 = relu(Z1)
    A1 = tf.nn.dropout(A1, keep_prob)
    Z2 = tf.add(tf.matmul(W2, A1), b2)                # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                               # A2 = relu(Z2)
    A2 = tf.nn.dropout(A2, keep_prob)
    Z3 = tf.add(tf.matmul(W3, A2), b3)                # Z3 = np.dot(W3, A2) + b3
    ### with keep_drop

    return Z3

def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001, num_epochs = 1500, minibatch_size = 32, keep_prob = 1, seed = 0):

    ops.reset_default_graph()                         
    tf.set_random_seed(seed)                          
    seed = seed                                       
    (n_x, m) = X_train.shape                          
    n_y = Y_train.shape[0]                            # n_y : output size
    costs = []                                      # To keep track of the cost


    # Create Placeholders of shape (n_x, n_y)
    ### START CODE HERE ### (1 line)
    X, Y = create_placeholders(n_x, n_y)
    keep_prob_ = tf.constant(keep_prob, dtype=tf.float32, name="keep_prob_")
    ### END CODE HERE ###

    # Initialize parameters
    parameters = initialize_parameters()


    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters, keep_prob)

    # Cost = loss function: Add cost function to tensorflow graph
    cost = compute_cost( Z3=Z3, Y=Y)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.                       # Defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = fct_utils.random_mini_batches(X_train, Y_train, 
                                                        minibatch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the "optimizer" and the "cost", the feedict should contain a minibatch for (X,Y).
                _ , minibatch_cost = sess.run([optimizer, cost], 
                                              feed_dict={Y:minibatch_Y, 
                                                         X:minibatch_, 
                                                         keep_prob:keep_prob_})

                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")

Solution

  • The position of keep_prob and keep_prob_ should be exchanged. In your code, keep_prob is a int type, and keep_prob_ is a tensor and should be the key in feed_dict.

    feed_dict={Y:minibatch_Y, X:minibatch_, keep_prob_:keep_prob}