python python-3.x tensorflow conv-neural-network mnist

AttributeError: 'dict' object has no attribute 'train' when trying to implement a convolutional neural network program using TensorFlow in Python

Even though I am super new to the topic, I am trying to implement a CNN program that can be used to recognize images without using Keras. I am currently using python with Jupyter/Google Colab.

After fixing a few other errors that came up in my code, I now have this error:

/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py:1761: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).
  warnings.warn('An interactive session is already active. This can '

Training the model....
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-28-1af250ed46b3> in <module>()
    108     for i in range(num_iterations):
    109         # Get the next batch of images
--> 110         batch = mnist.train.next_batch(batch_size)              #third got an error for this line -
    111         # x_batch, y_batch = mnist.train.next_batch(batch_size)
    112 

AttributeError: 'dict' object has no attribute 'train'

This is my current code:

!pip install tensorflow_datasets
!pip install --upgrade tensorflow
!pip install tensorflow-datasets
!pip install mnist
#!pip install tensorflow.examples.tutorials.mnist

import argparse
print ('argparse version: ', argparse.__version__)
import mnist
print ('MNIST version: ', mnist.__version__)
import tensorflow_datasets
print ('tensorflow_datasets version: ', tensorflow_datasets.__version__)
import tensorflow.compat.v1 as tf
print ('tf version: ', tf.__version__)
tf.disable_v2_behavior()
#from tensorflow.examples.tutorials.mnist import input_data


#def build_arg_parser():
#    parser = argparse.ArgumentParser(description='Build a CNN classifier \
#            using MNIST data')
#    parser.add_argument('--input-dir', dest='input_dir', type=str,
#            default='./mnist_data', help='Directory for storing data')
#    return parser

def get_weights(shape):
    """Create a weight variable of the given shape.

    The initial values are drawn from a truncated normal distribution
    with a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def get_biases(shape):
    """Create a bias variable of the given shape, initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def create_layer(shape):
    """Return the ``(weights, biases)`` pair for one layer.

    The weights take the full ``shape``; the biases get one entry per
    element of the last dimension of ``shape``.
    """
    # Tuple elements are evaluated left to right, so the weights are still
    # created before the biases.
    return get_weights(shape), get_biases([shape[-1]])

def convolution_2d(x, W):
    """Convolve ``x`` with the filter ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pooling(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

if __name__ == '__main__':
    #args = build_arg_parser().parse_args()

    # Get the MNIST data
    # NOTE: the object returned here is a plain dict, which is why the
    # `mnist.train` access in the training loop below fails with
    # "AttributeError: 'dict' object has no attribute 'train'".
    mnist = tensorflow_datasets.load('mnist')

    # The images are 28x28, so create the input layer
    # with 784 neurons (28x28=784)
    x = tf.placeholder(tf.float32, [None, 784])

    # Reshape 'x' into a 4D tensor
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # Define the first convolutional layer
    W_conv1, b_conv1 = create_layer([5, 5, 1, 32])

    # Convolve the image with weight tensor, add the
    # bias, and then apply the ReLU function
    h_conv1 = tf.nn.relu(convolution_2d(x_image, W_conv1) + b_conv1)

    # Apply the max pooling operator
    h_pool1 = max_pooling(h_conv1)

    # Define the second convolutional layer
    W_conv2, b_conv2 = create_layer([5, 5, 32, 64])

    # Convolve the output of previous layer with the
    # weight tensor, add the bias, and then apply
    # the ReLU function
    h_conv2 = tf.nn.relu(convolution_2d(h_pool1, W_conv2) + b_conv2)

    # Apply the max pooling operator
    h_pool2 = max_pooling(h_conv2)

    # Define the fully connected layer
    W_fc1, b_fc1 = create_layer([7 * 7 * 64, 1024])

    # Reshape the output of the previous layer
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])

    # Multiply the output of previous layer by the
    # weight tensor, add the bias, and then apply
    # the ReLU function
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Define the dropout layer using a probability placeholder
    # for all the neurons
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Define the readout layer (output layer)
    W_fc2, b_fc2 = create_layer([1024, 10])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # Define the entropy loss and the optimizer
    y_loss = tf.placeholder(tf.float32, [None, 10])
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_loss, logits=y_conv))
    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # Define the accuracy computation
    predicted = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_loss, 1))
    accuracy = tf.reduce_mean(tf.cast(predicted, tf.float32))

    # Create and run a session
    sess = tf.InteractiveSession()
    try:
        # global_variables_initializer() replaces the deprecated
        # initialize_all_variables().
        init = tf.global_variables_initializer()
        sess.run(init)

        # Start training
        num_iterations = 21000
        batch_size = 75
        print('\nTraining the model....')
        for i in range(num_iterations):
            # Get the next batch of images
            batch = mnist.train.next_batch(batch_size)

            # Print progress (dropout disabled: keep_prob = 1.0)
            if i % 50 == 0:
                cur_accuracy = accuracy.eval(feed_dict = {
                        x: batch[0], y_loss: batch[1], keep_prob: 1.0})
                print('Iteration', i, ', Accuracy =', cur_accuracy)

            # Train on the current batch
            optimizer.run(feed_dict = {x: batch[0], y_loss: batch[1], keep_prob: 0.5})

        # Compute accuracy using test data
        print('Test accuracy =', accuracy.eval(feed_dict = {
                x: mnist.test.images, y_loss: mnist.test.labels,
                keep_prob: 1.0}))
    finally:
        # Release the session even when training fails, so that re-running
        # the cell does not trigger the "An interactive session is already
        # active" warning.
        sess.close()

I have found a few posts that have the exact same code as I do, but all of their implementations somehow run. I tried looking up solutions, but did not end up finding one that worked for me.

One post I found said that "base dictionaries don't have a 'train' attribute." This made me curious as to why I then got this error if dictionaries normally don't have these attributes, but others had this same code work.

Another post used the line:

x_batch, y_batch = mnist.train.next_batch(batch_size)

instead of the line:

batch = mnist.train.next_batch(batch_size)

but neither seemed to work for me. None of the other changes/solutions I tried looking into ended up working either.

Does anyone have any idea on how to fix this no attribute error?

Solution

It seems that your code is outdated, which is why it no longer works. The TensorFlow library changes very often and frequently breaks its interfaces, so old code stops working.

First, you try to import mnist, but this is the wrong module: it contains almost no code and does not seem useful here. It was probably useful and working in the past, but not now.

Also, the function mnist.train.next_batch(...) no longer works because it is not implemented by the mnist dataset any more; it probably worked in the past as well.

I decided to implement my own helper class, MyDS, which provides all of this missing functionality. Below is your full corrected code, including my class (at the beginning):

if __name__ == '__main__':
    import tensorflow.compat.v1 as tf
    tf.enable_eager_execution()
    import tensorflow_datasets as tfds

    class MyDS(object):
        """In-memory dataset wrapper offering ``.train`` / ``.test`` style access.

        Every split returned by ``tfds.load(name)`` is wrapped in a ``SubDS``
        and exposed as an attribute named after the split. Each split offers
        ``next_batch()`` plus one stacked array per feature, named with a
        trailing ``s`` (feature ``image`` -> attribute ``images``).

        Args:
            name: Dataset name passed to ``tfds.load``.
            one_hot: Optional feature name whose values are one-hot encoded.
        """

        class SubDS(object):
            """A single dataset split, fully materialised as numpy arrays."""

            # Imported in the class body; methods reach it via the class.
            import numpy as np

            def __init__(self, ds, *, one_hot):
                """Load all examples of ``ds`` and build the stacked arrays.

                Args:
                    ds: Object whose ``as_numpy_iterator()`` yields one dict
                        of feature arrays per example.
                    one_hot: Feature name to one-hot encode, or ``None``.
                """
                np = self.__class__.np
                self.ds = list(ds.as_numpy_iterator())
                # One 2-D array per feature: row i is example i, flattened.
                self.sds = {
                    (k + 's'): np.stack([self._flat(e[k]) for e in self.ds], 0)
                    for k in self.ds[0].keys()
                }
                self.one_hot = one_hot
                if one_hot is not None:
                    # Largest value seen in this split; determines the width
                    # of the one-hot encoding.
                    self.max_one_hot = np.max(self.sds[one_hot + 's'])

            def _flat(self, value):
                """Return ``value`` as a 1-D array (a scalar becomes length 1)."""
                return self.__class__.np.asarray(value).reshape(-1)

            def _to_one_hot(self, a, maxv):
                """One-hot encode column 0 of ``a`` into ``maxv + 1`` columns."""
                np = self.__class__.np
                rows = a.shape[0]
                na = np.zeros((rows, maxv + 1), dtype = a.dtype)
                # Vectorised: set one entry per row instead of looping in Python.
                na[np.arange(rows), a[:, 0]] = 1
                return na

            def _apply_one_hot(self, key, maxv):
                """Replace the stacked ``key`` array by its one-hot encoding.

                ``maxv`` must be at least the largest value in this split so
                that all splits can share the same encoding width.
                """
                assert maxv >= self.max_one_hot, (maxv, self.max_one_hot)
                self.max_one_hot = maxv
                self.sds[key + 's'] = self._to_one_hot(self.sds[key + 's'], self.max_one_hot)

            def next_batch(self, num = 16):
                """Return ``num`` randomly chosen examples.

                Indices come from ``np.random.choice``, i.e. they are sampled
                with replacement. The result is a dict holding each feature
                under its name and, additionally, under its integer position
                (``batch[0]``, ``batch[1]``, ...) in feature order.
                """
                np = self.__class__.np
                idx = np.random.choice(len(self.ds), num)
                res = {
                    k : np.stack([self._flat(self.ds[i][k]) for i in idx], 0)
                    for k in self.ds[0].keys()
                }
                if self.one_hot is not None:
                    res[self.one_hot] = self._to_one_hot(res[self.one_hot], self.max_one_hot)
                # Snapshot the values first: integer keys are added to the
                # same dict while iterating.
                for i, v in enumerate(list(res.values())):
                    res[i] = v
                return res

            def __getattr__(self, name):
                """Expose the stacked arrays (``images``, ``labels``, ...) as attributes.

                Only called when normal attribute lookup fails. Unknown names
                raise ``AttributeError`` so that ``hasattr``, ``getattr`` with
                a default and ``copy`` behave correctly.
                """
                # 'sds' may be missing while the instance is still being built.
                try:
                    return self.__dict__.get('sds', {})[name]
                except KeyError:
                    raise AttributeError(
                        f"{type(self).__name__!r} object has no attribute {name!r}"
                    ) from None

        def __init__(self, name, *, one_hot = None):
            self.ds = tfds.load(name)
            self.sds = {
                split: self.__class__.SubDS(data, one_hot = one_hot)
                for split, data in self.ds.items()
            }
            if one_hot is not None:
                # Use one common width so every split encodes labels alike.
                maxh = max(e.max_one_hot for e in self.sds.values())
                for e in self.sds.values():
                    e._apply_one_hot(one_hot, maxh)

        def __getattr__(self, name):
            """Expose each split (``train``, ``test``, ...) as an attribute.

            Only called when normal attribute lookup fails; unknown names
            raise ``AttributeError``.
            """
            try:
                return self.__dict__.get('sds', {})[name]
            except KeyError:
                raise AttributeError(
                    f"{type(self).__name__!r} object has no attribute {name!r}"
                ) from None
            
    # Get the MNIST data
    # The data is loaded here, before eager execution is switched off again
    # further down; MyDS reads the dataset through as_numpy_iterator(), which
    # presumably requires eager mode -- TODO confirm.
    # one_hot='label' asks MyDS to one-hot encode the 'label' feature.
    mnist = MyDS('mnist', one_hot = 'label') # tensorflow_datasets.load('mnist')

    import argparse
    print ('argparse version: ', argparse.__version__)
    #import mnist
    #print ('MNIST version: ', mnist.__version__)
    #import tensorflow_datasets
    print ('tensorflow_datasets version: ', tfds.__version__)
    #import tensorflow.compat.v1 as tf
    print ('tf version: ', tf.__version__)
    # Back to graph mode: the rest of the script uses placeholders and a session.
    tf.disable_eager_execution()
    tf.disable_v2_behavior()
    #from tensorflow.examples.tutorials.mnist import input_data
    #from tensorflow.examples.tutorials.mnist import input_data


    #def build_arg_parser():
    #    parser = argparse.ArgumentParser(description='Build a CNN classifier \
    #            using MNIST data')
    #    parser.add_argument('--input-dir', dest='input_dir', type=str,
    #            default='./mnist_data', help='Directory for storing data')
    #    return parser

    def get_weights(shape):
        """Create a weight variable of the given shape.

        The initial values are drawn from a truncated normal distribution
        with a standard deviation of 0.1.
        """
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def get_biases(shape):
        """Create a bias variable of the given shape, initialised to 0.1."""
        return tf.Variable(tf.constant(0.1, shape=shape))

    def create_layer(shape):
        """Return the ``(weights, biases)`` pair for one layer.

        The weights take the full ``shape``; the biases get one entry per
        element of the last dimension of ``shape``.
        """
        # Tuple elements are evaluated left to right, so the weights are
        # still created before the biases.
        return get_weights(shape), get_biases([shape[-1]])

    def convolution_2d(x, W):
        """Convolve ``x`` with the filter ``W`` using stride 1 and 'SAME' padding."""
        unit_strides = [1, 1, 1, 1]
        return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

    def max_pooling(x):
        """Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
        window = [1, 2, 2, 1]
        step = [1, 2, 2, 1]
        return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

    #args = build_arg_parser().parse_args()

    # The images are 28x28, so create the input layer
    # with 784 neurons (28x28=784)
    x = tf.placeholder(tf.float32, [None, 784])

    # Reshape 'x' into a 4D tensor
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # Define the first convolutional layer
    W_conv1, b_conv1 = create_layer([5, 5, 1, 32])

    # Convolve the image with weight tensor, add the
    # bias, and then apply the ReLU function
    h_conv1 = tf.nn.relu(convolution_2d(x_image, W_conv1) + b_conv1)

    # Apply the max pooling operator
    h_pool1 = max_pooling(h_conv1)

    # Define the second convolutional layer
    W_conv2, b_conv2 = create_layer([5, 5, 32, 64])

    # Convolve the output of previous layer with the
    # weight tensor, add the bias, and then apply
    # the ReLU function
    h_conv2 = tf.nn.relu(convolution_2d(h_pool1, W_conv2) + b_conv2)

    # Apply the max pooling operator
    h_pool2 = max_pooling(h_conv2)

    # Define the fully connected layer
    W_fc1, b_fc1 = create_layer([7 * 7 * 64, 1024])

    # Reshape the output of the previous layer
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])

    # Multiply the output of previous layer by the
    # weight tensor, add the bias, and then apply
    # the ReLU function
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Define the dropout layer using a probability placeholder
    # for all the neurons
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Define the readout layer (output layer)
    W_fc2, b_fc2 = create_layer([1024, 10])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # Define the entropy loss and the optimizer
    y_loss = tf.placeholder(tf.float32, [None, 10])
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_loss, logits=y_conv))
    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # Define the accuracy computation
    predicted = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_loss, 1))
    accuracy = tf.reduce_mean(tf.cast(predicted, tf.float32))

    # Create and run a session
    sess = tf.InteractiveSession()
    try:
        # global_variables_initializer() replaces the deprecated
        # initialize_all_variables().
        init = tf.global_variables_initializer()
        sess.run(init)

        # Start training
        num_iterations = 21000
        batch_size = 75
        print('\nTraining the model....')
        for i in range(num_iterations):
            # Get the next batch of images; batch[0] holds the flattened
            # images and batch[1] the one-hot labels.
            # NOTE(review): the pixel values are fed exactly as MyDS returns
            # them, without rescaling -- confirm whether they should be
            # normalised first.
            batch = mnist.train.next_batch(batch_size)

            # Print progress (dropout disabled: keep_prob = 1.0)
            if i % 50 == 0:
                cur_accuracy = accuracy.eval(feed_dict = {
                        x: batch[0], y_loss: batch[1], keep_prob: 1.0})
                print('Iteration', i, ', Accuracy =', cur_accuracy)

            # Train on the current batch
            optimizer.run(feed_dict = {x: batch[0], y_loss: batch[1], keep_prob: 0.5})

        # Compute accuracy using test data
        print('Test accuracy =', accuracy.eval(feed_dict = {
                x: mnist.test.images, y_loss: mnist.test.labels,
                keep_prob: 1.0}))
    finally:
        # Release the session even when training fails, so that re-running
        # the cell does not trigger the "An interactive session is already
        # active" warning.
        sess.close()