Search code examples
pythontensorflowmnist

Why doesn't the TF MNIST example work with original data?


I've adjusted the data loading of the TensorFlow MNIST example to use the original MNIST data. The original example gets > 0.80 accuracy after 100 epochs. My adjusted example (set use_original = False to use it) gets only about 0.09 - 0.10 accuracy (which is just random). Could you please explain why?

#!/usr/bin/env python

"""MNIST with Tensorflow."""

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

from struct import unpack
import gzip
from numpy import zeros, uint8

from sklearn.preprocessing import OneHotEncoder

use_original = True


def get_labeled_data(imagefile, labelfile):
    """
    Read input-vector (image) and target class (label, 0-9).

    Return
    ------
    tuple of lists
    """
    # Open the images with gzip in read binary mode
    images = gzip.open(imagefile, 'rb')
    labels = gzip.open(labelfile, 'rb')

    # Read the binary data

    # We have to get big endian unsigned int. So we need '>I'

    # Get metadata for images
    images.read(4)  # skip the magic_number
    number_of_images = images.read(4)
    number_of_images = unpack('>I', number_of_images)[0]
    rows = images.read(4)
    rows = unpack('>I', rows)[0]
    cols = images.read(4)
    cols = unpack('>I', cols)[0]

    # Get metadata for labels
    labels.read(4)  # skip the magic_number
    N = labels.read(4)
    N = unpack('>I', N)[0]

    if number_of_images != N:
        raise Exception('number of labels did not match the number of images')

    # Get the data
    x = zeros((N, rows * cols), dtype=uint8)  # Initialize numpy array
    y = zeros((N, 1), dtype=uint8)  # Initialize numpy array
    for i in range(N):
        if i % 1000 == 0:
            print("%s: %i" % (imagefile, i))
        j = 0
        for row in range(rows):
            for col in range(cols):
                tmp_pixel = images.read(1)  # Just a single byte
                tmp_pixel = unpack('>B', tmp_pixel)[0]
                x[i][j] = tmp_pixel
                j += 1
        tmp_label = labels.read(1)
        y[i] = unpack('>B', tmp_label)[0]
    enc = OneHotEncoder()
    enc.fit(y)
    y = enc.transform(y).toarray()
    return (x, y)


epochs = 20000


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


def eval_network(dataset, correct_prediction):
    correct_sum = 0
    total_test = 0
    for i in range(dataset.labels.shape[0] / 1000):
        feed_dict = {x: dataset.images[i * 1000:(i + 1) * 1000],
                     y_: dataset.labels[i * 1000:(i + 1) * 1000],
                     keep_prob: 1.0}
        test_correct = correct_prediction.eval(feed_dict=feed_dict)
        correct_sum += sum(test_correct)
        total_test += len(test_correct)
    return float(correct_sum) / total_test


def add_score(filename, mnist, scoring, epoch, other=''):
    with open(filename, "a") as myfile:
        train = eval_network(mnist.train, scoring)
        test = eval_network(mnist.test, scoring)
        myfile.write("%i;%0.6f;%0.6f;%s\n" % (epoch, train, test, other))


mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

sess.run(tf.initialize_all_variables())

y = tf.nn.softmax(tf.matmul(x, W) + b)

W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv),
                                              reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

sess.run(tf.initialize_all_variables())

# Data loading

if use_original:
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
else:
    mnist = lambda: None
    setattr(mnist, 'train', lambda: None)
    setattr(mnist, 'test', lambda: None)
    setattr(mnist.train, 'images', lambda: None)
    setattr(mnist.train, 'labels', lambda: None)
    setattr(mnist.test, 'images', lambda: None)
    setattr(mnist.test, 'labels', lambda: None)
    xs, ys = get_labeled_data('mnist/train-images-idx3-ubyte.gz',
                              'mnist/train-labels-idx1-ubyte.gz')
    mnist.train.images = xs
    mnist.train.labels = ys
    xst, yst = get_labeled_data('mnist/t10k-images-idx3-ubyte.gz',
                                'mnist/t10k-labels-idx1-ubyte.gz')
    mnist.test.images = xst
    mnist.test.labels = yst
for i in range(epochs):
    if use_original:
        batch = mnist.train.next_batch(50)  # This works
    else:
        # This doesnt work
        batch = (xs[i * 50:(i + 1) * 50], ys[i * 50:(i + 1) * 50])
    if i % 100 == 0:
        add_score('accuracy.csv',
                  mnist,
                  correct_prediction,
                  i)
    train_step.run(feed_dict={x: batch[0],
                              y_: batch[1],
                              keep_prob: 0.5})
add_score('accuracy.csv', mnist, correct_prediction, epochs)

Solution

  • I found the problem. The data needs to be loaded as numpy.float32 (and normalized by dividing it through 255).