tensorflow · classification · pca · svd · mnist

SVM on MNIST data with PCA using tensorflow


I wanted to learn about PCA via SVD, so I implemented it myself and tried it on MNIST data.

import numpy as np

class PCA(object):

    def __init__(self, X):

        self.N, self.dim, *rest = X.shape
        self.X = X

        '''
        U S V' = svd(X)
        '''
        # Standardize each feature; the small constant guards against
        # division by zero for constant (e.g. always-black) pixels.
        X_std = (X - np.mean(X, axis=0)) / (np.std(X, axis=0) + 1e-13)

        # full_matrices=False keeps U at N x dim rather than N x N,
        # which matters for a 10000 x 784 input.
        self.U, self.s, self.Vt = np.linalg.svd(X_std, full_matrices=False)
        self.V = self.Vt.T
        # The variance captured by each component is proportional to the
        # squared singular value, not the singular value itself.
        self.variance_ratio = self.s ** 2

    def variance_explained_ratio(self):

        '''
        Returns the cumulative fraction of variance captured as each
        principal component is added
        '''
        return np.cumsum(self.variance_ratio) / np.sum(self.variance_ratio)

    def X_projected(self, r):

        '''
        Returns the data X projected onto the first r principal components
        '''
        if r is None:
            r = self.dim
        P_reduce = self.V[:, 0:r]      # dim x r matrix of components
        X_proj = self.X.dot(P_reduce)  # note: projects the raw (unstandardized) X
        return X_proj
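
For reference, the cumulative curve from variance_explained_ratio can guide the choice of r; a minimal sketch, assuming train_data holds the flattened MNIST training images (loaded as in the snippet below):

# Sketch: pick the smallest r that captures at least 95% of the variance.
pca = PCA(train_data)
ratios = pca.variance_explained_ratio()
r = int(np.argmax(ratios >= 0.95)) + 1
print("Components needed for 95%% of the variance: %d" % r)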

Now, with this PCA implementation, I tried applying it to the MNIST data to compare classification performance with and without PCA, using softmax. The code for that is as follows:

import time
import random
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Using the first 10000 images
train_data = mnist.train.images[:10000, :]
train_labels = mnist.train.labels[:10000, :]
pca1 = PCA(train_data)
pca_test = PCA(mnist.test.images)

n_components = 14
X_proj1 = pca1.X_projected(r=n_components)
X_projTest = pca_test.X_projected(r=n_components)

t1 = time.time()

# Softmax regression on the r-dimensional projections.
x = tf.placeholder(tf.float32, [None, n_components])
W = tf.Variable(tf.zeros([n_components, 10]))
b = tf.Variable(tf.zeros([10]))

y = tf.nn.softmax(tf.matmul(x, W) + b)
y_ = tf.placeholder(tf.float32, [None, 10])
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

train_step = tf.train.GradientDescentOptimizer(0.7).minimize(cross_entropy)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

m = 10000

for _ in range(1000):
    # Train on random mini-batches of 100 projected images.
    indices = random.sample(range(0, m), 100)
    batch_xs = X_proj1[indices]
    batch_ys = train_labels[indices]
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

accuracy = sess.run(accuracy,
                    feed_dict={x: X_projTest, y_: mnist.test.labels})
print("Accuracy: %f" % accuracy)
sess.close()
t2 = time.time()
print("Total time taken: %f seconds" % (t2 - t1))

The accuracy I obtain with this is only around 19%, whereas training directly on train_data and train_labels gives more than 90%. Could someone suggest where I'm going wrong?


Solution

  • When we use PCA or feature scaling, we estimate the underlying parameters on the training dataset and then only apply the resulting transform to the test dataset. The test dataset must not be used to compute those parameters; in this case, the SVD should be computed on the training data only. For example, with sklearn's PCA we use the following code:

    from sklearn.decomposition import PCA
    pca = PCA(n_components=14)  # or however many components you want
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)
    

    Note that we fit on the training dataset, X_train, and only transform X_test.

    Similarly, for the implementation above, there is no need to create the pca_test object. Change the X_projTest variable to:

    X_projTest = mnist.test.images.dot(pca1.V[:,0:n_components])
    

    This should fix the low test accuracy.
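
    Going one step further, here is a sketch (not part of the fix above) of splitting the question's class into explicit fit and transform steps, so that the training mean, standard deviation, and components are all reused on the test set. Note that, unlike the one-line fix, this also standardizes the test images with the training statistics before projecting:

    import numpy as np

    class FittedPCA(object):
        '''Hypothetical fit/transform variant of the PCA class above.'''

        def __init__(self, X_train):
            # All parameters are estimated from the training data only.
            self.mean = np.mean(X_train, axis=0)
            self.std = np.std(X_train, axis=0) + 1e-13
            X_std = (X_train - self.mean) / self.std
            _, self.s, Vt = np.linalg.svd(X_std, full_matrices=False)
            self.V = Vt.T

        def transform(self, X, r):
            # Apply the training-set standardization and projection to any data.
            X_std = (X - self.mean) / self.std
            return X_std.dot(self.V[:, 0:r])

    # Usage:
    # pca = FittedPCA(train_data)
    # X_proj1 = pca.transform(train_data, n_components)
    # X_projTest = pca.transform(mnist.test.images, n_components)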