Search code examples
pythonnumpymachine-learningpcacovariance

Manual Implementation of PCA produces a wrong plot, where eigenvectors are not orthogonal


I need to plot my eigenvectors that I calculated like this:

def fit(self, X):
    
    '''
    fits sorted eigenvalues and eigenvectors to class attributes. same goes for variance and explained variance.
    '''
    
    n_samples = X.shape[0]
    # We center the data and compute the sample covariance matrix.
    X -= np.mean(X, axis=0)
    self.cov_matrix_ = np.dot(X.T, X) / (n_samples-1)
    #test = np.cov(X)
    
    #Negative values are ignored with eigh
    (self.eigvalues_, self.components_) = np.linalg.eigh(self.cov_matrix_)
    
    idx = self.eigvalues_.argsort()[::-1]   
    self.eigvalues_ = self.eigvalues_[idx]
    self.components_ = self.components_[:,idx]
    self.variance_ = np.sum(self.eigvalues_)
    self.explained_variance_ = self.eigvalues_ / self.variance_
    
def transform(self, X):
    #project data onto eigenvectors
    print(self.components_.shape, X.shape)
    self.projected_ = X @ self.components_.T
    return self.projected_

Into the plot of the first 2 features of my dataset.

The shape of my self.components_ which are my 240 eigenvectors of my 100x240 dataset, have shape 240x240. After plotting the first two values of my 2 eigenvectors with the largest eigenvalue, it comes out like this:

pca = PCA()

pca.fit(subsample)

#pca.transform(subsample)

plt.scatter(subsample[:,0], subsample[:,1], edgecolor='none', alpha=0.5)
plt.quiver(pca.components_[0,0], pca.components_[0,1], 
       angles='xy', scale_units='xy', scale=1, width=0.002 )
plt.quiver(pca.components_[1,0], pca.components_[1,1], 
       angles='xy', scale_units='xy', scale=1, width=0.002 )

enter image description here

What am I doing wrong?


Solution

  • Your should sort your eigenvectors by the rows, not the columns, that is

    self.components_ = self.components_[:,idx]
    

    should be

    self.components_ = self.components_[idx]
    

    Also, you should ensure that you plot with equal aspect ratio, as the quivers may be misaligned:

    plt.gca().set_aspect('equal')
    

    It is good practice to include a minimum working example in your code, so remember that next time :). I had to infer what the rest of your code could be in order to get a minimum working example. Anyways, here is my proposed code:

    import numpy as np 
    from matplotlib import pyplot as plt
    
    class PCA:
        def fit(self, X):
            
            '''
            fits sorted eigenvalues and eigenvectors to class attributes. same goes for variance and explained variance.
            '''
            
            n_samples = X.shape[0]
            # We center the data and compute the sample covariance matrix.
            X -= np.mean(X, axis=0)
            self.cov_matrix_ = np.dot(X.T, X) / (n_samples-1)
            #test = np.cov(X)
            
            #Negative values are ignored with eigh
            (self.eigvalues_, self.components_) = np.linalg.eigh(self.cov_matrix_)
            
            idx = self.eigvalues_.argsort()[::-1]   
            self.eigvalues_ = self.eigvalues_[idx]
            self.components_ = self.components_[idx]
            self.variance_ = np.sum(self.eigvalues_)
            self.explained_variance_ = self.eigvalues_ / self.variance_
            
        def transform(self, X):
            #project data onto eigenvectors
            print(self.components_.shape, X.shape)
            self.projected_ = X @ self.components_.T
            return self.projected_
    
    pca = PCA()
    
    # Generate some dummy data
    subsample = np.random.randn(69,2)*0.1 
    subsample[:,0] = subsample[:,0]*8 
    subsample[:,1] = subsample[:,0]*2 + subsample[:,1] # Add some correlations
    
    pca.fit(subsample)
    
    plt.scatter(subsample[:,0], subsample[:,1], edgecolor='none', alpha=0.5)
    plt.quiver(pca.components_[0,0]*2, pca.components_[0,1]*2, # *2 to make arrows larger
           angles='xy', scale_units='xy', scale=1, width=0.006)
    plt.quiver(pca.components_[1,0]*2, pca.components_[1,1]*2, 
           angles='xy', scale_units='xy', scale=1, width=0.006)
    plt.gca().set_aspect('equal')
    plt.show()