Search code examples
c++matrixlinear-algebracovariance

C++ PCA - Calculating covariance MATRIX


I'm trying to calculate the Covariance (matrix) of a vector in C++ ...

I have carried out the following:

std::vector<std::vector<double> > data = { {2.5, 2.4}, {0.5, 0.7} };

I have then calculated and subtracted the mean, which gave the following result:

data = { {0.05, -0.05}, {-0.1, 0.1} }

As far as I'm aware, the next step is to transpose the matrix, and multiply the origin together, take the sum and finally divide by the dimensions X - 1..

I have written the following:

void cover(std::vector<std::vector<double> > &d)
{
    double cov = 0.0; 

    for(unsigned i=0; (i < d.size()); i++)
    {
        for(unsigned j=0; (j < d[i].size()); j++)
        {
            cov += d[i][j] * d[j][i] / (d[i].size() - 1);
            std::cout << cov << " ";
        }
        std::cout << std::endl;
    }
}

Where d is the vector after the mean has been subtracted from each of the points Which gives me the result:

0.0025, 0.0075 
0.0125, 0.0225

Where compared with matlab:

2.0000    1.7000
1.7000    1.4450

Does anyone have any ideas to where I am going wrong?

Thanks


Solution

  • This statement:

    As far as I'm aware, the next step is to transpose the matrix, and multiply the origin together, take the sum and finally divide by the dimensions X - 1..

    And this implementation:

    cov += d[i][j] * d[j][i] / (d[i].size() - 1);
    

    Don't say the same thing. Based on the definition here:

    void outer_product(vector<double> row, vector<double> col, vector<vector<double>>& dst) {
        for(unsigned i = 0; i < row.size(); i++) {
            for(unsigned j = 0; j < col.size(); i++) {
                dst[i][j] = row[i] * col[j];
            }
        }
    }
    
    //computes row[i] - val for all i;
    void subtract(vector<double> row, double val, vector<double>& dst) {
        for(unsigned i = 0; i < row.size(); i++) {
            dst[i] = row[i] - val;
        }
    }
    
    //computes m[i][j] + m2[i][j]
    void add(vector<vector<double>> m, vector<vector<double>> m2, vector<vector<double>>& dst) {
        for(unsigned i = 0; i < m.size(); i++) {
            for(unsigned j = 0; j < m[i].size(); j++) { 
                dst[i][j] = m[i][j] + m2[i][j];
            }
        }
    }
    
    double mean(std::vector<double> &data) {
        double mean = 0.0;
    
        for(unsigned i=0; (i < data.size());i++) {
            mean += data[i];
        }
    
        mean /= data.size();
        return mean;
    }
    
    void scale(vector<vector<double>> & d, double alpha) {
        for(unsigned i = 0; i < d.size(); i++) {
            for(unsigned j = 0; j < d[i].size(); j++) {
                d[i][j] *= alpha;
            }
        }
    }
    

    So, given these definitions, we can compute the value for the covariance matrix.

    void compute_covariance_matrix(vector<vector<double>> & d, vector<vector<double>> & dst) {
        for(unsigned i = 0; i < d.size(); i++) {
            double y_bar = mean(d[i]);
            vector<double> d_d_bar(d[i].size());
            subtract(d[i], y_bar, d_d_bar);
            vector<vector<double>> t(d.size());
            outer_product(d_d_bar, d_d_bar, t);
            add(dst, t, dst);
        } 
        scale(dst, 1/(d.size() - 1));
    }