Search code examples
c#statisticscovariance

Running (one pass) calculation of covariance


I got a set of 3d vectors (x,y,z), and I want to calculate the covariance matrix without storing the vectors.

I will do it in C#, but eventually I will implement it in C on a microcontroller, so I need the algorithm in itself, and not a library.

Pseudocode would be great also.


Solution

  • I think I have found the solution. It is based on this article about how to calculate covariance manually and this one about calculating running variance. And then I adapted the algorithm in the latter to calculate covariance instead of variance, given my understanding of it from the first article.

    public class CovarianceMatrix
    {
        private int _n;
        private Vector _oldMean, _newMean,
                        _oldVarianceSum, _newVarianceSum,
                        _oldCovarianceSum, _newCovarianceSum;
    
        public void Push(Vector x)
        {
            _n++;
            if (_n == 1)
            {
                _oldMean = _newMean = x;
                _oldVarianceSum = new Vector(0, 0, 0);
                _oldCovarianceSum = new Vector(0, 0, 0);
            }
            else
            {
                //_newM = _oldM + (x - _oldM) / _n;
                _newMean = new Vector(
                    _oldMean.X + (x.X - _oldMean.X) / _n,
                    _oldMean.Y + (x.Y - _oldMean.Y) / _n,
                    _oldMean.Z + (x.Z - _oldMean.Z) / _n);
    
                //_newS = _oldS + (x - _oldM) * (x - _newM);
                _newVarianceSum = new Vector(
                    _oldVarianceSum.X + (x.X - _oldMean.X) * (x.X - _newMean.X),
                    _oldVarianceSum.Y + (x.Y - _oldMean.Y) * (x.Y - _newMean.Y),
                    _oldVarianceSum.Z + (x.Z - _oldMean.Z) * (x.Z - _newMean.Z));
    
                /* .X is X vs Y
                 * .Y is Y vs Z
                 * .Z is Z vs X
                 */
                _newCovarianceSum = new Vector(
                    _oldCovarianceSum.X + (x.X - _oldMean.X) * (x.Y - _newMean.Y),
                    _oldCovarianceSum.Y + (x.Y - _oldMean.Y) * (x.Z - _newMean.Z),
                    _oldCovarianceSum.Z + (x.Z - _oldMean.Z) * (x.X - _newMean.X));
    
                // set up for next iteration
                _oldMean = _newMean;
                _oldVarianceSum = _newVarianceSum;
            }
        }
        public int NumDataValues()
        {
            return _n;
        }
    
        public Vector Mean()
        {
            return (_n > 0) ? _newMean : new Vector(0, 0, 0);
        }
    
        public Vector Variance()
        {
            return _n <= 1 ? new Vector(0, 0, 0) : _newVarianceSum.DivideBy(_n - 1);
        }
    }