运行(一次通过)协方差的计算
Running (one pass) calculation of covariance
我有一组 3d 向量 (x,y,z),我想在不存储向量的情况下计算协方差矩阵。
我会用 C# 实现,但最终我会在微控制器上用 C 实现,所以我需要算法本身,而不是库。
伪代码也很棒。
我想我已经找到了解决办法。它基于这篇关于 how to calculate covariance manually and this one about calculating running variance 的文章。然后根据我对第一篇文章的理解,我调整了后者的算法来计算协方差而不是方差。
public class CovarianceMatrix
{
private int _n;
private Vector _oldMean, _newMean,
_oldVarianceSum, _newVarianceSum,
_oldCovarianceSum, _newCovarianceSum;
public void Push(Vector x)
{
_n++;
if (_n == 1)
{
_oldMean = _newMean = x;
_oldVarianceSum = new Vector(0, 0, 0);
_oldCovarianceSum = new Vector(0, 0, 0);
}
else
{
//_newM = _oldM + (x - _oldM) / _n;
_newMean = new Vector(
_oldMean.X + (x.X - _oldMean.X) / _n,
_oldMean.Y + (x.Y - _oldMean.Y) / _n,
_oldMean.Z + (x.Z - _oldMean.Z) / _n);
//_newS = _oldS + (x - _oldM) * (x - _newM);
_newVarianceSum = new Vector(
_oldVarianceSum.X + (x.X - _oldMean.X) * (x.X - _newMean.X),
_oldVarianceSum.Y + (x.Y - _oldMean.Y) * (x.Y - _newMean.Y),
_oldVarianceSum.Z + (x.Z - _oldMean.Z) * (x.Z - _newMean.Z));
/* .X is X vs Y
* .Y is Y vs Z
* .Z is Z vs X
*/
_newCovarianceSum = new Vector(
_oldCovarianceSum.X + (x.X - _oldMean.X) * (x.Y - _newMean.Y),
_oldCovarianceSum.Y + (x.Y - _oldMean.Y) * (x.Z - _newMean.Z),
_oldCovarianceSum.Z + (x.Z - _oldMean.Z) * (x.X - _newMean.X));
// set up for next iteration
_oldMean = _newMean;
_oldVarianceSum = _newVarianceSum;
}
}
public int NumDataValues()
{
return _n;
}
public Vector Mean()
{
return (_n > 0) ? _newMean : new Vector(0, 0, 0);
}
public Vector Variance()
{
return _n <= 1 ? new Vector(0, 0, 0) : _newVarianceSum.DivideBy(_n - 1);
}
}
如果手头有 Matrix
和 Vector
类,公式很简单:
Vector mean;
Matrix covariance;
for (int i = 0; i < points.size(); ++i) {
Vector diff = points[i] - mean;
mean += diff / (i + 1);
covariance += diff * diff.transpose() * i / (i + 1);
}
covariance *= 1 / points.size()
我个人总是更喜欢这种风格而不是二次计算。代码简短,结果完美。
Matrix
和 Vector
可以有固定的维度,并且可以很容易地为此目的编码。您甚至可以将代码重写为离散浮点计算,避免计算协方差矩阵的对称部分。
注意倒数第二行代码有一个向量外积。并非所有矢量库都能正确解释它。
来自 emu 的代码很优雅,但需要额外的步骤才能正确:
Vector mean;
Matrix covariance;
for (int i = 0; i < points.size(); ++i) {
Vector diff = points[i] - mean;
mean += diff / (i + 1);
covariance += diff * diff.transpose() * i / (i + 1);
}
covariance = covariance/(points.size()-1);
注意归一化协方差的最后一步。
这里用一个简单的R语言例子来说明原理:
a <- matrix(rnorm(22), ncol = 2)
a1 <- a[1:10, ]
a2 <- a[2:11, ]
cov(a1)
cov(a2)
m <- 10
# initial step
m1.1 <- mean(a1[, 1])
m1.2 <- mean(a1[, 2])
c1.11 <- cov(a1)[1, 1]
c1.22 <- cov(a1)[2, 2]
c1.12 <- cov(a1)[1, 2]
#step 1->2
m2.1 <- m1.1 + (a[11, 1] - a[1, 1])/m
m2.2 <- m1.2 + (a[11, 2] - a[1, 2])/m
c2.11 <- c1.11 + (a[11, 1]^2 - a[1, 1]^2)/(m - 1) + (m1.1^2 - m2.1^2) * m/(m - 1)
c2.22 <- c1.22 + (a[11, 2]^2 - a[1, 2]^2)/(m - 1) + (m1.2^2 - m2.2^2) * m/(m - 1)
c2.12 <- c1.12 + (a[11, 1] * a[11, 2] - a[1, 1]*a[1, 2])/(m - 1) +
(m1.1 * m1.2 - m2.1 * m2.2) * m/(m - 1)
cov(a2) - matrix(c(c2.11, c2.12, c2.12, c2.22), ncol=2)
我有一组 3d 向量 (x,y,z),我想在不存储向量的情况下计算协方差矩阵。
我会用 C# 实现,但最终我会在微控制器上用 C 实现,所以我需要算法本身,而不是库。
伪代码也很棒。
我想我已经找到了解决办法。它基于这篇关于 how to calculate covariance manually and this one about calculating running variance 的文章。然后根据我对第一篇文章的理解,我调整了后者的算法来计算协方差而不是方差。
public class CovarianceMatrix
{
private int _n;
private Vector _oldMean, _newMean,
_oldVarianceSum, _newVarianceSum,
_oldCovarianceSum, _newCovarianceSum;
public void Push(Vector x)
{
_n++;
if (_n == 1)
{
_oldMean = _newMean = x;
_oldVarianceSum = new Vector(0, 0, 0);
_oldCovarianceSum = new Vector(0, 0, 0);
}
else
{
//_newM = _oldM + (x - _oldM) / _n;
_newMean = new Vector(
_oldMean.X + (x.X - _oldMean.X) / _n,
_oldMean.Y + (x.Y - _oldMean.Y) / _n,
_oldMean.Z + (x.Z - _oldMean.Z) / _n);
//_newS = _oldS + (x - _oldM) * (x - _newM);
_newVarianceSum = new Vector(
_oldVarianceSum.X + (x.X - _oldMean.X) * (x.X - _newMean.X),
_oldVarianceSum.Y + (x.Y - _oldMean.Y) * (x.Y - _newMean.Y),
_oldVarianceSum.Z + (x.Z - _oldMean.Z) * (x.Z - _newMean.Z));
/* .X is X vs Y
* .Y is Y vs Z
* .Z is Z vs X
*/
_newCovarianceSum = new Vector(
_oldCovarianceSum.X + (x.X - _oldMean.X) * (x.Y - _newMean.Y),
_oldCovarianceSum.Y + (x.Y - _oldMean.Y) * (x.Z - _newMean.Z),
_oldCovarianceSum.Z + (x.Z - _oldMean.Z) * (x.X - _newMean.X));
// set up for next iteration
_oldMean = _newMean;
_oldVarianceSum = _newVarianceSum;
}
}
public int NumDataValues()
{
return _n;
}
public Vector Mean()
{
return (_n > 0) ? _newMean : new Vector(0, 0, 0);
}
public Vector Variance()
{
return _n <= 1 ? new Vector(0, 0, 0) : _newVarianceSum.DivideBy(_n - 1);
}
}
如果手头有 Matrix
和 Vector
类,公式很简单:
Vector mean;
Matrix covariance;
for (int i = 0; i < points.size(); ++i) {
Vector diff = points[i] - mean;
mean += diff / (i + 1);
covariance += diff * diff.transpose() * i / (i + 1);
}
covariance *= 1 / points.size()
我个人总是更喜欢这种风格而不是二次计算。代码简短,结果完美。
Matrix
和 Vector
可以有固定的维度,并且可以很容易地为此目的编码。您甚至可以将代码重写为离散浮点计算,避免计算协方差矩阵的对称部分。
注意倒数第二行代码有一个向量外积。并非所有矢量库都能正确解释它。
来自 emu 的代码很优雅,但需要额外的步骤才能正确:
Vector mean;
Matrix covariance;
for (int i = 0; i < points.size(); ++i) {
Vector diff = points[i] - mean;
mean += diff / (i + 1);
covariance += diff * diff.transpose() * i / (i + 1);
}
covariance = covariance/(points.size()-1);
注意归一化协方差的最后一步。
这里用一个简单的R语言例子来说明原理:
a <- matrix(rnorm(22), ncol = 2)
a1 <- a[1:10, ]
a2 <- a[2:11, ]
cov(a1)
cov(a2)
m <- 10
# initial step
m1.1 <- mean(a1[, 1])
m1.2 <- mean(a1[, 2])
c1.11 <- cov(a1)[1, 1]
c1.22 <- cov(a1)[2, 2]
c1.12 <- cov(a1)[1, 2]
#step 1->2
m2.1 <- m1.1 + (a[11, 1] - a[1, 1])/m
m2.2 <- m1.2 + (a[11, 2] - a[1, 2])/m
c2.11 <- c1.11 + (a[11, 1]^2 - a[1, 1]^2)/(m - 1) + (m1.1^2 - m2.1^2) * m/(m - 1)
c2.22 <- c1.22 + (a[11, 2]^2 - a[1, 2]^2)/(m - 1) + (m1.2^2 - m2.2^2) * m/(m - 1)
c2.12 <- c1.12 + (a[11, 1] * a[11, 2] - a[1, 1]*a[1, 2])/(m - 1) +
(m1.1 * m1.2 - m2.1 * m2.2) * m/(m - 1)
cov(a2) - matrix(c(c2.11, c2.12, c2.12, c2.22), ncol=2)