寻找对数表示矩阵的特征值
Finding eigenvalues of log-represented matrix
我目前正在尝试查找条件非常差的实对称矩阵的特征值,其元素可能非常 small/large(从 1e-8 到 1e15)并且可能为负数。虽然矩阵很小 (4x4),因此执行速度不是主要问题。
对角化之前的所有操作都是使用 "logsumexp" 技巧的变体完成的(适用于来自该线程 numerically stable way to multiply log probability matrices in numpy 的矩阵-矩阵/矩阵-向量乘法,并进一步适应负系数),所以我最终得到两个矩阵(sgnM,logM),分别包含 M 中元素的绝对值的符号和对数。这部分效果很好。
但是,我没有找到任何将其作为输入并一直使用这些数字技巧直到 returns 特征值的等效 eigvalsh 文档。
现在,我只使用
scipy.linalg.eigvalsh(sgnM*np.exp(logM))
仍然存在精度问题(当我更改参数时,我感兴趣的特征值从 1e-4 变为 1e-9 左右,我可以看到 1e-9 左右的那些比第一个以至于从那里得出的结果不再有意义)。
现有的线性代数引擎中是否隐藏了一些类似于我正在寻找的函数?如果没有,我是否仍然可以依靠某些 Lapack/MKL/Blas 例程来实现它,还是需要从头开始?
我设法实现了它,不幸的是它并没有给我带来太大的改善。
我仍然把它留在这里,以防有人想尝试解决另一个问题(这是基于 https://github.com/mateuv/MetodosNumericos/blob/master/python/NumericalMethodsInEngineeringWithPython/jacobi.py:
的代码
import numpy as np
from scipy.special import logsumexp
def eigsh_jacobi_stable(log_a, sgn_a, tol = 1.0e-14): # Jacobi method
def maxElem(log_a): # Find largest off-diag. element a[k,l]
n = len(log_a)
log_aMax = np.log(1e-36)
for i in range(n-1):
for j in range(i+1,n):
if log_a[i,j] >= log_aMax:
log_aMax = log_a[i,j]
k = i; l = j
return log_aMax,k,l
def rotate(log_a, sgn_a, log_p, sgn_p, k, l): # Rotate to make a[k,l] = 0
n = len(log_a)
log_aDiff, sgn_aDiff = logsumexp(a=[log_a[l,l], log_a[k,k]],
b=[sgn_a[l,l], -sgn_a[k,k]],
return_sign=True)
if log_a[k,l] - log_aDiff < np.log(1.0e-36):
log_t = log_a[k,l] - log_aDiff
sgn_t = sgn_a[k,l] * sgn_aDiff
else:
log_phi = log_aDiff - np.log(2) - log_a[k,l]
sgn_phi = sgn_aDiff * sgn_a[k,l]
log_t = -logsumexp(a=[log_phi, .5*logsumexp(a=[2*log_phi, 0])])
sgn_t = 1.
if sgn_phi < 0:
sgn_t = -1
log_c = - .5 * logsumexp(a=[2*log_t, 0])
sgn_c = 1.
log_s = log_t + log_c
sgn_s = sgn_t * sgn_c
log_tau = log_s - logsumexp(a=[log_c, 0])
sgn_tau = sgn_s
log_temp, sgn_temp = log_a[k,l], sgn_a[k,l]
log_a[k,l] = np.log(1.0e-36)
sgn_a[k, l] = 0
log_a[k,k], sgn_a[k,k] = logsumexp(a=[log_a[k,k], log_t + log_temp],
b=[sgn_a[k,k], -sgn_t * sgn_temp],
return_sign=True)
log_a[l,l], sgn_a[l,l] = logsumexp(a=[log_a[l,l], log_t + log_temp],
b=[sgn_a[l,l], sgn_t * sgn_temp],
return_sign=True)
for i in range(k): # Case of i < k
log_temp, sgn_temp = log_a[i,k], sgn_a[i,k]
log_plop, sgn_plop = logsumexp(a=[log_a[i, l], log_tau + log_temp],
b=[sgn_a[i, l], sgn_tau * sgn_temp],
return_sign=True)
log_a[i,k], sgn_a[i,k] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_a[i, l]],
b=[sgn_temp, -sgn_tau * sgn_a[i, l]],
return_sign=True)
log_a[i,l], sgn_a[i,l] = logsumexp(a=[log_a[i, l], log_s + log_plop],
b=[sgn_a[i, l], sgn_plop * sgn_s],
return_sign=True)
for i in range(k+1,l): # Case of k < i < l
log_temp, sgn_temp = log_a[k, i], sgn_a[k,i]
log_plop, sgn_plop = logsumexp(a=[log_a[i, l], log_tau + log_a[k,i]],
b=[sgn_a[i, l], sgn_tau * sgn_a[k,i]],
return_sign=True)
log_a[k, i], sgn_a[k, i] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_a[i, l]],
b=[sgn_temp, -sgn_tau * sgn_a[i, l]],
return_sign=True)
log_a[i,l], sgn_a[i,l] = logsumexp(a=[log_a[i, l], log_s + log_plop],
b=[sgn_a[i, l], sgn_plop * sgn_s],
return_sign=True)
for i in range(l+1,n): # Case of i > l
log_temp, sgn_temp = log_a[k, i], sgn_a[k,i]
log_plop, sgn_plop = logsumexp(a=[log_a[l,i], log_tau + log_temp],
b=[sgn_a[l,i], sgn_tau * sgn_temp],
return_sign=True)
log_a[k, i], sgn_a[k, i] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_a[l,i]],
b=[sgn_temp, -sgn_tau * sgn_a[l,i]],
return_sign=True)
log_a[l,i], sgn_a[l,i] = logsumexp(a=[log_a[l,i], log_s + log_plop],
b=[sgn_a[l, i], sgn_plop * sgn_s],
return_sign=True)
for i in range(n): # Update transformation matrix
log_temp, sgn_temp = log_p[i,k], sgn_p[i,k]
log_plop, sgn_plop = logsumexp(a=[log_p[i,l], log_tau + log_p[i,k]],
b=[sgn_p[i,l], sgn_tau * sgn_p[i,k]],
return_sign=True)
log_p[k, i], sgn_p[k, i] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_p[i, l]],
b=[sgn_temp, -sgn_tau * sgn_p[i, l]],
return_sign=True)
log_p[i,l], sgn_p[i,l] = logsumexp(a=[log_p[i, l], log_s + log_plop],
b=[sgn_p[i, l], sgn_plop * sgn_s],
return_sign=True)
n = len(log_a)
maxRot = 5*(n**2) # Set limit on number of rotations
log_p, sgn_p = np.log(1e-36) * np.ones_like(log_a), np.zeros_like(log_a)
np.fill_diagonal(log_p, 0)
np.fill_diagonal(sgn_p, 1)
for i in range(maxRot): # Jacobi rotation loop
log_aMax,k,l = maxElem(log_a)
# print(log_aMax)
if np.exp(log_aMax) < tol:
return np.sort(np.exp(np.diag(log_a)) * np.diag(sgn_a))
rotate(log_a, sgn_a, log_p, sgn_p ,k,l)
raise RuntimeError('Jacobi method did not converge')
if __name__ == '__main__':
from scipy.linalg import eigvalsh
for _ in range(100):
test = np.random.randn(10, 10)
test += test.transpose()
log_test, sgn_test = np.log(np.abs(test)), np.sign(test)
eigs_jacobi = np.sort(eigsh_jacobi_stable(log_test, sgn_test)) # this sort is unnecessary, already done internally
eigs_scipy = np.sort(eigvalsh(test))
print(eigs_jacobi-eigs_scipy)
我目前正在尝试查找条件非常差的实对称矩阵的特征值,其元素可能非常 small/large(从 1e-8 到 1e15)并且可能为负数。虽然矩阵很小 (4x4),因此执行速度不是主要问题。
对角化之前的所有操作都是使用 "logsumexp" 技巧的变体完成的(适用于来自该线程 numerically stable way to multiply log probability matrices in numpy 的矩阵-矩阵/矩阵-向量乘法,并进一步适应负系数),所以我最终得到两个矩阵(sgnM,logM),分别包含 M 中元素的绝对值的符号和对数。这部分效果很好。
但是,我没有找到任何将其作为输入并一直使用这些数字技巧直到 returns 特征值的等效 eigvalsh 文档。 现在,我只使用
scipy.linalg.eigvalsh(sgnM*np.exp(logM))
仍然存在精度问题(当我更改参数时,我感兴趣的特征值从 1e-4 变为 1e-9 左右,我可以看到 1e-9 左右的那些比第一个以至于从那里得出的结果不再有意义)。
现有的线性代数引擎中是否隐藏了一些类似于我正在寻找的函数?如果没有,我是否仍然可以依靠某些 Lapack/MKL/Blas 例程来实现它,还是需要从头开始?
我设法实现了它,不幸的是它并没有给我带来太大的改善。 我仍然把它留在这里,以防有人想尝试解决另一个问题(这是基于 https://github.com/mateuv/MetodosNumericos/blob/master/python/NumericalMethodsInEngineeringWithPython/jacobi.py:
的代码import numpy as np
from scipy.special import logsumexp
def eigsh_jacobi_stable(log_a, sgn_a, tol = 1.0e-14): # Jacobi method
def maxElem(log_a): # Find largest off-diag. element a[k,l]
n = len(log_a)
log_aMax = np.log(1e-36)
for i in range(n-1):
for j in range(i+1,n):
if log_a[i,j] >= log_aMax:
log_aMax = log_a[i,j]
k = i; l = j
return log_aMax,k,l
def rotate(log_a, sgn_a, log_p, sgn_p, k, l): # Rotate to make a[k,l] = 0
n = len(log_a)
log_aDiff, sgn_aDiff = logsumexp(a=[log_a[l,l], log_a[k,k]],
b=[sgn_a[l,l], -sgn_a[k,k]],
return_sign=True)
if log_a[k,l] - log_aDiff < np.log(1.0e-36):
log_t = log_a[k,l] - log_aDiff
sgn_t = sgn_a[k,l] * sgn_aDiff
else:
log_phi = log_aDiff - np.log(2) - log_a[k,l]
sgn_phi = sgn_aDiff * sgn_a[k,l]
log_t = -logsumexp(a=[log_phi, .5*logsumexp(a=[2*log_phi, 0])])
sgn_t = 1.
if sgn_phi < 0:
sgn_t = -1
log_c = - .5 * logsumexp(a=[2*log_t, 0])
sgn_c = 1.
log_s = log_t + log_c
sgn_s = sgn_t * sgn_c
log_tau = log_s - logsumexp(a=[log_c, 0])
sgn_tau = sgn_s
log_temp, sgn_temp = log_a[k,l], sgn_a[k,l]
log_a[k,l] = np.log(1.0e-36)
sgn_a[k, l] = 0
log_a[k,k], sgn_a[k,k] = logsumexp(a=[log_a[k,k], log_t + log_temp],
b=[sgn_a[k,k], -sgn_t * sgn_temp],
return_sign=True)
log_a[l,l], sgn_a[l,l] = logsumexp(a=[log_a[l,l], log_t + log_temp],
b=[sgn_a[l,l], sgn_t * sgn_temp],
return_sign=True)
for i in range(k): # Case of i < k
log_temp, sgn_temp = log_a[i,k], sgn_a[i,k]
log_plop, sgn_plop = logsumexp(a=[log_a[i, l], log_tau + log_temp],
b=[sgn_a[i, l], sgn_tau * sgn_temp],
return_sign=True)
log_a[i,k], sgn_a[i,k] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_a[i, l]],
b=[sgn_temp, -sgn_tau * sgn_a[i, l]],
return_sign=True)
log_a[i,l], sgn_a[i,l] = logsumexp(a=[log_a[i, l], log_s + log_plop],
b=[sgn_a[i, l], sgn_plop * sgn_s],
return_sign=True)
for i in range(k+1,l): # Case of k < i < l
log_temp, sgn_temp = log_a[k, i], sgn_a[k,i]
log_plop, sgn_plop = logsumexp(a=[log_a[i, l], log_tau + log_a[k,i]],
b=[sgn_a[i, l], sgn_tau * sgn_a[k,i]],
return_sign=True)
log_a[k, i], sgn_a[k, i] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_a[i, l]],
b=[sgn_temp, -sgn_tau * sgn_a[i, l]],
return_sign=True)
log_a[i,l], sgn_a[i,l] = logsumexp(a=[log_a[i, l], log_s + log_plop],
b=[sgn_a[i, l], sgn_plop * sgn_s],
return_sign=True)
for i in range(l+1,n): # Case of i > l
log_temp, sgn_temp = log_a[k, i], sgn_a[k,i]
log_plop, sgn_plop = logsumexp(a=[log_a[l,i], log_tau + log_temp],
b=[sgn_a[l,i], sgn_tau * sgn_temp],
return_sign=True)
log_a[k, i], sgn_a[k, i] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_a[l,i]],
b=[sgn_temp, -sgn_tau * sgn_a[l,i]],
return_sign=True)
log_a[l,i], sgn_a[l,i] = logsumexp(a=[log_a[l,i], log_s + log_plop],
b=[sgn_a[l, i], sgn_plop * sgn_s],
return_sign=True)
for i in range(n): # Update transformation matrix
log_temp, sgn_temp = log_p[i,k], sgn_p[i,k]
log_plop, sgn_plop = logsumexp(a=[log_p[i,l], log_tau + log_p[i,k]],
b=[sgn_p[i,l], sgn_tau * sgn_p[i,k]],
return_sign=True)
log_p[k, i], sgn_p[k, i] = logsumexp(a=[log_temp, log_s + log_plop],
b=[sgn_temp, -sgn_plop * sgn_s],
return_sign=True)
log_plop, sgn_plop = logsumexp(a=[log_temp, log_tau + log_p[i, l]],
b=[sgn_temp, -sgn_tau * sgn_p[i, l]],
return_sign=True)
log_p[i,l], sgn_p[i,l] = logsumexp(a=[log_p[i, l], log_s + log_plop],
b=[sgn_p[i, l], sgn_plop * sgn_s],
return_sign=True)
n = len(log_a)
maxRot = 5*(n**2) # Set limit on number of rotations
log_p, sgn_p = np.log(1e-36) * np.ones_like(log_a), np.zeros_like(log_a)
np.fill_diagonal(log_p, 0)
np.fill_diagonal(sgn_p, 1)
for i in range(maxRot): # Jacobi rotation loop
log_aMax,k,l = maxElem(log_a)
# print(log_aMax)
if np.exp(log_aMax) < tol:
return np.sort(np.exp(np.diag(log_a)) * np.diag(sgn_a))
rotate(log_a, sgn_a, log_p, sgn_p ,k,l)
raise RuntimeError('Jacobi method did not converge')
if __name__ == '__main__':
from scipy.linalg import eigvalsh
for _ in range(100):
test = np.random.randn(10, 10)
test += test.transpose()
log_test, sgn_test = np.log(np.abs(test)), np.sign(test)
eigs_jacobi = np.sort(eigsh_jacobi_stable(log_test, sgn_test)) # this sort is unnecessary, already done internally
eigs_scipy = np.sort(eigvalsh(test))
print(eigs_jacobi-eigs_scipy)