使用 Python-numpy 时,神经网络的数值梯度检查对矩阵不起作用
Neural network numerical gradient check not working with matrices using Python-numpy
我正在尝试使用 Python 3 和 numpy 为神经网络实现一个简单的数值梯度检查。
它对简单的一维函数有效,但应用于参数矩阵时会失败。
我猜测要么是我的代价函数对矩阵的计算不正确,要么是我进行数值梯度检查的方式有误。
请看下面的代码,感谢您的帮助!
import numpy as np
import random
import copy
def gradcheck_naive(f, x, h=1e-4, tol=1e-5):
    """Check an analytic gradient against a central-difference estimate.

    Each entry of ``x`` is perturbed by ``+h`` and ``-h`` in turn, and the
    resulting slope of the cost is compared with the matching entry of the
    gradient returned by ``f``. The outcome is printed to stdout and the
    check stops at the first mismatch.

    Arguments:
    f -- a function that takes a single argument (x) and returns a tuple
         (cost, grad); grad must have the same shape as x
    x -- the point (numpy array) to check the gradient at; it is never
         modified, every perturbation is applied to a copy
    h -- step size of the central difference (default 1e-4)
    tol -- largest accepted relative difference between the analytic and
           the numerical gradient (default 1e-5)

    Returns:
    None; the result is reported through print().
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.inexact):
        # On an integer/bool array the +/- h perturbation would be truncated
        # away on assignment, making every numerical gradient zero.
        x = x.astype(float)

    # Remember the RNG state so that every evaluation of f sees the same
    # random draws if f uses the `random` module internally.
    rndstate = random.getstate()
    _, grad = f(x)  # analytic gradient at the original point
    grad = np.asarray(grad)

    # The gradient of a scalar cost with respect to x has exactly x's shape;
    # anything else cannot be compared entry by entry.
    if grad.shape != x.shape:
        print("Gradient check failed.")
        print("Gradient shape %s does not match input shape %s" % (
            str(grad.shape), str(x.shape)))
        return

    # np.ndindex yields every multi-index of x (and nothing for empty x).
    for ix in np.ndindex(x.shape):
        random.setstate(rndstate)
        xp = x.copy()
        xp[ix] += h
        fxp, _ = f(xp)

        random.setstate(rndstate)
        xn = x.copy()
        xn[ix] -= h
        fxn, _ = f(xn)

        numgrad = (fxp - fxn) / (2 * h)

        # Relative difference, with the denominator floored at 1 so that
        # tiny gradients are compared in absolute terms.
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > tol:
            print("Gradient check failed.")
            print("First gradient error found at index %s" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad))
            return

    print("Gradient check passed!")
# Sanity check with an element-wise function: the cost is sum(exp(x)) and
# the gradient handed to the checker is exp(x).
def exp_f(x):
    """Return the cost sum(exp(x)) and the gradient exp(x)."""
    return np.sum(np.exp(x)), np.exp(x)


gradcheck_naive(exp_f, np.random.randn(4, 5))  # this works fine

# Sanity check with matrices.
# Forward pass
W = np.random.randn(5, 10)
x = np.random.randn(10, 3)
D = W.dot(x)

# Backpropagation pass: W is used as the gradient with respect to x.
# Note that W is (5, 10) while x is (10, 3).
gradx = W


def func_f(x):
    """Return the cost sum(W.dot(x)) and the candidate gradient gradx."""
    cost = np.sum(W.dot(x))
    return cost, gradx


gradcheck_naive(func_f, np.random.randn(10, 3))  # this does not work (grad check fails)
我明白了! (我的数学老师会很自豪...)
简短的回答是:我混淆了矩阵点积和逐元素乘积。
当使用逐元素乘积时,梯度等于:
W = np.array([[2, 4], [3, 5], [3, 1]])
x = np.array([[1, 7], [5, -1], [4, 7]])
D = W * x  # element-wise multiplication

# For an element-wise product, d/dx sum(W * x) is W itself (same shape as x).
gradx = W


def func_f(x):
    """Return the cost sum(W * x) and its gradient with respect to x."""
    cost = np.sum(W * x)
    return cost, gradx


gradcheck_naive(func_f, np.random.randn(3, 2))
使用点积时,梯度变为:
W = np.array([[2, 4], [3, 5]])
x = np.array([[1, 7], [5, -1], [5, 1]])
D = x.dot(W)

# Gradient of sum(D) with respect to D: a matrix of ones shaped like D.
unitary = np.ones_like(D)
# Chain rule for D = x.dot(W): d cost / d x = (d cost / d D).dot(W^T),
# which has the same shape as x.
gradx = unitary.dot(np.transpose(W))


def func_f(x):
    """Return the cost sum(x.dot(W)) and its gradient with respect to x."""
    return np.sum(x.dot(W)), gradx


gradcheck_naive(func_f, np.random.randn(3, 2))
我也想知道逐元素乘积如何处理维度不相同的矩阵,如下所示:
# A length-10 vector and a 3x10 matrix (x is drawn first, then W).
x = np.random.randn(10)
W = np.random.randn(3, 10)

# Element-wise product with the operands in both orders.
D1, D2 = x * W, W * x
结果是 D1=D2(维度与 W 相同,即 3x10),我的理解是 numpy 将 x 广播为 3x10 矩阵,从而可以进行逐元素乘法。
结论:如有疑问,就用小矩阵把计算写出来,找出错误所在。
我正在尝试使用 Python 3 和 numpy 为神经网络实现一个简单的数值梯度检查。
它对简单的一维函数有效,但应用于参数矩阵时会失败。
我猜测要么是我的代价函数对矩阵的计算不正确,要么是我进行数值梯度检查的方式有误。
请看下面的代码,感谢您的帮助!
import numpy as np
import random
import copy
def gradcheck_naive(f, x, h=1e-4, tol=1e-5):
    """Check an analytic gradient against a central-difference estimate.

    Each entry of ``x`` is perturbed by ``+h`` and ``-h`` in turn, and the
    resulting slope of the cost is compared with the matching entry of the
    gradient returned by ``f``. The outcome is printed to stdout and the
    check stops at the first mismatch.

    Arguments:
    f -- a function that takes a single argument (x) and returns a tuple
         (cost, grad); grad must have the same shape as x
    x -- the point (numpy array) to check the gradient at; it is never
         modified, every perturbation is applied to a copy
    h -- step size of the central difference (default 1e-4)
    tol -- largest accepted relative difference between the analytic and
           the numerical gradient (default 1e-5)

    Returns:
    None; the result is reported through print().
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.inexact):
        # On an integer/bool array the +/- h perturbation would be truncated
        # away on assignment, making every numerical gradient zero.
        x = x.astype(float)

    # Remember the RNG state so that every evaluation of f sees the same
    # random draws if f uses the `random` module internally.
    rndstate = random.getstate()
    _, grad = f(x)  # analytic gradient at the original point
    grad = np.asarray(grad)

    # The gradient of a scalar cost with respect to x has exactly x's shape;
    # anything else cannot be compared entry by entry.
    if grad.shape != x.shape:
        print("Gradient check failed.")
        print("Gradient shape %s does not match input shape %s" % (
            str(grad.shape), str(x.shape)))
        return

    # np.ndindex yields every multi-index of x (and nothing for empty x).
    for ix in np.ndindex(x.shape):
        random.setstate(rndstate)
        xp = x.copy()
        xp[ix] += h
        fxp, _ = f(xp)

        random.setstate(rndstate)
        xn = x.copy()
        xn[ix] -= h
        fxn, _ = f(xn)

        numgrad = (fxp - fxn) / (2 * h)

        # Relative difference, with the denominator floored at 1 so that
        # tiny gradients are compared in absolute terms.
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > tol:
            print("Gradient check failed.")
            print("First gradient error found at index %s" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad))
            return

    print("Gradient check passed!")
# Sanity check with an element-wise function: the cost is sum(exp(x)) and
# the gradient handed to the checker is exp(x).
def exp_f(x):
    """Return the cost sum(exp(x)) and the gradient exp(x)."""
    return np.sum(np.exp(x)), np.exp(x)


gradcheck_naive(exp_f, np.random.randn(4, 5))  # this works fine

# Sanity check with matrices.
# Forward pass
W = np.random.randn(5, 10)
x = np.random.randn(10, 3)
D = W.dot(x)

# Backpropagation pass: W is used as the gradient with respect to x.
# Note that W is (5, 10) while x is (10, 3).
gradx = W


def func_f(x):
    """Return the cost sum(W.dot(x)) and the candidate gradient gradx."""
    cost = np.sum(W.dot(x))
    return cost, gradx


gradcheck_naive(func_f, np.random.randn(10, 3))  # this does not work (grad check fails)
我明白了! (我的数学老师会很自豪...)
简短的回答是:我混淆了矩阵点积和逐元素乘积。
当使用逐元素乘积时,梯度等于:
W = np.array([[2, 4], [3, 5], [3, 1]])
x = np.array([[1, 7], [5, -1], [4, 7]])
D = W * x  # element-wise multiplication

# For an element-wise product, d/dx sum(W * x) is W itself (same shape as x).
gradx = W


def func_f(x):
    """Return the cost sum(W * x) and its gradient with respect to x."""
    cost = np.sum(W * x)
    return cost, gradx


gradcheck_naive(func_f, np.random.randn(3, 2))
使用点积时,梯度变为:
W = np.array([[2, 4], [3, 5]])
x = np.array([[1, 7], [5, -1], [5, 1]])
D = x.dot(W)

# Gradient of sum(D) with respect to D: a matrix of ones shaped like D.
unitary = np.ones_like(D)
# Chain rule for D = x.dot(W): d cost / d x = (d cost / d D).dot(W^T),
# which has the same shape as x.
gradx = unitary.dot(np.transpose(W))


def func_f(x):
    """Return the cost sum(x.dot(W)) and its gradient with respect to x."""
    return np.sum(x.dot(W)), gradx


gradcheck_naive(func_f, np.random.randn(3, 2))
我也想知道逐元素乘积如何处理维度不相同的矩阵,如下所示:
# A length-10 vector and a 3x10 matrix (x is drawn first, then W).
x = np.random.randn(10)
W = np.random.randn(3, 10)

# Element-wise product with the operands in both orders.
D1, D2 = x * W, W * x
结果是 D1=D2(维度与 W 相同,即 3x10),我的理解是 numpy 将 x 广播为 3x10 矩阵,从而可以进行逐元素乘法。
结论:如有疑问,就用小矩阵把计算写出来,找出错误所在。