Why is my neural network predicting -0 (PYTHON - backpropagation XOR)?
I am working on developing a neural network from scratch. The issue seems to be in my relu backpropagation. When I train the model it sometimes outputs -0 and sometimes it outputs good predictions (relatively). Can someone tell me whether my backward pass is wrong, or whether there is a reason my relu would predict -0?
--
[EDIT]
Fixed the issue of it predicting -0, but now it just predicts 0 for every input of the XOR. Can someone look over my backward pass?
import numpy as np

# Each layer in our neural network
class NeuralLayer:
    def __init__(self, input_neurons, output_neurons):
        self.weights = np.random.randn(input_neurons, output_neurons) * np.sqrt(2. / input_neurons)
        self.bias = np.ones((1, output_neurons)) * 0.5

    # Two different activations, sigmoid by default
    def sigmoid(self, neurons):
        self.act = 1.0 / (1.0 + np.exp(-neurons))
        return self.act

    def sigmoidBackward(self, grad):
        return grad * self.act * (1 - self.act)

    def relu(self, neurons):
        self.act = (neurons > 0)
        return neurons * self.act

    def reluBackward(self, grad):
        return grad * self.act

    # Forward pass for this layer
    def forward(self, input, activation):
        self.input = np.atleast_2d(input)
        if activation == 'sigmoid':
            return self.sigmoid(input @ self.weights + self.bias)
        else:
            return self.relu(input @ self.weights + self.bias)

    # Backward pass for this layer
    def backward(self, grad, activation):
        if activation == 'sigmoid':
            grad = self.sigmoidBackward(np.atleast_2d(grad))
        else:
            grad = self.reluBackward(np.atleast_2d(grad))
        self.grad_weights = np.matmul(self.input.T, grad)
        self.grad_bias = grad.sum()
        return grad @ self.weights.T

    def step(self, step_size):
        self.weights -= step_size * self.grad_weights
        self.bias -= step_size * self.grad_bias

# Our neural net
class NeuralNetwork:
    # Dynamically create all layers
    def __init__(self, input_neurons, hidden_neurons, layer_count, activation, output_neurons=1):
        self.activation = activation
        # Used to ensure input neurons match inputted data
        self.neuron_safety = input_neurons
        assert layer_count >= 2 and output_neurons >= 1
        # Input layer
        self.layers = [NeuralLayer(input_neurons, hidden_neurons)]
        # Hidden layers
        for i in range(layer_count - 2):
            self.layers.append(NeuralLayer(hidden_neurons, hidden_neurons))
        # Output layer
        self.layers.append(NeuralLayer(hidden_neurons, output_neurons))

    # Forward pass through each layer
    def forward(self, inp):
        assert inp.shape[0] == self.neuron_safety
        for layer in self.layers:
            inp = layer.forward(inp, self.activation)
        return inp

    def backward(self, grad):
        for layer in reversed(self.layers):
            grad = layer.backward(grad, self.activation)

    def step(self, step_size=0.01):
        for layer in self.layers:
            layer.step(step_size)

    # Loss function - only 1 output neuron
    def meanSquaredError(self, preds, labels):
        self.labels = labels
        self.preds = preds
        return (self.preds - self.labels) ** 2

    def meanSquaredErrorGrad(self):
        return 2 * (self.preds - self.labels)

# Create a neural network with 2 inputs, 16 hidden neurons in each layer, and 4 layers
net = NeuralNetwork(2, 16, 4, 'relu')
epochs = 5000

# Input data (A, B) for XOR
X = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
# Expected output data
Y = np.array([[0], [0], [1], [1]])

for i in range(epochs):
    preds = []
    for idx, x in enumerate(X):
        predictions = net.forward(x)
        preds.append(predictions)
        loss = net.meanSquaredError(predictions, Y[idx])
        loss_grad = net.meanSquaredErrorGrad()
        net.backward(loss_grad)
        net.step()

print("Model predicted: {}\nactual values: {} ".format(preds, Y.T))
Output:
Model predicted: [array([[-0.]]), array([[-0.]]), array([[1.]]), array([[-0.]])]
actual values: [[0 0 1 1]]
Sometimes the predictions are perfect, but most of the time at least one of the predictions is -0.
The bias gradient is incorrect. You are using self.grad_bias = grad.sum(), which computes the sum over the entire matrix. It needs to be self.grad_bias = grad.sum(axis=0, keepdims=True) so that it computes a 1 x output_neurons array, which will correctly update the bias vector. Otherwise, grad.sum() gives you a single number that gets used to update all of the biases, which is not correct.
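To make the shape difference concrete, here is a minimal standalone check (the 2 x 3 gradient is made-up illustration data, not taken from the training run above):

import numpy as np

# Hypothetical gradient for a batch of 2 samples and 3 output neurons
grad = np.array([[0.1, -0.2, 0.3],
                 [0.4,  0.5, -0.6]])

print(grad.sum())                       # 0.5 -- a single scalar added to every bias
print(grad.sum(axis=0, keepdims=True))  # [[ 0.5  0.3 -0.3]] -- shape (1, 3), one entry per bias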
Also, make sure you update the forward pass of the ReLU to np.maximum(neurons, 0), as described in the comments.
def relu(self, neurons):
    self.act = (neurons > 0)
    return np.maximum(neurons, 0)
The gradient of the activation will be 0 or 1, depending on which parts of the input were positive.
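As a quick illustration of where the -0 comes from, here is a minimal check with made-up inputs (not part of the original code):

import numpy as np

neurons = np.array([-2.0, 0.0, 3.0])
mask = (neurons > 0)

print(neurons * mask)          # [-0.  0.  3.] -- a negative times False (0.0) yields -0.0
print(np.maximum(neurons, 0))  # [ 0.  0.  3.] -- clamps cleanly to +0.0
print(mask.astype(float))      # [0. 0. 1.] -- the saved mask still serves as the ReLU gradient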
Finally, for the XOR problem you typically do not use ReLU as the activation of the output layer, because it is not bounded to [0-1] as the XOR targets require. The reason you get good results with the sigmoid activation function is that the dynamic range of that activation suits the XOR problem well. As an experiment, you can change the output layer to sigmoid and keep ReLU in the hidden layers. If you do this, you should get performance just as good as using sigmoid throughout.
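As a rough sketch of that experiment: the two methods below would replace NeuralNetwork.forward and NeuralNetwork.backward so that the hidden layers use ReLU and only the output layer uses sigmoid. This per-layer split is my own suggested modification, not part of the original code.

    # Sketch: hidden layers use ReLU, output layer uses sigmoid so predictions stay in (0, 1)
    def forward(self, inp):
        assert inp.shape[0] == self.neuron_safety
        for layer in self.layers[:-1]:
            inp = layer.forward(inp, 'relu')
        return self.layers[-1].forward(inp, 'sigmoid')

    def backward(self, grad):
        # Output layer first (sigmoid), then the hidden layers in reverse (ReLU)
        grad = self.layers[-1].backward(grad, 'sigmoid')
        for layer in reversed(self.layers[:-1]):
            grad = layer.backward(grad, 'relu')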