【Question Title】: What's wrong with my backpropagation?
【Posted】: 2018-02-13 13:42:00
【Question】:

I'm trying to write a neural network from scratch in Python. To check that everything works, I wanted to overfit the network, but the loss seems to explode at first, then fall back to its initial value and get stuck there (it doesn't converge). I went through my code and think I've found the cause: my understanding or implementation of backpropagation is incorrect, but there may be other reasons as well. Can anyone help me, or at least point me in the right direction?

import numpy as np
import matplotlib.pyplot as plt

# Initialize weights and biases given dimensions (for this example the dimensions are set to [12288, 64, 1])
def initialize_parameters(dims):
    # Initiate parameters
    parameters = {} 
    L = len(dims) # Number of layers in the network (input layer included)

    # Loop over the given dimensions. Initialize random weights and set biases to zero.
    for i in range(1, L):
        parameters["W" + str(i)] = np.random.randn(dims[i], dims[i-1]) * 0.01
        parameters["b" + str(i)] = np.zeros([dims[i], 1])

    return parameters

# Activation functions (the deriv branches expect the activation value, not the pre-activation)
def relu(x, deriv=False):
    if deriv:
        # Works for either the pre-activation or the activation, since relu(z) > 0 iff z > 0
        return 1. * (x > 0)
    else:
        return np.maximum(0, x)

def sigmoid(x, deriv=False):
    if deriv:
        # Expects x to already be the sigmoid output, i.e. x = sigmoid(z)
        return x * (1 - x)
    else:
        return 1 / (1 + np.exp(-x))


# Forward and backward pass for a 2-layer neural network (1st layer ReLU, 2nd layer sigmoid)
def forward_backward(X, Y, parameters):
    # Dictionary for storing gradients
    grads = {}

    # Number of training examples
    m = Y.shape[1]

    # First layer
    Z1 = np.dot(parameters["W1"], X) + parameters["b1"]
    A1 = relu(Z1)

    # Second layer
    Z2 = np.dot(parameters["W2"], A1) + parameters["b2"]
    AL = sigmoid(Z2)

    # Compute cost
    cost = (-1 / m) * np.sum(np.multiply(Y, np.log(AL)) + np.multiply(1 - Y, np.log(1 - AL)))

    # Backpropagation
    # Second Layer
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    dZ2 = dAL * sigmoid(AL, deriv=True)
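    # Together with dAL above this simplifies to dZ2 = AL - Y (sigmoid output + cross-entropy cost)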
    grads["dW2"] = np.dot(dZ2, A1.T) / m
    grads["db2"] = np.sum(dZ2, axis=1, keepdims=True) / m

    # First layer
    dA1 = np.dot(parameters["W2"].T, dZ2)
    dZ1 = dA1 * relu(A1, deriv=True)
    grads["dW1"] = np.dot(dZ1, X.T)
    grads["db1"] = np.sum(dZ1, axis=1, keepdims=True) / m

    return AL, grads, cost
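
# Note: update_parameters is called in the training loop below but isn't shown in the
# question. An assumed plain-gradient-descent version, consistent with how it is called:
def update_parameters(grads, parameters, learning_rate):
    n_layers = len(parameters) // 2  # number of weight/bias pairs
    for i in range(1, n_layers + 1):
        parameters["W" + str(i)] -= learning_rate * grads["dW" + str(i)]
        parameters["b" + str(i)] -= learning_rate * grads["db" + str(i)]
    return parameters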

# Hyperparameters
dims = [12288, 64, 1]
epochs = 2000
learning_rate = 0.1

# Initialize parameters
parameters = initialize_parameters(dims)
log_list = []

# Train the network
for i in range(epochs):
    # Take the first 10 examples as X and Y (train and labels are the dataset arrays, not shown here)
    x = np.array(train[0:10], ndmin=2).T
    y = np.array(labels[0:10], ndmin=2).T

    # Perform forward and backward pass
    AL, grads, cost = forward_backward(x, y, parameters)

    # Append the cost to log_list for plotting
    log_list.append(cost)

    # Update parameters with computed gradients 
    parameters = update_parameters(grads, parameters, learning_rate)

plt.plot(log_list)
plt.title("Loss of the network")
plt.show()
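
A quick way to check whether the backward pass above is correct is to compare its analytic gradients with numerical (finite-difference) gradients. Below is a minimal sketch, assuming the forward_backward and initialize_parameters functions above; the batch is just random data with illustrative shapes:

# Numerical gradient check for the backward pass above (illustrative sketch)
def gradient_check(X, Y, parameters, grads, eps=1e-7):
    for name, W in parameters.items():
        # Perturb one randomly chosen entry of each parameter to keep the check cheap
        idx = tuple(np.random.randint(s) for s in W.shape)
        original = W[idx]

        W[idx] = original + eps
        _, _, cost_plus = forward_backward(X, Y, parameters)
        W[idx] = original - eps
        _, _, cost_minus = forward_backward(X, Y, parameters)
        W[idx] = original  # restore the original value

        numeric = (cost_plus - cost_minus) / (2 * eps)
        analytic = grads["d" + name][idx]
        print(name, idx, "numeric:", numeric, "analytic:", analytic)

# Example usage on a tiny random batch
X_check = np.random.randn(12288, 5) * 0.01
Y_check = (np.random.rand(1, 5) > 0.5).astype(float)
params_check = initialize_parameters([12288, 64, 1])
_, grads_check, _ = forward_backward(X_check, Y_check, params_check)
gradient_check(X_check, Y_check, params_check, grads_check)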

【Comments】:

  • Where do you compute the error? Would it help if I posted a Python neural network solution to the XOR problem?
  • Sorry, my mistake. I had replaced the functions with the equations. Anything would help.

Tags: python neural-network backpropagation


【Solution 1】:

I'm having trouble finding where you compute the error gradient, and a sample of the input training data would also help...

I don't know whether this will be useful to you, but I'll share my Python neural network solution for learning the XOR problem.

import numpy as np


def sigmoid_function(x, derivative=False):
    """
    Sigmoid function
    "x" is the input and "y" the output; the nonlinear properties of this function mean that
    the rate of change is slower at the extremes and faster in the centre. Put plainly,
    we want the neuron to "make its mind up" instead of indecisively staying in the middle.
    :param x: Float
    :param derivative: Boolean
    :return: Float
    """
    if derivative:
        return x * (1 - x)  # Derivative, expressed in terms of the sigmoid output x
    else:
        return 1 / (1 + np.exp(-x))


# create dataset for XOR problem
input_data = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
ideal_output = np.array([[0.0], [1.0], [1.0], [0.0]])


#initialize variables
learning_rate = 0.1
epoch = 50000 #number of iterations basically - one round of forward and back propagation is called an epoch

# get the second element from the numpy array shape field to detect the count of features for input layer
input_layer_neurons = input_data.shape[1]
hidden_layer_neurons = 3 #number of hidden layer neurons
output_layer_neurons = 1 #number of output layer neurons

#init weights & biases
weights_hidden = np.random.uniform(size=(input_layer_neurons, hidden_layer_neurons))
bias_hidden = np.random.uniform(size=(1, hidden_layer_neurons))
weights_output = np.random.uniform(size=(hidden_layer_neurons, output_layer_neurons))
bias_output = np.random.uniform(size=(1, output_layer_neurons))

for i in range(epoch):

    #forward propagation
    hidden_layer_input_temp = np.dot(input_data, weights_hidden) #matrix dot product to adjust for weights in the layer
    hidden_layer_input = hidden_layer_input_temp + bias_hidden #adjust for bias
    hidden_layer_activations = sigmoid_function(hidden_layer_input) #use the activation function
    output_layer_input_temp = np.dot(hidden_layer_activations, weights_output)
    output_layer_input = output_layer_input_temp + bias_output
    output = sigmoid_function(output_layer_input) #final output

    #backpropagation (where adjusting of the weights happens)
    error = ideal_output - output #error at the output layer
    if (i % 1000 == 0):
        print("Error: {}".format(np.mean(abs(error))))

    #use derivatives to compute slope of output and hidden layers
    slope_output_layer = sigmoid_function(output, derivative=True)
    slope_hidden_layer = sigmoid_function(hidden_layer_activations, derivative=True)

    #calculate deltas
    delta_output = error * slope_output_layer
    error_hidden_layer = delta_output.dot(weights_output.T) #calculates the error at hidden layer
    delta_hidden = error_hidden_layer * slope_hidden_layer

    #change the weights
    weights_output += hidden_layer_activations.T.dot(delta_output) * learning_rate
    bias_output += np.sum(delta_output, axis=0, keepdims=True) * learning_rate
    weights_hidden += input_data.T.dot(delta_hidden) * learning_rate
    bias_hidden += np.sum(delta_hidden, axis=0, keepdims=True) * learning_rate
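
After training, the learned weights can be sanity-checked with one more forward pass (a small usage sketch reusing the variables defined above):

# Evaluate the trained network on the four XOR inputs
hidden_check = sigmoid_function(np.dot(input_data, weights_hidden) + bias_hidden)
predictions = sigmoid_function(np.dot(hidden_check, weights_output) + bias_output)
print(np.round(predictions, 3))  # should be close to [[0], [1], [1], [0]]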

【Comments】:
