【发布时间】:2020-12-12 08:52:05
【问题描述】:
我一直在尝试用 Python 从头开始创建一个基本的神经网络。这就是我想出的。
激活函数及其导数
def sigmoid(Z):
    """Logistic sigmoid activation, 1 / (1 + e^(-Z)), applied elementwise."""
    exp_neg = np.exp(-Z)
    return 1.0 / (1.0 + exp_neg)
def relu(Z):
    """Rectified linear unit: max(0, Z), applied elementwise."""
    return np.maximum(Z, 0)
# derivatives
def d_relu(Z):
    """Derivative of ReLU: 1 where Z > 0, else 0 (elementwise)."""
    return np.where(Z > 0, 1, 0)
def d_sigmoid(Z):
    """Derivative of the sigmoid: s(Z) * (1 - s(Z)), evaluating s(Z) once."""
    s = 1.0 / (1.0 + np.exp(-Z))
    return s * (1.0 - s)
参数初始化
def initialize_params(layer_dims):
    """Create per-layer parameters for an L-layer network.

    Weights are small Gaussian values (scaled by 0.01) of shape
    (n_l, n_{l-1}); biases are zero column vectors of shape (n_l, 1).
    Both dicts are keyed by the layer index 1 .. len(layer_dims) - 1.
    """
    weights, biases = {}, {}
    for layer in range(1, len(layer_dims)):
        fan_out, fan_in = layer_dims[layer], layer_dims[layer - 1]
        weights[layer] = np.random.randn(fan_out, fan_in) * 0.01
        biases[layer] = np.zeros((fan_out, 1))
    return weights, biases
前向传播
def forward_prop(X, W, b, activation_func='relu'):
    """Forward pass through a single layer.

    Computes the linear step Z = W @ X + b, then applies the named
    activation function.

    Args:
        X: activations from the previous layer, shape (n_prev, m).
        W: weight matrix, shape (n, n_prev).
        b: bias column vector, shape (n, 1).
        activation_func: one of 'relu', 'sigmoid', 'tanh'.

    Returns:
        (Z, A): pre-activation and activation, both shape (n, m).

    Raises:
        ValueError: for an unrecognized activation name. (The original
            fell through silently and then crashed with a NameError
            because `A` was never assigned.)
    """
    Z = np.dot(W, X) + b
    if activation_func == 'relu':
        A = relu(Z)
    elif activation_func == 'sigmoid':
        A = sigmoid(Z)
    elif activation_func == 'tanh':
        # Bug fix: `tanh` was an undefined bare name; use numpy's tanh.
        A = np.tanh(Z)
    else:
        raise ValueError('unknown activation: %s' % activation_func)
    return Z, A
成本函数(Cost)
def compute_cost(A_L, Y):
    """Mean binary cross-entropy cost over m examples.

    Args:
        A_L: output-layer activations (sigmoid probabilities), shape (1, m).
        Y: ground-truth labels in {0, 1}, shape (1, m).

    Returns:
        Scalar cost (squeezed to 0-d).
    """
    m = Y.shape[1]
    # Robustness fix: clip probabilities away from exact 0/1 so that
    # np.log never produces -inf/nan when the sigmoid saturates.
    A_safe = np.clip(A_L, 1e-12, 1.0 - 1e-12)
    cost = -np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe),
                   axis=1, keepdims=True) / m
    return np.squeeze(cost)
后向传播
def backward_prop(dA, A_prev, Z, W, activation_func):
    """Backward pass through a single layer.

    Args:
        dA: gradient of the cost w.r.t. this layer's activation, (n, m).
        A_prev: previous layer's activations, (n_prev, m).
        Z: this layer's pre-activation from the forward pass, (n, m).
        W: this layer's weights, (n, n_prev).
        activation_func: one of 'relu', 'sigmoid', 'tanh'.

    Returns:
        (dA_prev, dW, db): gradients for the previous activation,
        the weights, and the bias.

    Raises:
        ValueError: for an unrecognized activation name. (The original
            fell through and crashed with a NameError on `dZ`.)
    """
    if activation_func == 'relu':
        dZ = dA * d_relu(Z)
    elif activation_func == 'sigmoid':
        dZ = dA * d_sigmoid(Z)
    elif activation_func == 'tanh':
        # Bug fix: `d_tanh` was never defined; tanh'(Z) = 1 - tanh(Z)^2.
        dZ = dA * (1.0 - np.tanh(Z) ** 2)
    else:
        raise ValueError('unknown activation: %s' % activation_func)
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
更新参数
def update_params(W, b, dW, db, learning_rate):
    """One gradient-descent step on every layer's parameters (in place).

    W, b, dW, db are dicts keyed by layer index 1 .. len(W).

    Bug fix: the original iterated `range(1, len(W))`, but the parameter
    dicts are keyed 1..len(W), so the LAST layer was never updated.
    """
    num_param_layers = len(W)
    for l in range(1, num_param_layers + 1):
        W[l] -= learning_rate * dW[l]
        b[l] -= learning_rate * db[l]
    return W, b
神经网络类
class LLayerNeuralNet:
    """L-layer feedforward binary classifier built on the module helpers.

    Args:
        layer_dims: layer sizes [n_x, n_1, ..., 1]; parameters exist for
            layers 1 .. len(layer_dims) - 1.
        activation_funcs: dict mapping layer index (1-based) to an
            activation name ('relu' / 'sigmoid' / 'tanh').
    """

    def __init__(self, layer_dims, activation_funcs):
        self.layer_dims = layer_dims
        self.activation_funcs = activation_funcs
        self.L = len(layer_dims)  # number of layers INCLUDING the input
        # initialize W and b for layers 1 .. L-1
        self.W, self.b = initialize_params(layer_dims)

    def train(self, X, Y, num_iter=1000, learning_rate=0.01, print_costs=True):
        """Batch gradient descent for `num_iter` iterations.

        X has shape (n_x, m); Y has shape (1, m) with labels in {0, 1}.
        Cost is recorded (and optionally printed) every 100 iterations.
        """
        self.Z, self.A = {}, {}
        self.dZ, self.dA, self.dW, self.db = {}, {}, {}, {}
        self.costs = []
        self.learning_rate = learning_rate
        self.A[0] = X
        for i in range(1, num_iter + 1):
            # forward propagation through layers 1 .. L-1
            for l in range(1, self.L):
                self.Z[l], self.A[l] = forward_prop(
                    self.A[l - 1], self.W[l], self.b[l],
                    self.activation_funcs[l])
            # track the cost every 100 iterations
            if i % 100 == 0:
                cost = compute_cost(self.A[self.L - 1], Y)
                self.costs.append(cost)
                if print_costs:
                    print('Cost after %d: %f' % (i, cost))
            # backward propagation, seeded with dJ/dA_L for cross-entropy
            self.dA[self.L - 1] = (- np.divide(Y, self.A[self.L - 1])
                                   + np.divide(1 - Y, 1 - self.A[self.L - 1]))
            for l in reversed(range(1, self.L)):
                self.dA[l - 1], self.dW[l], self.db[l] = backward_prop(
                    self.dA[l], self.A[l - 1], self.Z[l], self.W[l],
                    self.activation_funcs[l])
            # gradient-descent parameter update
            for l in range(1, self.L):
                self.W[l] -= learning_rate * self.dW[l]
                self.b[l] -= learning_rate * self.db[l]

    def predict(self, X_pred):
        """Single forward pass; returns 0/1 predictions thresholded at 0.5.

        Bug fixes vs. the original: `self` was missing from the
        signature, the pass was wrapped in a loop over an undefined
        `num_iter`, and the layer loop ran to self.L + 1, indexing
        W[self.L] and A_pred[self.L], which do not exist (parameters
        are keyed 1 .. self.L - 1).
        """
        A_pred = {0: X_pred}
        for l in range(1, self.L):
            _, A_pred[l] = forward_prop(
                A_pred[l - 1], self.W[l], self.b[l], self.activation_funcs[l])
        return (A_pred[self.L - 1] > 0.5) * 1.0
创建模型并对其进行训练
# 4 parameterized layers; 12288 = 64*64*3 flattened image inputs, 1 sigmoid output.
layer_dims = [12288, 20, 7, 5, 1]
# Layer index -> activation name: ReLU hidden layers, sigmoid output.
funcs = {1:'relu', 2:'relu', 3:'relu', 4:'sigmoid'}
model = LLayerNeuralNet(layer_dims, funcs)
# NOTE(review): train_x / train_y are loaded elsewhere (cat vs. non-cat
# dataset per the discussion below) — not defined in this snippet.
model.train(train_x, train_y, learning_rate=0.0075)
运行 1000 个 epoch 后,成本为:
100 后的成本:0.678129
200 后的成本:0.667676
300 后的成本:0.660471
400 后的成本:0.655489
500 后的成本:0.652033
600 后的成本:0.649628
700 后的成本:0.647949
800 后的成本:0.646774
900 后的成本:0.645949
1000 之后的成本:0.645369
我认为这是不正确的,因为成本并没有降低多少。 有没有办法测试我的实现是否正确?
【问题讨论】:
-
另外,应该如何调试这样的东西?
-
您首先要解决的用例是什么?你能详细说明一下吗?也许你可以在那个用例上试试。
-
我并没有试图解决任何特定的问题。我从 deeplearning.ai coursera 课程中了解了神经网络。我只是想创建自己的神经网络,以确保我理解理论(编程任务不是那么好)。我从上一个作业中取出数据集(猫与非猫分类器)