【Posted】:2020-08-20 18:44:48
【Problem description】:
I'm fairly new to machine learning, and as a beginner project I decided to implement my own neural network from scratch in Python using NumPy. So I hand-coded forward propagation, backpropagation, and the methods for computing the function derivatives.
For my test data I wrote a function that generates values of sin(x). When I finally create and train the network, the outputs fluctuate a lot from run to run and are clearly off from the true values (although they are a decent improvement over the initial predictions).
I've tried tweaking many settings, including the learning rate, the number of neurons, the number of layers, the number of training iterations, and the activation function, but in the end the squared cost over my input data is still about 0.1.
I believe my derivative functions and chain-rule expressions are correct, because when I use only a single input sample I get a near-perfect answer.
Adding more input data, however, significantly reduces the network's accuracy.
Do you have any suggestions for improving this network, or any idea what I'm currently doing wrong?
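For reference, the squared cost referred to above, together with the two derivatives the code below uses for the output layer and the tanh activation (per sample, with prediction $a$, target $y$, and pre-activation $z$), is

$$C = (a - y)^2, \qquad \frac{\partial C}{\partial a} = 2\,(a - y), \qquad \frac{\partial a}{\partial z} = 1 - \tanh^2(z)$$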
My code:
import numpy as np

#Generate input data for the network
def inputgen():
    inputs=[]
    outputs=[]
    i=0.01
    for x in range(10000):
        inputs.append([round(i,7)])
        outputs.append([np.sin(i)]) #output is sin(x)
        i+=0.0001
    return [inputs,outputs]

#set training input and output
inputs = np.array(inputgen()[0])
outputs = np.array(inputgen()[1])

#sigmoid activation function and derivative
def sigmoid(x):
    return 1/(1+np.exp(-x))
def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))

#tanh activation function and derivative
def tanh(x):
    return np.tanh(x)
def tanh_derivative(x):
    return 1-((tanh(x))**2)

#Layer class
class Layer:
    def __init__(self,num_neurons,num_inputs,inputs):
        self.num_neurons = num_neurons #number of neurons in hidden layers
        self.num_inputs = num_inputs #number of input neurons (1 in the case of the test data)
        self.inputs = inputs
        self.weights = np.random.rand(num_inputs,num_neurons)*np.sqrt(1/num_inputs) #weights initialized with Xavier initialization
        self.biases = np.zeros((1,num_neurons)) #biases initialized as 0
        self.z = np.dot(self.inputs,self.weights)+self.biases #Calculate z
        self.a = tanh(self.z) #Calculate activation
        self.dcost_a = [] #derivative of cost with respect to activation
        self.da_z = [] #derivative of activation with respect to z
        self.dz_w = [] #derivative of z with respect to weight
        self.dcost_w = [] #derivative of cost with respect to weight
        self.dcost_b = [] #derivative of cost with respect to bias

    #functions used in forward propagation
    def compute_z(self):
        self.z = np.dot(self.inputs,self.weights)+self.biases
        return self.z
    def activation(self):
        self.a = tanh(self.compute_z())
    def forward(self):
        self.activation()

#Network class
class Network:
    def __init__(self,num_layers,num_neurons,num_inputs,inputs,num_outputs,outputs):
        self.learningrate = 0.01 #learning rate
        self.num_layers=num_layers #number of hidden layers
        self.num_neurons=num_neurons #number of neurons in hidden layers
        self.num_inputs = num_inputs #number of input neurons
        self.inputs=inputs
        self.expected_outputs=outputs
        self.layers=[]
        for x in range(num_layers):
            if x==0:
                self.layers.append(Layer(num_neurons,num_inputs,inputs)) #Initial layer with given inputs
            else:
                #Other layers have an input which is the activation of the previous layer
                self.layers.append(Layer(num_neurons,len(self.layers[x-1].a[0]),self.layers[x-1].a))
        self.prediction = Layer(num_outputs,num_neurons,self.layers[-1].a) #prediction
        self.layers.append(self.prediction)
        self.cost = (self.prediction.a-self.expected_outputs)**2 #cost

    #forward propagation
    def forwardprop(self):
        for x in range(self.num_layers+1):
            if(x!=0):
                self.layers[x].inputs=self.layers[x-1].a
            self.layers[x].forward()
        self.prediction=self.layers[-1] #update prediction value

    def backprop(self):
        self.cost = (self.prediction.a-self.expected_outputs)**2
        for x in range(len(self.layers)-1,-1,-1):
            if(x==len(self.layers)-1):
                dcost_a = 2*(self.prediction.a-self.expected_outputs) #derivative of cost with respect to activation for output layer
            else:
                #derivative of cost with respect to activation for hidden layers (chain rule)
                dcost_a=np.zeros((len(self.layers[x].inputs),self.num_neurons)).T
                dcost_a1=self.layers[x+1].dcost_a.T
                da_z1=self.layers[x+1].da_z.T
                dz_a=(self.layers[x+1].weights).T
                for z in range(len(dcost_a1)):
                    dcost_a+=((dcost_a1[z])*da_z1)
                    for j in range(len(dcost_a)):
                        dcost_a[j]*=dz_a[z][j]
                dcost_a=dcost_a.T
            self.layers[x].dcost_a=dcost_a
            #derivative of activation with respect to z
            da_z = tanh_derivative(self.layers[x].z)
            self.layers[x].da_z=da_z
            #derivative of z with respect to weights
            dz_w = []
            if x!=0:
                dz_w=self.layers[x-1].a
            else:
                dz_w=self.inputs
            self.layers[x].dz_w=dz_w
        #change weights and biases
        for x in range(len(self.layers)-1,-1,-1):
            #Average each of the derivatives over all training samples
            self.layers[x].dcost_a=np.average(self.layers[x].dcost_a,axis=0)
            self.layers[x].da_z=np.average(self.layers[x].da_z,axis=0)
            self.layers[x].dz_w=(np.average(self.layers[x].dz_w,axis=0)).T
            self.layers[x].dcost_w = np.zeros((self.layers[x].weights.shape))
            self.layers[x].dcost_b = self.layers[x].dcost_a*self.layers[x].da_z
            for v in range(len(self.layers[x].dz_w)):
                self.layers[x].dcost_w[v] = (self.layers[x].dcost_a*self.layers[x].da_z)*self.layers[x].dz_w[v]
            #update weights and biases
            self.layers[x].weights-=(self.layers[x].dcost_w)*self.learningrate
            self.layers[x].biases-=(self.layers[x].dcost_b)*self.learningrate

    #train the network
    def train(self):
        for x in range(1000):
            self.backprop()
            self.forwardprop()

Network1 = Network(3,3,1,inputs,1,outputs)
Network1.train()
print(Network1.prediction.a)
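One way to test the claim that the derivatives are correct is a finite-difference check: nudge a single weight up and down, recompute the cost, and compare the numerical slope with the analytic gradient from backprop(). A minimal sketch against the Network1 object above (the epsilon value and the choice of weight index are arbitrary, illustrative picks):

eps = 1e-5
layer = Network1.layers[0]
w_original = layer.weights[0,0]
#cost with the weight nudged up
layer.weights[0,0] = w_original + eps
Network1.forwardprop()
cost_plus = np.average((Network1.prediction.a - outputs)**2)
#cost with the weight nudged down
layer.weights[0,0] = w_original - eps
Network1.forwardprop()
cost_minus = np.average((Network1.prediction.a - outputs)**2)
#restore the weight and the network state
layer.weights[0,0] = w_original
Network1.forwardprop()
numeric_grad = (cost_plus - cost_minus)/(2*eps)
print(numeric_grad) #compare with the dcost_w[0][0] value computed in backprop()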
Sample input:
[[0.01 ]
[0.0101]
[0.0102]
...
[1.0097]
[1.0098]
[1.0099]]
Sample output:
[[0.37656753]
[0.37658777]
[0.37660802]
...
[0.53088048]
[0.53089046]
[0.53090043]]
Expected output:
[[0.00999983]
[0.01009983]
[0.01019982]
...
[0.84667225]
[0.84672546]
[0.84677865]]
【Discussion】:
-
It's also common to adjust the learning rate inside the program; consider using a cross-entropy function to do that.
-
@JSwang Hey, thanks for the help. I did tune the learning rate many times though (from 0.01 to 0.5), and I still get fairly inaccurate results.
-
You did? Not by hardcoding it, but by adding a function that adjusts the learning rate while the program runs?
-
@JSwang Oh, I see what you mean. Sorry, I misunderstood. I haven't tried that. How exactly does that work? Should I change the learning rate on every training iteration?
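For context, one common way to do this is a decay schedule that shrinks the learning rate a little on every iteration instead of leaving it hardcoded. A minimal sketch using the Network1 object from the question (the initial rate and the decay constant are arbitrary, untested values):

initial_lr = 0.1
decay = 0.001 #arbitrary decay constant
for step in range(1000):
    #lower the learning rate as training progresses
    Network1.learningrate = initial_lr/(1 + decay*step)
    Network1.backprop()
    Network1.forwardprop()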
Tags: python machine-learning neural-network