[Posted at]: 2014-09-26 05:14:00
[Question]:
I am trying to implement RPROP, using my old backpropagation code as a base. I am working with a perceptron that has one hidden layer. The RPROP algorithm is fairly simple, but I have not figured everything out yet. Here is my code:
for (j = 1; j <= nnh; j++)
{
    network.input2[j] = network.w12[0][j]; // bias weight stored at index 0
    for (i = 1; i <= nni; i++)
        network.input2[j] += network.input[i] * network.w12[i][j];
    // sigmoid activation of hidden neuron j
    network.output2[j] = (float)(1.0 / (1.0 + Math.Pow(Math.E, beta * -network.input2[j])));
}
for (k = 1; k <= nno; k++)
{
    network.input3[k] = network.w23[0][k]; // bias weight stored at index 0
    for (j = 1; j <= nnh; j++)
        network.input3[k] += network.output2[j] * network.w23[j][k];
    // sigmoid activation of output neuron k
    network.output[k] = (float)(1.0 / (1.0 + Math.Pow(Math.E, beta * -network.input3[k])));
    // accumulate the squared error and compute the output error signal
    error += (float)(0.5 * (t[k - 1] - network.output[k]) * (t[k - 1] - network.output[k]));
    derivativeO[k] = (float)(t[k - 1] - network.output[k]) * network.output[k] * (1 - network.output[k]);
}
for (j = 1; j <= nnh; j++)
{
    // backpropagate the output error signals to hidden neuron j
    saw[j] = 0;
    for (k = 1; k <= nno; k++)
        saw[j] += derivativeO[k] * network.output2[j];
    derivativeH[j] = saw[j] * network.output2[j] * (1 - network.output2[j]);
}
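// Note on sign conventions (assuming beta = 1, so the sigmoid derivative is o * (1 - o)):
// with E = 0.5 * sum (t - o)^2, dE/dnet = -(t - o) * o * (1 - o),
// i.e. derivativeO[k] = -dE/dnet_k, so the gradientH/gradientO values formed
// below are the NEGATIVE of dE/dw rather than dE/dw itself.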
for (j = 1; j <= nnh; j++) // hidden neurons
{
    for (i = 1; i <= nni; i++) // inputs
    {
        network.gradientH[i][j] = network.input[i] * derivativeH[j];
        if (network.gradientH[i][j] * network.gradientHPrev[i][j] > 0)
        {
            // gradient kept its sign: grow the step size and take the step
            network.deltaH[i][j] = Math.Min(network.deltaH[i][j] * npos, dmax);
            network.w12d[i][j] = -Math.Sign(network.gradientH[i][j]) * network.deltaH[i][j];
            network.w12[i][j] += network.w12d[i][j];
            network.gradientHPrev[i][j] = network.gradientH[i][j];
        }
        else if (network.gradientH[i][j] * network.gradientHPrev[i][j] < 0)
        {
            // gradient changed sign: shrink the step size and skip this update
            network.deltaH[i][j] = Math.Max(network.deltaH[i][j] * nneg, dmin);
            network.gradientHPrev[i][j] = 0;
        }
        else if (network.gradientH[i][j] * network.gradientHPrev[i][j] == 0)
        {
            // first pass, or the pass right after a sign change: plain step
            network.w12d[i][j] = -Math.Sign(network.gradientH[i][j]) * network.deltaH[i][j];
            network.w12[i][j] += network.w12d[i][j];
            network.gradientHPrev[i][j] = network.gradientH[i][j];
        }
    }
}
for (k = 1; k <= nno; k++) // outputs
{
    for (j = 1; j <= nnh; j++) // hidden neurons
    {
        network.gradientO[j][k] = network.output2[j] * derivativeO[k];
        if (network.gradientOPrev[j][k] * network.gradientO[j][k] > 0)
        {
            // gradient kept its sign: grow the step size and take the step
            network.deltaO[j][k] = Math.Min(network.deltaO[j][k] * npos, dmax);
            network.w23d[j][k] = -Math.Sign(network.gradientO[j][k]) * network.deltaO[j][k];
            network.w23[j][k] += network.w23d[j][k];
            network.gradientOPrev[j][k] = network.gradientO[j][k];
        }
        else if (network.gradientOPrev[j][k] * network.gradientO[j][k] < 0)
        {
            // gradient changed sign: shrink the step size and skip this update
            network.deltaO[j][k] = Math.Max(network.deltaO[j][k] * nneg, dmin);
            network.gradientOPrev[j][k] = 0;
        }
        else if (network.gradientOPrev[j][k] * network.gradientO[j][k] == 0)
        {
            // first pass, or the pass right after a sign change: plain step
            network.w23d[j][k] = -Math.Sign(network.gradientO[j][k]) * network.deltaO[j][k];
            network.w23[j][k] += network.w23d[j][k];
            network.gradientOPrev[j][k] = network.gradientO[j][k];
        }
    }
}
The first three for loops are the same ones I used in backprop, and that part of the code works fine. The problem shows up during the weight update. I am no longer sure that I am computing the partial derivatives correctly. The network sometimes converges and sometimes just behaves randomly. I thought I had everything right. Any ideas?
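For comparison, here is a minimal, self-contained sketch of the standard RPROP update rule (the iRPROP- variant, whose three-branch structure matches the code above), as described by Riedmiller and Braun. The names RpropSketch, Step, grad, gradPrev, and delta are illustrative, not from the original code, and the sketch assumes grad holds the true gradient dE/dw; if the gradients above instead hold the negative gradient (they are built from (t - o), like the backprop deltas below, which are added to the weights), the sign of the step has to be flipped to still move downhill.

using System;

static class RpropSketch
{
    const double EtaPlus = 1.2, EtaMinus = 0.5;    // step-size growth/shrink factors
    const double DeltaMax = 50.0, DeltaMin = 1e-6; // bounds on the step size

    // One iRPROP- step for a single weight. grad is dE/dw for the current
    // epoch; gradPrev and delta carry state between epochs (delta starts
    // at something like 0.1). Returns the weight change to apply.
    public static double Step(double grad, ref double gradPrev, ref double delta)
    {
        double change = 0.0;
        if (grad * gradPrev > 0)
        {
            // gradient kept its sign: grow the step and move against the gradient
            delta = Math.Min(delta * EtaPlus, DeltaMax);
            change = -Math.Sign(grad) * delta;
            gradPrev = grad;
        }
        else if (grad * gradPrev < 0)
        {
            // a minimum was overshot: shrink the step, skip this update,
            // and zero gradPrev so the next epoch takes the plain branch
            delta = Math.Max(delta * EtaMinus, DeltaMin);
            gradPrev = 0.0;
        }
        else
        {
            // first epoch, or the epoch right after a sign change
            change = -Math.Sign(grad) * delta;
            gradPrev = grad;
        }
        return change;
    }
}

Keeping the whole per-weight rule in one function like this makes it straightforward to apply the same update to every weight matrix, bias columns included.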
The for loops start at 1 because, in the earlier backpropagation implementation, the bias values are stored in the first element of the weight matrices. Here is the previous backpropagation weight-update implementation, which worked well; maybe it will make things clearer:
for (j = 1; j <= nnh; j++)
{
    // bias weight (index 0) is updated with the plain error signal
    network.w12d[0][j] = learningRate * derivativeH[j] + momentum * network.w12d[0][j];
    network.w12[0][j] += network.w12d[0][j];
    for (i = 1; i <= nni; i++)
    {
        network.w12d[i][j] = learningRate * network.input[i] * derivativeH[j] + momentum * network.w12d[i][j];
        network.w12[i][j] += network.w12d[i][j];
    }
}
for (k = 1; k <= nno; k++)
{
    // bias weight (index 0) is updated with the plain error signal
    network.w23d[0][k] = learningRate * derivativeO[k] + momentum * network.w23d[0][k];
    network.w23[0][k] += network.w23d[0][k];
    for (j = 1; j <= nnh; j++)
    {
        network.w23d[j][k] = learningRate * network.output2[j] * derivativeO[k] + momentum * network.w23d[j][k];
        network.w23[j][k] += network.w23d[j][k];
    }
}
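One structural difference stands out when comparing the two versions: this backprop update handles the bias weights at index 0 explicitly, while the RPROP loops above start at i = 1 and j = 1 and never touch index 0, so the biases would stay frozen. A hedged sketch of how the hidden-layer bias column could be folded in under the same storage convention, reusing the hypothetical RpropSketch.Step from above (biasGradPrev and biasDelta are illustrative state arrays, not from the original code):

// Hypothetical per-bias RPROP state, created once; step sizes start at 0.1
double[] biasGradPrev = new double[nnh + 1];
double[] biasDelta = new double[nnh + 1];
for (int j = 1; j <= nnh; j++) biasDelta[j] = 0.1;

// Per epoch: the bias acts like an input clamped to 1, so its gradient is
// the error signal itself (same sign convention as gradientH above)
for (int j = 1; j <= nnh; j++)
    network.w12[0][j] += (float)RpropSketch.Step(derivativeH[j], ref biasGradPrev[j], ref biasDelta[j]);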
[Discussion]:
- How are you setting the initial step sizes in network.deltaO and network.deltaH, and the values of npos and nneg? I have found that if those values are "too large" (what counts as too large unfortunately depends on the problem), this algorithm can behave erratically. (FWIW, I don't see anything wrong in the code.)
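Following up on that comment: the constants recommended in the original RPROP paper (Riedmiller and Braun, 1993) are the usual starting point. A minimal initialization sketch under the question's naming and storage conventions (the 0.1 initial step size is the paper's suggested default, not a value from the posted code):

// Standard RPROP constants (Riedmiller & Braun, 1993)
float npos = 1.2f;      // step-size increase factor (eta+)
float nneg = 0.5f;      // step-size decrease factor (eta-)
float dmax = 50.0f;     // upper bound on step sizes
float dmin = 0.000001f; // lower bound on step sizes
float delta0 = 0.1f;    // initial step size

// Every step size starts at delta0 and every previous gradient at 0
for (int i = 0; i <= nni; i++)
    for (int j = 1; j <= nnh; j++)
    {
        network.deltaH[i][j] = delta0;
        network.gradientHPrev[i][j] = 0;
    }
for (int j = 0; j <= nnh; j++)
    for (int k = 1; k <= nno; k++)
    {
        network.deltaO[j][k] = delta0;
        network.gradientOPrev[j][k] = 0;
    }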
Tags: c# neural-network backpropagation