完成第一个神经网络后的下一步是什么？ [关闭]答案

【问题标题】：What is the next step when having completed first neural network? [closed]完成第一个神经网络后的下一步是什么？ [关闭]
【发布时间】：2020-01-28 12:52:49
【问题描述】：

我是 YouTube 频道 3Blue1Brown 的忠实粉丝，他关于神经网络的系列视频让我对这个主题非常着迷。我决定用 Python 从零开始编写自己的神经网络，并深入研究其中的数学。于是我借助 MNIST 手写数字数据库动手开始，并在两周后成功完成了这项任务。从那以后，我一直在进一步完善代码，使神经元数量和隐藏层数量都可以在代码中方便地调整。我还尝试了不同的激活函数。我得到的最佳准确率约为 95%，使用的是 2 个隐藏层（各 16 个神经元），训练约 5 分钟。

现在，我的问题相当模糊，但我现在正在寻找该领域的下一个挑战，你们有什么建议吗？

我现在已经设置了框架，所以我希望使用更大的数据集或其他东西来解决一些新类型的问题，或者我应该更多地解决现有问题以进一步提高输出的准确性？

你们觉得呢？

你的，埃米尔

（如果有人感兴趣，这里是代码）

import pickle
import gzip
import numpy as np
import random
import time

import pickle
import gzip
import numpy as np
import random
import time

class mnistClass:
    """Fully connected neural network trained with mini-batch gradient descent.

    Creating an instance loads ``mnist.pkl.gz`` from the working directory,
    builds random weights and biases, trains immediately and prints the
    accuracy measured on the validation set.

    Hidden layers use the shifted sigmoid ``2*sigmoid(x) - 1`` (shiftType 1);
    the output layer uses the plain sigmoid (shiftType 0). The cost of one
    sample is the sum of squared differences between the network output and a
    one-hot label vector.

    Naming used throughout: ``Z[k]`` is ``Weights[k]`` applied to the input of
    layer ``k`` (bias not yet added) and ``R[k]`` is the activation of layer
    ``k``. Layers are counted from 0; the input itself is not a layer.
    """

    def __init__(self, inputAmount=784, layers=2, layerSize=16, outputSize=10, loops=1, sampleSize=100):
        """Load the data, initialise the parameters, train and report accuracy.

        Args:
            inputAmount: number of values in one input sample.
            layers: number of layers, counting the output layer but not the
                input; must be at least 1.
            layerSize: number of nodes in every hidden layer.
            outputSize: number of nodes in the output layer.
            loops: number of passes over the training set.
            sampleSize: number of samples whose gradients are averaged for
                one parameter update.

        Raises:
            ValueError: if ``layers`` is smaller than 1.
        """
        if layers < 1:
            # Without this check a network with no layers would "train" without
            # ever updating a parameter.
            raise ValueError("layers must be at least 1, got " + str(layers))
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file; only use a trusted copy of mnist.pkl.gz.
        with gzip.open('mnist.pkl.gz', 'rb') as f:
            train_set, valid_set, _test_set = pickle.load(f, encoding='latin1')
        self.A, self.y = train_set      # training samples and their labels
        self.V, self.v2 = valid_set     # validation samples and their labels
        self.dataSize = len(self.A)
        self.inputAmount = inputAmount
        self.layers = layers
        self.layerSize = layerSize
        self.outputSize = outputSize
        self.loops = loops
        self.sampleSize = sampleSize
        # Samples that do not fill a complete batch are never visited.
        self.iterations = int(self.dataSize/self.sampleSize)
        self.clock = time.time()
        self.Weights = []
        self.Biases = []
        self.initializeArrays()
        self.initializeTraining()
        print("Accuracy: " + str(self.getAccuracy()) + "%")

    def initializeArrays(self):
        """Append one weight matrix and one bias vector per layer.

        All values are drawn uniformly from [-0.5, 0.5). ``Weights[k]`` has
        shape (nodes in layer k, nodes feeding layer k). The method appends to
        the existing ``self.Weights`` / ``self.Biases`` lists, so they must be
        empty when it is called.
        """
        for i in range(self.layers):
            if self.layers - i > 2:     # hidden-to-hidden weight matrices
                self.Weights.append(np.random.rand(self.layerSize, self.layerSize)-0.5)
            if self.layers - i > 1:     # one bias vector per hidden layer
                self.Biases.append(np.random.rand(self.layerSize)-0.5)
        if self.layers > 1:
            # Input-to-first-hidden goes in front, last-hidden-to-output at the end.
            self.Weights.insert(0, np.random.rand(self.layerSize, self.inputAmount)-0.5)
            self.Weights.append(np.random.rand(self.outputSize, self.layerSize)-0.5)
        else:
            # Single layer: the input is connected directly to the output.
            self.Weights.append(np.random.rand(self.outputSize, self.inputAmount)-0.5)
        self.Biases.append(np.random.rand(self.outputSize)-0.5)

    def sigmoid(self, x, shiftType):
        """Return the activation of ``x``.

        Args:
            x: scalar or array of pre-activation values.
            shiftType: 0 for the plain sigmoid with values in (0, 1),
                1 for ``2*sigmoid(x) - 1`` with values in (-1, 1).

        Raises:
            ValueError: if ``shiftType`` is neither 0 nor 1.
        """
        logistic = 1/(1+np.exp(-x))
        if shiftType == 0:
            return logistic
        if shiftType == 1:
            return 2 * logistic - 1
        raise ValueError("shiftType must be 0 or 1, got " + repr(shiftType))

    def sigmoidPrime(self, x, shiftType):
        """Return the derivative of ``sigmoid(x, shiftType)`` with respect to ``x``.

        Both variants are expressed through ``s - s**2`` with ``s`` the plain
        sigmoid. The closed form ``2*exp(-x)/(1+exp(-x))**2`` is avoided
        because it evaluates to inf/inf (NaN) for strongly negative ``x``.

        Raises:
            ValueError: if ``shiftType`` is neither 0 nor 1.
        """
        logistic = self.sigmoid(x, 0)
        slope = logistic - logistic**2
        if shiftType == 0:
            return slope
        if shiftType == 1:
            return 2 * slope
        raise ValueError("shiftType must be 0 or 1, got " + repr(shiftType))

    def _activationType(self, layer):
        """Return the shiftType of ``layer``: 0 for the output layer, 1 otherwise."""
        return 0 if layer == self.layers-1 else 1

    def _activationSlope(self, Z, layer):
        """Return the activation derivative of ``layer`` at its biased input."""
        return self.sigmoidPrime(Z[layer]+self.Biases[layer], self._activationType(layer))

    def _forward(self, sample):
        """Propagate one input vector through the network and return ``(Z, R)``."""
        Z = []
        R = []
        signal = sample
        for layer in range(self.layers):
            Z.append(np.dot(self.Weights[layer], signal))
            signal = self.sigmoid(Z[layer]+self.Biases[layer], self._activationType(layer))
            R.append(signal)
        return Z, R

    def Rdependance(self, Z, layer1, layer2, multi=False):  #How R depends on a preceeding R
        """Return the Jacobian of ``R[layer1]`` with respect to ``R[layer2]``.

        Args:
            Z: per-layer pre-bias inputs of the current sample.
            layer1: index of the later layer.
            layer2: index of the earlier layer; expected to be below ``layer1``.
            multi: ignored. Whether several layers have to be chained is
                derived from ``layer1 - layer2``; the parameter is kept only
                so existing calls keep working.

        Returns:
            Array of shape (nodes in layer1, nodes in layer2).
        """
        if layer1-layer2 > 1:
            # Chain rule: multiply the adjacent-layer Jacobians from layer1 down to layer2.
            steps = [self.Rdependance(Z, i+1, i) for i in reversed(range(layer2, layer1))]
            result = steps[0]
            for step in steps[1:]:
                result = np.dot(result, step)
            return result
        # Adjacent layers: each row of the weight matrix scaled by that node's slope.
        return np.multiply(self.Weights[layer1], self._activationSlope(Z, layer1)[:, np.newaxis])

    def RWdependance(self, R, Z, dataCaseNo, layer):   #How R depends on connecting Weights
        """Return d R[layer][j] / d Weights[layer][j, k] for every j, k.

        Entry (j, k) is the slope of node j times the k-th value feeding the
        layer: the training sample ``self.A[dataCaseNo]`` for layer 0, the
        previous activation ``R[layer-1]`` otherwise. The outer product avoids
        forming ``Weights/Weights`` as a matrix of ones, which produced NaN
        for any weight that is exactly zero.
        """
        incoming = self.A[dataCaseNo] if layer == 0 else R[layer-1]
        return np.outer(self._activationSlope(Z, layer), incoming)

    def RBdependance(self, Z, layer):   #How R depends on internal Biases
        """Return the Jacobian of the output activations w.r.t. ``Biases[layer]``.

        Intended for layers below the output layer. The result has shape
        (output nodes, nodes in ``layer``).
        """
        outputByLayer = self.Rdependance(Z, self.layers-1, layer)
        return np.multiply(outputByLayer.T, self._activationSlope(Z, layer)[:, np.newaxis]).T

    def integralWeightCost(self, R, Z, dataCaseNo, quadDifferential, layer): # Cost of system for weights
        """Return the gradient of the sample cost with respect to ``Weights[layer]``.

        Args:
            R: per-layer activations of the sample.
            Z: per-layer pre-bias inputs of the sample.
            dataCaseNo: index of the sample in ``self.A``.
            quadDifferential: derivative of the cost w.r.t. the output activations.
            layer: index of the layer whose weights are differentiated.
        """
        if layer == self.layers-1:
            nodes = np.identity(self.outputSize)
        else:
            nodes = self.Rdependance(Z, self.layers-1, layer)
        # Derivative of the cost w.r.t. the activations of `layer`.
        cost_differential = np.sum(np.multiply(nodes, quadDifferential[:, np.newaxis]), 0)
        return np.multiply(self.RWdependance(R, Z, dataCaseNo, layer), cost_differential[:, np.newaxis])

    def integralBiasCost(self, Z, quadDifferential, layer): # Cost of system for biases
        """Return the gradient of the sample cost with respect to ``Biases[layer]``.

        For the output layer the gradient is ``quadDifferential`` times the
        output activation slope. The slope factor used to be missing, which
        made the output-bias gradient inconsistent with the weight gradient.
        """
        if layer == self.layers-1:
            return np.multiply(quadDifferential, self._activationSlope(Z, layer))
        nodes = self.RBdependance(Z, layer)
        return np.sum(np.multiply(nodes, quadDifferential[:, np.newaxis]), 0)

    def initializeTraining(self):
        """Train on ``self.A`` / ``self.y`` and print progress and a summary.

        Every batch of ``sampleSize`` consecutive samples contributes one
        update: the per-sample gradients are averaged and subtracted from the
        parameters (i.e. a step size of 1). The average batch cost is printed
        after every update and a summary after the last loop.
        """
        for loop in range(self.loops):
            for iteration in range(self.iterations):
                avg_cost = 0
                avg_deltaWeights = [np.zeros_like(w) for w in self.Weights]
                avg_deltaBiases = [np.zeros_like(b) for b in self.Biases]
                batchStart = iteration*self.sampleSize
                for dataCaseNo in range(batchStart, batchStart + self.sampleSize):
                    # One-hot encoding of the expected digit.
                    Y1 = np.zeros(self.outputSize)
                    Y1[self.y[dataCaseNo]] = 1
                    Z, R = self._forward(self.A[dataCaseNo])

                    avg_cost += np.sum((R[-1] - Y1)**2)
                    quadDifferential = 2 * (R[-1]-Y1)

                    for i in range(self.layers):
                        avg_deltaWeights[i] += self.integralWeightCost(R, Z, dataCaseNo, quadDifferential, i)
                        avg_deltaBiases[i] += self.integralBiasCost(Z, quadDifferential, i)

                avg_cost = avg_cost/self.sampleSize
                for i in range(self.layers):
                    self.Weights[i] = self.Weights[i] - avg_deltaWeights[i]/self.sampleSize
                    self.Biases[i] = self.Biases[i] - avg_deltaBiases[i]/self.sampleSize
                print("Average cost: " + str(round(avg_cost, 4)))
            print("\n" + "*"*25 + " " + str(loop+1) +" " + "*"*25 + "\n")
        executionEndTime = round((time.time() - self.clock), 2)
        print("Completed " + str(self.loops) + " rounds of " + str(self.sampleSize*self.iterations) + " samples (sampleSize: " + str(self.sampleSize) + "), " + " in " + str(executionEndTime) + " seconds..")
        print("Layers: " + str(self.layers))
        print("Middle layer nodes: " + str(self.layerSize))
        print("Input amount: " + str(self.inputAmount))
        amountVariables = sum(w.size + b.size for w, b in zip(self.Weights, self.Biases))
        print("Variables: " + str(amountVariables))
        print("Output size: " + str(self.outputSize))
        time.sleep(2)

    def getAccuracy(self):
        """Return the percentage of correctly classified validation samples.

        At most the first 10000 samples of ``self.V`` are evaluated; fewer if
        the validation set is smaller. A sample counts as correct when the
        index of the largest output equals its label in ``self.v2``. Returns
        0.0 when there is nothing to evaluate.
        """
        runs = min(10000, len(self.V), len(self.v2))
        correct = 0
        print("Testing validation set accuracy over " + str(runs) + " samples...\n")
        if runs == 0:
            return 0.0
        for ran in range(runs):
            _, R = self._forward(self.V[ran])
            # argmax picks the first maximum, like taking the first hit of
            # np.where(R == max).
            if int(self.v2[ran]) == int(np.argmax(R[-1])):
                correct += 1

        accuracy = correct*100/runs
        return accuracy


# Arguments passed to mnistClass below:
#   inputAmount - amount of nodes in input data
#   layers      - amount of layers including last output layer but not first input layer
#   layerSize   - amount of nodes in hidden layers
#   outputSize  - amount of nodes in output layer
#   loops       - how many times to train through the entire data set
#   sampleSize  - what quantity of data samples to average the gradient on
instance = mnistClass(
    inputAmount=784,
    layers=3,
    layerSize=16,
    outputSize=10,
    loops=2,
    sampleSize=100,
)

【问题讨论】：

标签： python neural-network mnist

【解决方案1】：

很高兴看到有新人加入 ML（尤其是 DL）领域，而且你已经取得了这样的成果，首先向你致敬。至于你的问题，我建议你先退一步，理解数据探索和特征提取的概念，以及它们为什么重要。具体做法是：浏览 Kaggle 上的一些机器学习教程，并尝试在那里的数据集（例如泰坦尼克号数据集）上做一些基本的分类练习。

https://www.kaggle.com/learn/overview ，进入其中的“机器学习入门”（Intro to Machine Learning）课程。

祝你好运！

【讨论】：

谢谢，我去看看。我之前其实并不知道 ML 和 DL 之间有区别！ :)
DL 是 ML 的一个子领域。另外，就我的经验而言，Kaggle 的教程非常棒！顺便说一句，如果您能给我的答案点赞或采纳它，帮我提升一下声誉，我将不胜感激 (;