from mxnet import gluon from mxnet import ndarray as nd def SGD(params, lr): for param in params: param[:] = param - lr * param.grad def transform(data, label): return data.astype('float32')/255, label.astype('float32') mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform) mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform) # 标签对应的服饰名字 def get_text_labels(label): text_labels = [ 't-shirt', 'trouser', 'pullover', 'dress,', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot' ] return [text_labels[int(i)] for i in label] # 数据读取 batch_size = 256 # gluon.data的DataLoader 函数,它每次 yield ⼀个批量 train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True) test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False) #初始化参数 num_inputs = 784 num_outputs = 10 W = nd.random_normal(shape=(num_inputs, num_outputs)) b = nd.random_normal(shape=num_outputs) params = [W, b] for param in params: param.attach_grad() # 定义模型 # 多分类中,输出为每个类别的概率,这些概率和为1,通过softmax函数实现 from mxnet import nd def softmax(X): exp = nd.exp(X) partition = exp.sum(axis=1, keepdims=True) return exp / partition def net(X): return softmax(nd.dot(X.reshape((-1, num_inputs)), W) + b) # 交叉熵损失函数 # 我们需要定义⼀个针对预测为概率值的损失函数。其中最常⻅的是交叉熵损失函数,它将两个概率 # 分布的负交叉熵作为⽬标值,最小化这个值等价于最⼤化这两个概率的相似度。 def corss_entropy(yhat, y): return - nd.pick(nd.log(yhat), y) # 计算精度 # 给定⼀个概率输出,我们将预测概率最⾼的那个类作为预测的类,然后通过⽐较真实标号得到是否预测正确 def accuracy(output, label): return nd.mean(output.argmax(axis=1)==label).asscalar() def evaluate_accuracy(data_iterator, net): acc = 0 for data, label in data_iterator: output = net(data) # acc_tmp = accuracy(output, label) acc = acc + accuracy(output, label) return acc/len(data_iterator) # print(evaluate_accuracy(test_data, net)) # # import sys # sys.path.append('..') from mxnet import autograd learning_rate = 0.1 epochs = 5 for epoch in range(epochs): train_loss = 0 train_acc = 0 for data, label in train_data: with autograd.record(): output = net(data) loss = corss_entropy(output, label) loss.backward() # 将梯度做平均,这样学习率会对 batch size 不那么敏感 SGD(params, learning_rate / batch_size) train_loss = train_loss + nd.mean(loss).asscalar() train_acc += accuracy(output, label) # 模型训练完之后进行测试 test_acc = evaluate_accuracy(test_data, net) print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % ( epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc)) # 对新的样本进行标签预测 # 训练完之后,W,b参数已经固定,输入data,得到label就是预测过程 data, label = mnist_test[0:9] print('true labels') print(get_text_labels(label)) predicted_labels = net(data).argmax(axis=1) print('predicted labels') print(get_text_labels(predicted_labels.asnumpy()))
多类逻辑回归 — 使用Gluon
from mxnet import gluon from mxnet import ndarray as nd from mxnet import autograd def transform(data, label): return data.astype('float32') / 255, label.astype('float32') mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform) mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform) batch_size = 256 train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True) test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False) num_inputs = 784 net = gluon.nn.Sequential()#定义空模型 with net.name_scope(): net.add(gluon.nn.Flatten()) # 将数据展开为batch_size*X格式的 net.add(gluon.nn.Dense(10))#输出为10 net.initialize()#初始化 softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() # Softmax 与CrossEntropyLoss的复合函数 trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})#优化 def accuracy(output, label): return nd.mean(output.argmax(axis=1) == label).asscalar() def evaluate_accuracy(test_data, net): acc = .0 for data, label in test_data: output = net(data) acc += accuracy(output, label) return acc / len(test_data) epochs = 15 for epoch in range(epochs): total_loss = .0 total_acc = .0 for data, label in train_data: with autograd.record(): output = net(data) loss = softmax_cross_entropy(output, label) loss.backward() trainer.step(batch_size)#更新模型 total_loss += nd.mean(loss).asscalar() total_acc += accuracy(output, label) test_acc = evaluate_accuracy(train_data, net) print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % ( epoch, total_loss / len(train_data), total_acc / len(train_data), test_acc))