Chainer 入门教程(4)基于MNIST数据集进行训练

# Initial setup following
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from import extensions
%matplotlib inline
class MLP(chainer.Chain):
    """Multi Layer Perceptron: three linear layers with ReLU in between."""

    def __init__(self, n_units, n_out):
        """Create and register the three linear layers.

        Args:
            n_units: Output size of the first and second linear layers.
            n_out: Output size of the last linear layer.
        """
        super(MLP, self).__init__()
        with self.init_scope():
            # Passing `None` as the input size lets each layer infer it.
            self.l1 = L.Linear(None, n_units)  # n_in -> n_units
            self.l2 = L.Linear(None, n_units)  # n_units -> n_units
            self.l3 = L.Linear(None, n_out)    # n_units -> n_out

    def __call__(self, x):
        """Apply l1 and l2 (each followed by ReLU), then l3, to ``x``.

        Returns:
            The output of the last linear layer; no activation is applied
            to it here.
        """
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        return self.l3(hidden)





全连接层的输出可以是任意实数,Softmax函数将其转换为0到1之间的值,因此可以将其视为“该标签的概率”。交叉熵用于计算两个概率分布之间的损失。Chainer提供了函数F.softmax_cross_entropy(y, t)来计算y和t的softmax交叉熵。预测的概率分布y越接近实际的概率分布t,损失就越小。直观地说,当模型能够为给定图像预测出正确的标签时,损失就会减小。


class SoftmaxClassifier(chainer.Chain):
    """Wrap a predictor and compute its softmax cross entropy loss.

    After each call, the most recent loss and accuracy are available as
    ``self.loss`` and ``self.accuracy``.
    """

    def __init__(self, predictor):
        """Register ``predictor`` as a child link.

        Args:
            predictor: Link that is called on the input in ``__call__``.
        """
        # Registering the predictor as a child link makes it reachable as
        # `self.predictor`, which `__call__` relies on.
        super(SoftmaxClassifier, self).__init__(
            predictor=predictor
        )

    def __call__(self, x, t):
        """Compute the loss of the predictor's output for ``x`` against ``t``.

        Args:
            x: Input passed to the predictor.
            t: Target passed to the loss and accuracy functions.

        Returns:
            The softmax cross entropy loss (also stored in ``self.loss``).
        """
        y = self.predictor(x)
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        return self.loss


# Number of hidden layer units; try increasing this value and see how the
# accuracy changes.
unit = 50
# Set up a neural network to train (second argument: number of outputs)
model = MLP(unit, 10)
# Classifier will calculate classification loss, based on the output of model
classifier_model = SoftmaxClassifier(model)

首先,MLP模型被创建。 n_out被设定为10,因为MNIST在标签中有10个模式,从0到9。然后根据MLP模型作为预测器创建classifier_model。正如你在这里看到的,Chain类的网络可以被“连接”来构建也是Chain类的新网络。我想这就是Chainer这个名字的由来。


# Setup an optimizer
optimizer = chainer.optimizers.Adam()
# Attach the optimizer to the model whose parameters it should update;
# `update` cannot be used before a target link has been set.
optimizer.setup(classifier_model)


# Pass the loss function (Classifier defines it) and its arguments
optimizer.update(classifier_model, x, t)




使用 GPU

Chainer支持GPU加速计算。要使用GPU,PC必须具有NVIDIA GPU,并且需要安装CUDA,并且最好安装cudnn,然后安装chainer。


if gpu < 0:
    # CPU only: numpy is the array module.
    xp = np
else:
    # Make the specified GPU current, copy the model to it, and use cupy
    # as the array module.
    chainer.cuda.get_device(gpu).use()
    classifier_model.to_gpu()
    xp = cuda.cupy


如果不使用GPU,请设置gpu = -1,表示不使用GPU,只使用CPU。在这种情况下,numpy(在上面写成np)用于数组计算。

如果你想使用GPU,请设置gpu = 0等(一般装有NVIDIA GPU的消费级电脑只有一块GPU,因此只能使用设备ID = 0;GPU服务器则可能在一台机器上装有多块GPU,设备ID为0、1、2、3等)。在这种情况下,调用chainer.cuda.get_device(gpu).use()来指定要使用的GPU设备,并使用model.to_gpu()将模型的内部参数复制到GPU中。此时使用cupy进行数组计算。


在Python科学计算中,numpy广泛用于向量、矩阵和一般张量计算。numpy会自动针对CPU优化这些线性运算。cupy可以被认为是numpy的GPU版本,这样你就可以写出与numpy几乎相同的GPU计算代码。cupy由Chainer团队开发,在Chainer版本1中以chainer.cuda.cupy的形式提供。但是,cupy本身可以用作numpy的GPU版本,因此适用于更广泛的用例,而不仅限于chainer。所以cupy从chainer中独立出来,在chainer版本2中作为单独的cupy模块提供。






# training: one pass over the training data in random order
perm = np.random.permutation(N)
sum_accuracy = 0
sum_loss = 0
start = time.time()
for i in six.moves.range(0, N, batchsize):
    # Fetch the minibatch once; element 0 is used as input, element 1 as target.
    batch = train[perm[i:i + batchsize]]
    x = chainer.Variable(xp.asarray(batch[0]))
    t = chainer.Variable(xp.asarray(batch[1]))

    # Pass the loss function (Classifier defines it) and its arguments
    optimizer.update(classifier_model, x, t)

    # Weight the per-batch loss/accuracy by the batch size so that dividing
    # by N below gives a per-sample mean even if the last batch is smaller.
    sum_loss += float(classifier_model.loss.data) * len(t.data)
    sum_accuracy += float(classifier_model.accuracy.data) * len(t.data)
end = time.time()
elapsed_time = end - start
throughput = N / elapsed_time
print('train mean loss={}, accuracy={}, throughput={} images/sec'.format(
    sum_loss / N, sum_accuracy / N, throughput))





# evaluation: accumulate loss and accuracy over the test data
sum_accuracy = 0
sum_loss = 0
for i in six.moves.range(0, N_test, batchsize):
    # Clamp the upper bound so the last batch never indexes past the end of
    # the test set when N_test is not a multiple of batchsize.
    index = np.arange(i, min(i + batchsize, N_test))
    batch = test[index]
    x = chainer.Variable(xp.asarray(batch[0]))
    t = chainer.Variable(xp.asarray(batch[1]))

    # Calling the classifier stores the batch loss and accuracy on it.
    loss = classifier_model(x, t)
    # Weight by the batch size so that dividing by N_test gives the mean.
    sum_loss += float(loss.data) * len(t.data)
    sum_accuracy += float(classifier_model.accuracy.data) * len(t.data)

print('test  mean loss={}, accuracy={}'.format(
    sum_loss / N_test, sum_accuracy / N_test))


  • 增加数据大小(如果可能的话)。
    • 数据增强是有效增加数据的一种方法。
  • 减少神经网络中的内部参数数量
    • 尝试更简单的网络
  • 添加正则化项


"""
Very simple implementation of MNIST training code with Chainer, using a
Multi Layer Perceptron (MLP) model.
This code explains the basics of the training procedure.
"""
from __future__ import print_function
import time
import os
import numpy as np
import six
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import cuda
from chainer import serializers
class MLP(chainer.Chain):
    """Neural Network definition, Multi Layer Perceptron"""

    def __init__(self, n_units, n_out):
        """Create and register the three linear layers.

        Args:
            n_units: Output size of the first and second linear layers.
            n_out: Output size of the last linear layer.
        """
        super(MLP, self).__init__(
            # the size of the inputs to each layer will be inferred
            l1=L.Linear(None, n_units),  # n_in -> n_units
            l2=L.Linear(None, n_units),  # n_units -> n_units
            l3=L.Linear(None, n_out),  # n_units -> n_out
        )

    def __call__(self, x):
        """Apply l1 and l2 (each followed by ReLU), then l3, to ``x``."""
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        y = self.l3(h2)
        return y
class SoftmaxClassifier(chainer.Chain):
    """Classifier is for calculating loss, from predictor's output.

    predictor is a model that predicts the probability of each label.
    After each call, the most recent loss and accuracy are available as
    ``self.loss`` and ``self.accuracy``.
    """

    def __init__(self, predictor):
        """Register ``predictor`` as a child link.

        Args:
            predictor: Link that is called on the input in ``__call__``.
        """
        # Registering the predictor as a child link makes it reachable as
        # `self.predictor`, which `__call__` relies on.
        super(SoftmaxClassifier, self).__init__(
            predictor=predictor
        )

    def __call__(self, x, t):
        """Compute the loss of the predictor's output for ``x`` against ``t``.

        Args:
            x: Input passed to the predictor.
            t: Target passed to the loss and accuracy functions.

        Returns:
            The softmax cross entropy loss (also stored in ``self.loss``).
        """
        y = self.predictor(x)
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        return self.loss
def main():
    """Train the MLP classifier on MNIST with a hand-written loop and save it.

    For each epoch this runs one shuffled pass over the training set, printing
    the mean training loss, accuracy and throughput, then one pass over the
    test set, printing the mean test loss and accuracy. After the last epoch
    the classifier, the bare MLP and the optimizer state are written to the
    output directory as npz files.
    """
    # Configuration setting
    gpu = -1                  # GPU ID to be used for calculation. -1 indicates to use only CPU.
    batchsize = 100           # Minibatch size for training
    epoch = 20                # Number of training epoch
    out = 'result/1_minimum'  # Directory to save the results
    unit = 50                 # Number of hidden layer units; try increasing this value and see how accuracy changes.
    print('GPU: {}'.format(gpu))
    print('# unit: {}'.format(unit))
    print('# Minibatch-size: {}'.format(batchsize))
    print('# epoch: {}'.format(epoch))
    print('out directory: {}'.format(out))

    # Set up a neural network to train
    model = MLP(unit, 10)
    # Classifier will calculate classification loss, based on the output of model
    classifier_model = SoftmaxClassifier(model)
    if gpu >= 0:
        chainer.cuda.get_device(gpu).use()  # Make a specified GPU current
        classifier_model.to_gpu()           # Copy the model to the GPU
    xp = np if gpu < 0 else cuda.cupy

    # Setup an optimizer and attach it to the model whose parameters it
    # should update; `update` cannot be used before a target link is set.
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier_model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()
    n_epoch = epoch
    N = len(train)       # training data size
    N_test = len(test)  # test data size

    # Learning loop
    for epoch in range(1, n_epoch + 1):
        print('epoch', epoch)

        # training
        perm = np.random.permutation(N)
        sum_accuracy = 0
        sum_loss = 0
        start = time.time()
        for i in six.moves.range(0, N, batchsize):
            # Fetch the minibatch once; element 0 is used as input,
            # element 1 as target.
            batch = train[perm[i:i + batchsize]]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))
            # Pass the loss function (Classifier defines it) and its arguments
            optimizer.update(classifier_model, x, t)
            # Weight by the batch size so that dividing by N gives the
            # per-sample mean even if the last batch is smaller.
            sum_loss += float(classifier_model.loss.data) * len(t.data)
            sum_accuracy += float(classifier_model.accuracy.data) * len(t.data)
        end = time.time()
        elapsed_time = end - start
        throughput = N / elapsed_time
        print('train mean loss={}, accuracy={}, throughput={} images/sec'.format(
            sum_loss / N, sum_accuracy / N, throughput))

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        for i in six.moves.range(0, N_test, batchsize):
            # Clamp the upper bound so the last batch never indexes past the
            # end of the test set.
            index = np.arange(i, min(i + batchsize, N_test))
            batch = test[index]
            x = chainer.Variable(xp.asarray(batch[0]))
            t = chainer.Variable(xp.asarray(batch[1]))
            loss = classifier_model(x, t)
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(classifier_model.accuracy.data) * len(t.data)
        print('test  mean loss={}, accuracy={}'.format(
            sum_loss / N_test, sum_accuracy / N_test))

    # Save the model and the optimizer
    if not os.path.exists(out):
        # save_npz needs the target directory to exist already.
        os.makedirs(out)
    print('save the model')
    serializers.save_npz('{}/classifier_mlp.model'.format(out), classifier_model)
    serializers.save_npz('{}/mlp.model'.format(out), model)
    print('save the optimizer')
    serializers.save_npz('{}/mlp.state'.format(out), optimizer)
# Run the training script only when executed directly, not when imported.
if __name__ == '__main__':
    main()