在 TensorFlow 中使用预训练模型进行推理

【问题标题】：Inference from a pre-trained model in tensorflow（在 TensorFlow 中使用预训练模型进行推理）
【发布时间】：2017-08-02 22:11:55
【问题描述】：

我有一个预训练的模型，我试图在 tensorflow 中进行推理。但我想我错过了一些东西。这是我的架构：

# Define a function to create a conv layer
def add_conv_layer(incoming=None, num_filters=None, kSize=None, pad='SAME', train=False, s=1,
                   layer_name=None, weights_param=None, bias_param=None, activation_fn=None):
    """Add a 2-D convolution with bias and optional activation to the graph.

    Args:
        incoming: Input tensor. Its last dimension is read as the number of
            input channels.
        num_filters: Number of output channels of the convolution.
        kSize: Side length of the square convolution kernel.
        pad: Padding mode forwarded to ``tf.nn.conv2d``.
        train: Not used by this function; kept so existing callers that pass
            it keep working.
        s: Stride applied to both spatial dimensions.
        layer_name: Name of the variable scope that wraps the layer; also
            used as the name of the convolution op.
        weights_param: Optional initial kernel values. They are wrapped in a
            ``tf.Variable`` and reshaped to
            ``[kSize, kSize, in_channels, num_filters]``. When ``None`` the
            kernel comes from ``init_weights``.
        bias_param: Optional initial bias values. When ``None`` the bias
            comes from ``init_bias(num_filters)``.
        activation_fn: Optional callable applied to the biased convolution
            output.

    Returns:
        The output tensor of the layer.
    """
    with tf.variable_scope(layer_name):

        in_channels = incoming.get_shape().as_list()[-1]
        tshape = [kSize, kSize, in_channels, num_filters]

        # Set weights for the layer
        if weights_param is not None:
            weights = tf.Variable(weights_param, name='weights', dtype=tf.float32)
            # NOTE(review): tf.reshape only re-interprets the flat buffer, it
            # does not permute axes. If weights_param was exported with a
            # different axis order (e.g. (out, in, h, w)), it must be
            # transposed before being passed in -- verify against the
            # exporter.
            weights = tf.reshape(weights, tshape)
        else:
            weights = init_weights(tshape)
            # Function-call form works on both Python 2 and Python 3; the
            # print statement form is a SyntaxError on Python 3.
            print(weights.shape)

        # Set bias for the layer
        if bias_param is not None:
            bias = tf.Variable(bias_param, name='bias', dtype=tf.float32)
        else:
            bias = init_bias(num_filters)

        # Pass the layer_name argument rather than the quoted literal
        # 'layer_name', which gave every convolution op the same base name.
        layer = tf.nn.conv2d(incoming, weights, strides=[1, s, s, 1],
                             padding=pad, name=layer_name) + bias

        if activation_fn is not None:
            layer = activation_fn(layer)

        return layer



# Define a function to add a dense layer
def add_dense_layer(input_tensor=None, num_units=None, layer_name=None,
                   weight_params=None, bias_params=None, activation_fn=None):
    """Add a fully connected layer (matmul + bias, optional activation).

    Args:
        input_tensor: Input tensor. Its last dimension is read as the number
            of input features.
        num_units: Number of output units.
        layer_name: Name of the variable scope that wraps the layer.
        weight_params: Optional initial weight values. They are wrapped in a
            ``tf.Variable`` and reshaped to ``[in_features, num_units]``.
            When ``None`` the weights come from ``init_weights``.
        bias_params: Optional initial bias values. When ``None`` the bias
            comes from ``init_bias(num_units)``.
        activation_fn: Optional callable applied to the affine output.

    Returns:
        The output tensor of the layer.
    """
    with tf.variable_scope(layer_name):
        fan_in = input_tensor.get_shape().as_list()[-1]
        kernel_shape = [fan_in, num_units]

        # Weight matrix: supplied values take precedence over fresh ones.
        if weight_params is None:
            kernel = init_weights(kernel_shape)
        else:
            kernel = tf.reshape(
                tf.Variable(weight_params, name='weights', dtype=tf.float32),
                kernel_shape)

        # Bias vector: same precedence rule as the weights.
        if bias_params is None:
            offset = init_bias(num_units)
        else:
            offset = tf.Variable(bias_params, name='bias', dtype=tf.float32)

        affine = tf.matmul(input_tensor, kernel) + offset

        if activation_fn is None:
            return affine
        return activation_fn(affine)


def build_network(features):
    """Assemble the pre-trained convolutional classifier and return its output.

    The input is reshaped to ``[-1, 224, 224, 3]`` and passed through five
    stacks of 3x3 ReLU convolutions, each followed by max pooling, then
    flattened and fed through three dense layers ending in a 2-unit softmax.

    All layer parameters are read from the module-level mapping ``f`` as
    ``f[name]['weights'].value`` and ``f[name]['bias'].value``.
    NOTE(review): ``f`` is defined outside this function; presumably an
    opened HDF5 weights file -- confirm.

    Args:
        features: Input tensor that can be reshaped to ``[-1, 224, 224, 3]``.

    Returns:
        The output tensor of the final softmax layer.
    """
    relu = tf.nn.relu

    # (conv layer names, filters per conv, pool layer name, pool window/stride)
    conv_stages = (
        (('conv1_1', 'conv1_2'), 64, 'pool1', 2),
        (('conv2_1', 'conv2_2'), 128, 'pool2', 2),
        (('conv3_1', 'conv3_2', 'conv3_3'), 256, 'pool3', 2),
        (('conv4_1', 'conv4_2', 'conv4_3'), 512, 'pool4', 2),
        (('conv5_1', 'conv5_2', 'conv5_3'), 512, 'img1_pool5', 7),
    )

    # (variable scope name, key in f, number of units, activation)
    dense_stages = (
        ('fc6', 'fc6', 1024, relu),
        ('fc7', 'fc7', 256, relu),
        ('output', 'out', 2, tf.nn.softmax),
    )

    net = tf.reshape(features, [-1, 224, 224, 3])

    for conv_names, width, pool_name, window in conv_stages:
        for conv_name in conv_names:
            net = add_conv_layer(net, num_filters=width, kSize=3,
                                 activation_fn=relu,
                                 weights_param=f[conv_name]['weights'].value,
                                 bias_param=f[conv_name]['bias'].value,
                                 layer_name=conv_name)
        net = tf.layers.max_pooling2d(inputs=net,
                                      pool_size=(window, window),
                                      strides=(window, window),
                                      name=pool_name)

    net = tf.contrib.layers.flatten(inputs=net)

    for scope, key, units, activation in dense_stages:
        net = add_dense_layer(net, num_units=units, activation_fn=activation,
                              weight_params=f[key]['weights'].value,
                              bias_params=f[key]['bias'].value,
                              layer_name=scope)

    return net

现在，我想给 build_network 函数提供一个新的输入并进行推理。我的做法如下所示（我认为这是错误的）：

# Build the graph before creating the initializer op:
# tf.global_variables_initializer() only covers the variables that already
# exist when it is called, so creating it first would leave every variable
# made by build_network() uninitialized.
out = build_network(features)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # NOTE(review): if `features` is a tf.placeholder, the input batch must
    # be supplied here, e.g. out.eval(feed_dict={features: batch}) -- confirm
    # how `features` is defined.
    print(out.eval())

但是这样做之后，无论输入是什么，我得到的输出都相同。请帮忙。

【问题讨论】：

标签： machine-learning tensorflow neural-network deep-learning conv-neural-network

【解决方案1】：

编辑前的原回答：~~您的 build_network 函数仅创建计算图。sess.run(init) 这一行使用每个权重的默认初始化器来初始化网络的权重，也就是说，执行该行之后网络仍是未经训练的。~~

在您把 add_*_layer 函数的实现添加到问题中之后，我现在看到您确实初始化了权重。此答案下方评论中的进一步信息表明，错误可能在于从 Theano 到 TensorFlow 的权重转换，或者正如 @Sebastian 所建议的那样，在于数据布局的差异。不过，如果没有更多信息，我很难给出更确定的结论。

【讨论】：

我已经从我的磁盘加载了卷积层和密集层的权重。这就是为什么我分别定义了add_conv_layer 和add_dense_layer 函数来加载自定义权重
您能否也将这些函数的代码添加到您的问题中？我怀疑当您使用 global_variables_initializer 时权重会被覆盖
已添加这些函数
嗯，看起来您正在正确初始化这些值。权重是否来自不同的框架？（例如 Keras？）
是的...来自 theano。另外，我对它们进行了相应的更改，以便它们与 tensorflow 兼容