Tensorflow：如何实现多层dynamic_rnn？答案

【问题标题】：Tensorflow: How to implement Multilayered dynamic_rnn?（Tensorflow：如何实现多层 dynamic_rnn？）
【发布时间】：2017-12-17 17:01:18
【问题描述】：

我构建了一个单层 LSTM。它有效。

以下代码重点介绍权重和偏差的定义以及RNN结构：

# Define weights
# 'in' is multiplied with n_inputs-wide rows and yields n_hidden_units columns;
# 'out' is multiplied with n_hidden_units-wide rows and yields n_classes columns.
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
# Bias vectors for the two projections, every element initialised to 0.1.
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}


def RNN(X, weights, biases):
    """Build a single-layer LSTM graph and project its last time step.

    Args:
        X: input tensor; it is flattened to rows of ``n_inputs`` values.
        weights: dict holding the 'in' and 'out' projection matrices.
        biases: dict holding the 'in' and 'out' bias vectors.

    Returns:
        The last time step of the LSTM output multiplied by
        ``weights['out']`` plus ``biases['out']``.
    """
    # Flatten so that a single matmul projects every time step at once.
    flat_inputs = tf.reshape(X, [-1, n_inputs])
    projected = tf.matmul(flat_inputs, weights['in']) + biases['in']
    # Back to a 3-D tensor whose middle axis has n_steps entries
    # (batch-major, matching time_major=False below).
    X_in = tf.reshape(projected, [-1, n_steps, n_hidden_units])

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    # The zero state is sized by the fed batch_size_holder value.
    init_state = lstm_cell.zero_state(batch_size_holder, dtype=tf.float32)

    outputs, _ = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)

    # Slice the final step directly rather than transposing and unstacking
    # all n_steps tensors just to keep the last one.
    last_output = outputs[:, -1, :]
    results = tf.matmul(last_output, weights['out']) + biases['out']

    return results

pred = RNN(x, weights, biases) # prediction: output tensor of the network built by RNN()

现在，我想再添加一层 LSTM 单元。我在 Tensorflow 的官方网站上查看了示例。 https://www.tensorflow.org/tutorials/recurrent

但我很难弄清楚如何使用 MultiRNNCell。我尝试沿用普通神经网络的思路：将第一层的输出乘以权重并加上偏置，然后送入第二层。以下代码实现了这一点：

# Define weights
# Layer 1: 'in' projects n_inputs -> n_hidden_units; 'out' keeps the width at
# n_hidden_units.
weights1 = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_hidden_units]))
}
biases1 = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ]))
}

# Layer 2: 'out' projects n_hidden_units -> n_classes.
# NOTE(review): weights2['in'] is multiplied with the n_hidden_units-wide
# result of layer 1, so its [n_inputs, n_hidden_units] shape only lines up
# when n_inputs == n_hidden_units.
weights2 = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases2 = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}



def RNN(X, weights1, biases1, weights2, biases2):
    """Attempt at a two-layer LSTM built from two separate dynamic_rnn calls.

    Layer 1 projects the input, runs an LSTM and projects its last time step;
    that result is projected again, reshaped and fed to a second LSTM whose
    last time step is projected to n_classes and returned.
    """

    # NOTE(review): this reshapes the module-level `x`, not the parameter `X`.
    X = tf.reshape(x, [-1, n_inputs])
    X_in = tf.matmul(X, weights1['in']) + biases1['in']
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    lstm_cell1 = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    lstm_cell2 = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)

    # Both zero states are sized by the fed batch_size_holder value.
    init_state1 = lstm_cell1.zero_state(batch_size_holder, dtype=tf.float32)
    init_state2 = lstm_cell2.zero_state(batch_size_holder, dtype=tf.float32)

    outputs1, final_state1 = tf.nn.dynamic_rnn(lstm_cell1, X_in, initial_state=init_state1, time_major=False)

    # Keep only the last time step of layer 1, so results1 has one row per sample.
    outputs1 = tf.unstack(tf.transpose(outputs1, [1,0,2]))
    results1 = tf.matmul(outputs1[-1], weights1['out']) + biases1['out']    

    # NOTE(review): `input` shadows the Python builtin of the same name.
    input = tf.matmul(results1, weights2['in']) + biases2['in']
    # NOTE(review): results1 holds a single step per sample, so reshaping to
    # n_steps steps per sample changes the leading (batch) dimension -
    # confirm this is intended.
    input = tf.reshape(input, [-1, n_steps, n_hidden_units])
    # NOTE(review): second dynamic_rnn call made without a distinct variable
    # scope; presumably it collides with the variables created by the first
    # call (rnn/basic_lstm_cell/...) - confirm.
    outputs2, final_state2 = tf.nn.dynamic_rnn(lstm_cell2, input, initial_state=init_state2, time_major=False)

    # Last time step of layer 2, projected to n_classes.
    outputs2 = tf.unstack(tf.transpose(outputs2, [1,0,2]))
    results2 = tf.matmul(outputs2[-1], weights2['out']) + biases2['out']    


    return results2

我只是制作了两层大小相等的 lstm_cells 并调用了 dynamic_rnn 两次。

我的第一个问题是，这段代码是否符合我的要求？

运行的时候报错：

ValueError: Variable rnn/basic_lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope?

根据 TensorFlow 官方教程 (https://www.tensorflow.org/tutorials/recurrent)，这是一个版本问题，应该通过给 BasicLSTMCell() 添加参数 reuse=tf.get_variable_scope().reuse 来解决。

但是，我的 BasicLSTMCell() 函数甚至没有 reuse 这个参数。

你们知道如何让它工作吗？感谢您提供任何建议和帮助。

完整代码如下：

import tensorflow as tf


lr = 0.005  # learning rate handed to AdamOptimizer

n_inputs = 128  # size of the last axis of the input placeholder
n_steps = 255  # size of the middle (time-step) axis of the input placeholder
n_hidden_units = 128  # LSTM size, also the width of the input projection
number_of_layers = 2  # NOTE(review): not referenced by the code shown in this script
n_classes = 1  # width of the network output / target
batch_size = 100  # samples per training batch
gradient = 0.1  # gradients are clipped element-wise to [-gradient, gradient]

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# Scalar fed at run time; used to size the LSTM zero state.
batch_size_holder = tf.placeholder(tf.int32, [], name='batch_size_holder')


# Define weights
# 'in' is multiplied with n_inputs-wide rows and yields n_hidden_units columns;
# 'out' is multiplied with n_hidden_units-wide rows and yields n_classes columns.
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
# Bias vectors for the two projections, every element initialised to 0.1.
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}


def RNN(X, weights, biases):
    """Build a single-layer LSTM graph and project its last time step.

    Args:
        X: input tensor; it is flattened to rows of ``n_inputs`` values.
        weights: dict holding the 'in' and 'out' projection matrices.
        biases: dict holding the 'in' and 'out' bias vectors.

    Returns:
        The last time step of the LSTM output multiplied by
        ``weights['out']`` plus ``biases['out']``; its last axis has
        ``n_classes`` entries.
    """
    # Flatten so that a single matmul projects every time step at once.
    flat_inputs = tf.reshape(X, [-1, n_inputs])
    projected = tf.matmul(flat_inputs, weights['in']) + biases['in']
    # Back to a 3-D tensor whose middle axis has n_steps entries
    # (batch-major, matching time_major=False below).
    X_in = tf.reshape(projected, [-1, n_steps, n_hidden_units])

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    # The zero state is sized by the fed batch_size_holder value.
    init_state = lstm_cell.zero_state(batch_size_holder, dtype=tf.float32)

    outputs, _ = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)

    # Slice the final step directly rather than transposing and unstacking
    # all n_steps tensors just to keep the last one.
    last_output = outputs[:, -1, :]
    results = tf.matmul(last_output, weights['out']) + biases['out']

    return results


# Graph: prediction, mean-squared-error cost and an Adam step on clipped gradients.
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.square(pred - y))
optimizer = tf.train.AdamOptimizer(lr)
gvs = optimizer.compute_gradients(cost)
# Clip every gradient element-wise to [-gradient, gradient] before applying it.
capped_gvs = [(tf.clip_by_value(grad, -gradient, gradient), var) for grad, var in gvs]
train_step = optimizer.apply_gradients(capped_gvs)

train_loss = []
test_loss = []

# The context manager closes the session even when a training step raises.
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # `data` is defined outside this snippet; presumably a batch iterator
    # exposing hasNext()/next()/test() - verify against its definition.
    mydata = data(batch = batch_size, s = 10000, per = 0.95)
    step = 0
    while mydata.hasNext():
        batch_xs, batch_ys = mydata.next()
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        batch_ys = batch_ys.reshape([batch_size, 1])

        # Feed the real batch size instead of a literal so the LSTM zero
        # state always matches the batch that is fed.
        train_feed = {x: batch_xs, y: batch_ys, batch_size_holder: batch_size}
        sess.run(train_step, feed_dict=train_feed)

        # Record both losses every 10th step.
        if step % 10 == 0:
            test_x, test_y = mydata.test()
            test_x = test_x.reshape([-1, n_steps, n_inputs])
            test_y = test_y.reshape([-1, 1])
            # Assumes test_x is a NumPy array (it is .reshape()d above), so
            # shape[0] is the number of test sequences - TODO confirm.
            test_feed = {x: test_x, y: test_y, batch_size_holder: test_x.shape[0]}
            loss1 = sess.run(cost, feed_dict=train_feed)
            loss2 = sess.run(cost, feed_dict=test_feed)
            train_loss.append(loss1)
            test_loss.append(loss2)

            print("training cost: ", loss1)
            print("testing cost: ", loss2)
        step += 1

# Plot the recorded training and testing loss curves.
import matplotlib.pyplot as plt
plt.plot(train_loss)
plt.plot(test_loss)

-------更新---------

感谢vijay的回答，更新后的代码如下：

请注意，该网络在输出结果之前有 2 (n_layers) 个 LSTM 层和 1 个密集层。

import tensorflow as tf


lr = 0.01  # learning rate handed to AdamOptimizer
n_inputs = 128  # size of the last axis of the input placeholder
n_steps = 255  # size of the middle (time-step) axis of the input placeholder
n_hidden_units = 200  # units in every LSTM cell
n_layers = 2  # number of stacked LSTM cells built in RNN()
number_of_layers = 2  # NOTE(review): duplicates n_layers and is not referenced in the code shown
n_classes = 1  # width of the target placeholder
batch_size = 100  # samples per training batch
gradient = 0.5  # gradients are clipped element-wise to [-gradient, gradient]


# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# Scalar batch size fed at run time.
batch_size_holder = tf.placeholder(tf.int32, [], name='batch_size_holder')


def lstm_cell():
    """Create and return one new BasicLSTMCell with n_hidden_units units."""
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    return cell

def RNN(X):
    """Stack n_layers LSTM cells, run them over X and project the last step.

    Args:
        X: input tensor handed to dynamic_rnn; it is indexed below as
            [batch, step, feature].

    Returns:
        A dense projection with n_classes units of the final time step.
    """
    # One new cell object per layer.
    lstm_stacked = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(n_layers)])
    # No initial_state is passed, so dynamic_rnn builds it from dtype; the
    # zero state that used to be created here was never used and is dropped.
    outputs, _ = tf.nn.dynamic_rnn(lstm_stacked, X, dtype=tf.float32)

    # Output width follows n_classes instead of a hard-coded 1.
    output = tf.layers.dense(outputs[:, -1, :], n_classes)

    return output


# Graph: prediction, mean-squared-error cost and an Adam step on clipped gradients.
pred = RNN(x)
cost = tf.losses.mean_squared_error(y, pred)
optimizer = tf.train.AdamOptimizer(lr)
gvs = optimizer.compute_gradients(cost)
# Clip every gradient element-wise to [-gradient, gradient] before applying it.
capped_gvs = [(tf.clip_by_value(grad, -gradient, gradient), var) for grad, var in gvs]
train_step = optimizer.apply_gradients(capped_gvs)

train_loss = []
test_loss = []

# The original never closed the session; the context manager releases it
# when training ends or a step raises.
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)

    # `data` is defined outside this snippet; presumably a batch iterator
    # exposing hasNext()/next()/test() - verify against its definition.
    mydata = data(batch = batch_size, s = 30000, per = 0.95)
    step = 0
    while mydata.hasNext():
        batch_xs, batch_ys = mydata.next()
        batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
        batch_ys = batch_ys.reshape([batch_size, 1])

        train_feed = {x: batch_xs, y: batch_ys, batch_size_holder: batch_size}
        sess.run(train_step, feed_dict=train_feed)

        # Record both losses every 10th step.
        if step % 10 == 0:
            test_x, test_y = mydata.test()
            test_x = test_x.reshape([-1, n_steps, n_inputs])
            test_y = test_y.reshape([-1, 1])
            # Feed the actual number of test sequences instead of a literal.
            # Assumes test_x is a NumPy array (it is .reshape()d above) -
            # TODO confirm.
            test_feed = {x: test_x, y: test_y, batch_size_holder: test_x.shape[0]}
            loss1 = sess.run(cost, feed_dict=train_feed)
            loss2 = sess.run(cost, feed_dict=test_feed)
            train_loss.append(loss1)
            test_loss.append(loss2)

            print("training cost: ", loss1, "testing cost: ", loss2)

        step += 1

【问题讨论】：

标签： python tensorflow

【解决方案1】：

如果您想要multi-layer LSTM，您可以使用tf.contrib.rnn.MultiRNNCell。所以对于两层：

def lstm_cell():
    """Return a single RNN cell; called once per layer when stacking."""
    return tf.contrib.rnn.BasicLSTMCell(n_hidden_units)


# lstm_cell is defined above so this snippet runs top to bottom.
n_layers = 2
lstm_stacked = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(n_layers)])
outputs, final_state = tf.nn.dynamic_rnn(lstm_stacked, X_in, dtype=tf.float32)

【讨论】：

不敢相信这么简单！那么如何为每一层设置权重、偏差和初始状态呢？如何运行dynamic_rnn？
偏置和权重由函数内部处理；如果要传入初始状态，可以使用 dynamic_rnn 的 initial_state 参数。上面的代码已经包含了 dynamic_rnn 调用。
那么，在我的原始代码中，权重和偏差的定义不是必须的而且是错误的？我不应该做X_in = tf.matmul(X, weights['in']) + biases['in'] 吗？ # Define weights weights = { 'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])), 'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes])) } biases = { 'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])), 'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ])) }
哦，好吧，你说的是 LSTM 层之前的密集层，是的，和你之前做的一样。