LSTM 历史长度与预测误差答案

【问题标题】：LSTM history length vs prediction error（LSTM 历史长度与预测误差）
【发布时间】：2016-07-11 10:31:09
【问题描述】：

我使用 LSTM 来预测电压时间序列信号中的下一步电压值。我有一个问题：

为什么使用更长的序列（5 步或 10 步）来训练 LSTM 并不能改善预测、降低预测误差？（实际上反而会降低性能——参见下图，例如 sequence_length=5 的结果优于 sequence_length=10）

testplot('epochs: 10', 'ratio: 1', 'sequence_length: 10', 'mean error: ', '0.00116802704509')

testplot('epochs: 10', 'ratio: 1', 'sequence_length: 5', 'mean error: ', '0.000495359163296')

（绿色为预测信号，红色为真实信号）

import os
import matplotlib.pyplot as plt
import numpy as np
import time
import csv
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
np.random.seed(1234)



def data_power_consumption(path_to_dataset,
                           sequence_length=50,
                           ratio=1.0):
    """Load one CSV column and cut it into sliding windows for next-step prediction.

    The fifth column (index 4) of every row is parsed as a float; rows whose
    value cannot be parsed (e.g. a header) or that have fewer than five
    columns are skipped. Reading stops once ``ratio * 2049280`` values have
    been collected.

    The series is turned into overlapping windows of ``sequence_length``
    consecutive values. The mean over all window entries is subtracted, the
    first 90% of the windows become the training set (shuffled in place with
    NumPy's global RNG) and the remaining windows, in their original order,
    become the test set. In each window the last value is the target and the
    preceding ``sequence_length - 1`` values are the input.

    Args:
        path_to_dataset: Path of the comma-separated input file.
        sequence_length: Window length, i.e. number of inputs plus one target.
        ratio: Fraction of the 2049280 reference values to read.

    Returns:
        ``[X_train, y_train, X_test, y_test]`` where the ``X`` arrays have
        shape ``(n_windows, sequence_length - 1, 1)`` and the ``y`` arrays
        have shape ``(n_windows,)``.

    Raises:
        ValueError: If the file yields no more than ``sequence_length``
            values, so that not a single window can be built.
    """
    # 2049280 is the total number of valid values, i.e. ratio = 1.0
    max_values = ratio * 2049280

    power = []
    with open(path_to_dataset) as f:
        for line in csv.reader(f, delimiter=","):
            try:
                power.append(float(line[4]))
            except (ValueError, IndexError):
                # Header, non-numeric cell, or a row with fewer than 5 columns.
                pass
            if len(power) >= max_values:
                print("max value %s" % len(power))
                break

    print("Data loaded from csv. Formatting...")

    n_windows = len(power) - sequence_length
    if n_windows <= 0:
        raise ValueError(
            "need more than %d values to build windows of length %d, got %d"
            % (sequence_length, sequence_length, len(power)))

    # Shape (n_windows, sequence_length); (2049230, 50) with the defaults.
    result = np.array(
        [power[start:start + sequence_length] for start in range(n_windows)])

    result_mean = result.mean()
    result -= result_mean
    print("Shift :  %s" % result_mean)
    print("Data  :  %s" % (result.shape,))

    # round() returns a float on Python 2; slice bounds must be integers.
    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    # Shuffles the training rows only; `train` is a view into `result`, and
    # the test rows beyond `row` keep their chronological order.
    np.random.shuffle(train)
    X_train = train[:, :-1]
    y_train = train[:, -1]
    X_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Add the trailing feature axis: (samples, time steps, 1).
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    return [X_train, y_train, X_test, y_test]


def build_model(layers=(1, 50, 100, 1), dropout=0.2):
    """Build and compile a stacked two-layer LSTM regressor.

    The network is: LSTM (returning the full sequence) -> Dropout ->
    LSTM (returning only its last output) -> Dropout -> Dense -> linear
    activation, compiled with mean-squared-error loss and the Adam optimizer.

    Args:
        layers: Four sizes ``(input_dim, first_lstm_units, second_lstm_units,
            output_dim)``. The default reproduces the original hard-coded
            ``[1, 50, 100, 1]``.
        dropout: Dropout rate applied after each LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    input_dim, first_units, second_units, output_dim = layers

    model = Sequential()

    # First recurrent layer keeps every time step so the second LSTM
    # receives a sequence as input.
    model.add(LSTM(
        input_dim=input_dim,
        output_dim=first_units,
        return_sequences=True))
    model.add(Dropout(dropout))

    # Second recurrent layer collapses the sequence to a single vector.
    model.add(LSTM(
        second_units,
        return_sequences=False))
    model.add(Dropout(dropout))

    model.add(Dense(
        output_dim=output_dim))
    model.add(Activation("linear"))

    start = time.time()
    model.compile(loss="mse", optimizer="adam")
    print("Compilation Time :  %s" % (time.time() - start))
    return model


def _shifted_mean_abs_error(y_test, predicted, sequence_length):
    """Mean-centre both series, shift them by ``sequence_length - 1`` and compare.

    Returns ``(y_shifted, predicted_shifted, mean_abs_error)``.
    """
    offset = sequence_length - 1
    y_centred = y_test - np.mean(y_test)
    predicted_centred = predicted - np.mean(predicted)
    # Slice by explicit length: `[:-offset]` would be empty for offset == 0.
    y_shifted = y_centred[:max(len(y_centred) - offset, 0)]
    predicted_shifted = predicted_centred[offset:]
    error = np.mean(np.abs(y_shifted - predicted_shifted))
    return y_shifted, predicted_shifted, error


def run_network(model=None, data=None, epochs=10, ratio=1, sequence_length=3,
                path_to_dataset='TIMBER_DATA_1.csv'):
    """Train the LSTM, predict on the test split and plot a comparison.

    Args:
        model: A compiled model exposing ``fit`` and ``predict``; built with
            ``build_model()`` when ``None``.
        data: ``(X_train, y_train, X_test, y_test)``; loaded with
            ``data_power_consumption`` when ``None``.
        epochs: Number of training epochs.
        ratio: Fraction of the dataset to load (only used when ``data`` is
            ``None``).
        sequence_length: Window length used for loading and for the shift in
            the error computation. When ``data`` is passed in, this should
            match the window length that data was built with.
        path_to_dataset: CSV file to load when ``data`` is ``None``.

    Returns:
        ``(model, y_test, predicted)`` where ``predicted`` is a flat array,
        or ``(model, y_test, 0)`` if training was interrupted with Ctrl-C.
    """
    global_start_time = time.time()

    if data is None:
        print('Loading data... ')
        X_train, y_train, X_test, y_test = data_power_consumption(
            path_to_dataset, sequence_length, ratio)
    else:
        X_train, y_train, X_test, y_test = data

    print('\nData Loaded. Compiling...\n')

    if model is None:
        model = build_model()

    try:
        model.fit(
            X_train, y_train,
            batch_size=512, nb_epoch=epochs, validation_split=0.05)
        predicted = model.predict(X_test)
        predicted = np.reshape(predicted, (predicted.size,))
        print("done")
    except KeyboardInterrupt:
        # Allow aborting a long training run without losing the model.
        print('Training duration (s) :  %s' % (time.time() - global_start_time))
        return model, y_test, 0

    # Plotting is best-effort: any failure is reported but does not prevent
    # the trained model and predictions from being returned.
    try:
        fig, ax = plt.subplots()
        txt = ("epochs: " + str(epochs),
               "ratio: " + str(ratio),
               "sequence_length: " + str(sequence_length))

        # Calculate error: shift predicted by "sequence_length - 1" and take
        # the mean of the absolute differences.
        # NOTE(review): predicted[i] is the model output for X_test[i], whose
        # target is y_test[i]; after this shift y_test[i] is compared with
        # predicted[i + sequence_length - 1], so the reported error grows with
        # sequence_length by construction — confirm the offset is intended.
        y_test_mean_shifted, predicted_mean_shifted, prediction_error = \
            _shifted_mean_abs_error(y_test, predicted, sequence_length)
        txt = txt + ("mean error: ", str(prediction_error))

        ax.plot(y_test_mean_shifted[900:1000], 'r--', label='Real data')
        ax.plot(predicted_mean_shifted[900:1000], 'g:', label='Predicted')
        # The legend must be created after the labelled lines exist.
        ax.legend(loc='upper center', shadow=True)
        fig.text(0.4, 0.2, txt, horizontalalignment='center',
                 verticalalignment='center', transform=ax.transAxes)

        figure_dir = 'cern_figures'
        if not os.path.isdir(figure_dir):
            os.makedirs(figure_dir)
        plt.savefig(os.path.join(figure_dir, 'testplot' + str(txt) + '.png'))
        plt.show()
    except Exception as e:
        print(str(e))
    print('Training duration (s) :  %s' % (time.time() - global_start_time))

    return model, y_test, predicted

# main
if __name__ == "__main__":
    # Run the full pipeline with default settings; the returned model is not
    # needed here, only the test targets and the predictions.
    _trained_model, y_test_out, predicted_out = run_network()
    # Optional mean-centring of the outputs, left disabled:
    #y_test_out_mean = y_test_out - np.mean(y_test_out)
    #predicted_out_mean = predicted_out - np.mean(predicted_out)

【问题讨论】：

标签： machine-learning time-series artificial-intelligence keras lstm

【解决方案1】：

可能是因为您的时间序列在时间 t 的取值并不依赖于它在时间 t-10 的取值。如果您有一个时间序列 (x1,...,xn)，并且 xn 与 xn-p 之间没有关联，那么就没有理由使用长度为 p 的回看窗口（lookback）。

例如，如果您想提前一小时预测天气，就不会使用长达两周的回看窗口。为什么？因为两周前的天气对现在的天气没有影响。您会使用过去一小时（或过去一天）的天气。

PS：我之所以用天气预报举例，是因为在我看来两周前的天气与现在的天气之间没有联系。不过也许天气预报专家会证明我错了！

干杯！

【讨论】：