序列预测 LSTM 神经网络落后答案

【问题标题】：Sequence prediction LSTM neural network is falling behind序列预测 LSTM 神经网络落后
【发布时间】：2017-08-02 15:17:47
【问题描述】：

我正在尝试实现一个猜谜游戏，用户猜硬币翻转，神经网络试图预测他的猜测（当然没有后见之明）。游戏应该是实时的，它会适应用户。我使用了 synaptic js，因为它看起来很可靠。

然而我似乎无法克服一个绊脚石：神经网络不断地在猜测中落后。比如，如果用户按下

heads heads tail heads heads tail heads heads tail

它确实识别出模式，但它落后了两个动作，例如

tail heads heads tail heads heads tail heads heads

我尝试了无数种策略：

随着用户点击正面或反面来训练网络
保存用户输入的日志，清除网络记忆，然后用截至当前猜测点的所有输入重新训练网络
通过多种方式混合和匹配训练与激活
尝试移动到感知器，一次通过一系列动作（效果比 LSTM 差）
还有许多我已经记不清的其他尝试

架构：

2 个输入，表示用户在上一轮点击的是正面还是反面
2 个输出，预测用户接下来会点击什么（这将在下一回合输入）

我已经尝试过在隐藏层中使用 10-30 个神经元，以及各种不同的训练轮数（epochs），但我总是遇到同样的问题！

我将发布我正在使用的 bucklescript 代码。

我做错了什么？还是说，我期望实时预测用户的猜测本身就不现实？有没有其他可替代的算法？

(* Object type describing the part of a synaptic network object that this
   file uses from OCaml. Only the signatures are established here; the
   behaviour of each method is implemented by the JavaScript library. *)
class type _nnet = object
    (* Called in this file with a two-element input array; the result is
       read back as a two-element output array. *)
    method activate : float array -> float array
    (* Called in this file as [propagate training_momentum target_array].
       NOTE(review): presumably the float is the library's learning rate --
       confirm against the synaptic documentation. *)
    method propagate : float -> float array -> unit
    (* Not called anywhere in the visible code. *)
    method clone : unit -> _nnet Js.t
    (* Called at the start of [train_from_sequence], before the stored
       sequence is replayed through the network. *)
    method clear : unit -> unit
end [@bs]

(* A network object as seen from OCaml: a JS object exposing the [_nnet]
   methods. *)
type nnet = _nnet Js.t

(* Constructors bound with [bs.new] to [synaptic.Architect.LSTM] (three, four
   and five integer arguments) and [synaptic.Architect.Perceptron] (three
   integer arguments). The identifier is spelled [ltsm] throughout this file.
   NOTE(review): presumably the first and last integers are the input and
   output layer sizes and the ones in between are hidden layer sizes --
   confirm against the synaptic documentation. *)
external ltsm : int -> int -> int -> nnet = "synaptic.Architect.LSTM" [@@bs.new]
external ltsm_2 : int -> int -> int -> int -> nnet = "synaptic.Architect.LSTM" [@@bs.new]
external ltsm_3 : int -> int -> int -> int -> int -> nnet = "synaptic.Architect.LSTM" [@@bs.new]
external perceptron : int -> int -> int -> nnet = "synaptic.Architect.Perceptron" [@@bs.new]

(* Abstract type of the value returned by [get_by_id]. *)
type id
(* Abstract type of the global object bound as [dom] below. *)
type dom

(* The JavaScript global [document]. *)
external dom : dom = "document" [@@bs.val]

(* Bound with [bs.send] to the [getElementById] method of a [dom] value. *)
external get_by_id : dom -> string -> id =
  "getElementById" [@@bs.send]

(* Bound with [bs.set] to the [innerHTML] property of an [id] value. *)
external set_text : id -> string -> unit =
  "innerHTML" [@@bs.set]

(*THE CODE*)

(* The single network instance used by every function in this file, built
   with the three-argument LSTM constructor as [ltsm 2 16 2]. *)
let current_net = ltsm 2 16 2
(* Passed as the float argument of every [propagate] call in this file. *)
let training_momentum = 0.1
(* Number of consecutive [propagate] calls that [propogate_n_times] makes for
   one target array. *)
let training_epochs = 20
(* Length of the [sequence] array, i.e. how many recent clicks are kept. *)
let training_memory = 16

(** [train_sequence_rec n the_array] calls [current_net##propagate] with
    [training_momentum] and [the_array] exactly [n] times. It does nothing
    when [n <= 0]. *)
let train_sequence_rec n the_array =
    for _pass = 1 to n do
        current_net##propagate training_momentum the_array
    done

(** Prints one line made of [prefix] and the first two elements of
    [the_arr], separated by single spaces. *)
let print_arr prefix the_arr =
    let first = string_of_float the_arr.(0) in
    let second = string_of_float the_arr.(1) in
    print_endline (String.concat " " [prefix; first; second])

(** Returns a freshly allocated two-element float array with both slots set
    to [0.0]. Every call yields a new array. *)
let blank_arr () = Array.make 2 0.0

(** Returns [true] when the element at index 1 is strictly greater than the
    element at index 0, and [false] otherwise (including ties). *)
let derive_guess_from_array the_arr =
    let slot0 = the_arr.(0) and slot1 = the_arr.(1) in
    slot1 > slot0

(** Writes [1.0] into slot 1 of [the_arr] when [the_value] is [true], and
    into slot 0 when it is [false]. The other slot is left untouched. *)
let set_array_inp the_value the_arr =
    let slot = if the_value then 1 else 0 in
    the_arr.(slot) <- 1.0

(** Encodes [the_value] as an array: takes a fresh array from [blank_arr],
    applies [set_array_inp the_value] to it and returns it. *)
let output_array the_value =
    let encoded = blank_arr () in
    let () = set_array_inp the_value encoded in
    encoded

(** Looks up [the_id] with [get_by_id] on the global [dom] binding. *)
let by_id (the_id : string) : id = get_by_id dom the_id

(** Writes the prediction label into the element with id ["status-text"]:
    ["Predicted Tails"] for [true], ["Predicted Heads"] for [false]. *)
let update_prediction_in_ui the_value =
    let label =
        if the_value then "Predicted Tails" else "Predicted Heads" in
    set_text (by_id "status-text") label

(** Adds one to the integer stored in [the_ref]. *)
let inc_ref the_ref = incr the_ref

(* Incremented once per call to [train_sequence]. *)
let total_guesses_count = ref 0
(* Incremented through [steve_won], which [train_sequence] calls when the
   network's prediction equals the value the user clicked. *)
let steve_won_count = ref 0

(* Fixed-size store of the most recent clicks, used as a ring buffer by
   [push_seq]. [update_prediction_in_ui] labels [false] as heads and [true]
   as tails. *)
let sequence = Array.make training_memory false
(* Total number of pushes so far; the next write goes to slot
   [!seq_ptr mod training_memory]. It is never wrapped back to zero. *)
let seq_ptr = ref 0
(* Number of stored entries; [push_seq] stops incrementing it once it
   reaches [training_memory]. *)
let seq_count = ref 0

(** Stores [the_value] in slot [!seq_ptr mod training_memory] of [sequence],
    advances [seq_ptr], and increments [seq_count] while it is still below
    [training_memory]. *)
let push_seq the_value =
    let slot = !seq_ptr mod training_memory in
    sequence.(slot) <- the_value;
    inc_ref seq_ptr;
    if !seq_count < training_memory then inc_ref seq_count

(** Slot of [sequence] at which traversal starts: [seq_ptr] minus
    [seq_count], reduced modulo [training_memory]. *)
let seq_start_offset () =
    let oldest = !seq_ptr - !seq_count in
    oldest mod training_memory

(** Applies [the_fun] to the stored entries of [sequence], starting at the
    slot given by [seq_start_offset] and wrapping around modulo
    [training_memory]. The start slot is computed once up front, while
    [seq_count] is re-read before every step. *)
let traverse_seq the_fun =
    let origin = seq_start_offset () in
    let step = ref 0 in
    while !step < !seq_count do
        the_fun sequence.((origin + !step) mod training_memory);
        incr step
    done

(** Reads the entry of [sequence] at the slot returned by
    [seq_start_offset]. No emptiness check is made, so with nothing pushed
    this is whatever slot 0 holds. *)
let first_in_sequence () =
    sequence.(seq_start_offset ())

(** [last_in_sequence_n n] returns the click recorded [n] pushes before the
    most recent one ([n = 0] is the latest click).

    Returns [false] when no such entry is stored, i.e. when [n] is negative
    or [n >= !seq_count].

    Fix: the previous formula was
    [((!seq_ptr - n) mod training_memory) - 1]. It yields [-1] whenever
    [!seq_ptr - n] is a multiple of [training_memory], so the lookup fell
    back to [false] even though the entry sits in the last slot of the ring
    buffer. The index is now decremented before the modulo is taken, and the
    range check uses [seq_count] so stale or overwritten slots are never
    read. *)
let last_in_sequence_n n =
    if n < 0 || n >= !seq_count then
        false
    else
        (* [push_seq] keeps [seq_count <= seq_ptr], so [!seq_ptr - 1 - n] is
           non-negative here and [mod] cannot produce a negative slot. *)
        Array.get sequence ((!seq_ptr - 1 - n) mod training_memory)

(** The most recently pushed click, or [false] when nothing has been pushed. *)
let last_in_sequence () = last_in_sequence_n 0

(** Builds a float array of length [3 * last_n_fields], initialised to
    [0.0], with exactly one [1.0] per group of three slots. For step [k]
    (from 0):
    - slot [3k] when [k >= !seq_count] (no entry for that step);
    - slot [3k + 1] when [last_in_sequence_n k] is [false];
    - slot [3k + 2] when [last_in_sequence_n k] is [true]. *)
let perceptron_input last_n_fields =
    let width = 3 * last_n_fields in
    let encoded = Array.make_float width in
    Array.fill encoded 0 width 0.0;
    for step = 0 to last_n_fields - 1 do
        let base = 3 * step in
        let hot =
            if step < !seq_count then
                (if last_in_sequence_n step then base + 2 else base + 1)
            else
                base in
        encoded.(hot) <- 1.0
    done;
    encoded

(** Adds one to [steve_won_count]. *)
let steve_won () = incr steve_won_count

(** Calls [current_net##propagate training_momentum the_output]
    [training_epochs] times in a row. *)
let propogate_n_times the_output =
    for _epoch = 1 to training_epochs do
        current_net##propagate training_momentum the_output
    done

(** Prints one log line reporting the previous value, the expected value and
    the predicted value, each rendered with [string_of_bool]. *)
let print_prediction prev exp pred =
    let line =
        String.concat "" [
            "Current training, previous: "; string_of_bool prev;
            ", expected: "; string_of_bool exp;
            ", predicted: "; string_of_bool pred;
        ] in
    print_endline line

(** Clears the network and replays the stored click sequence through it.

    For every stored entry [i], in [traverse_seq] order, the network is
    activated with the encoding of the previously visited entry, the outcome
    is logged with [print_prediction], and the encoding of [i] is propagated
    as the target via [propogate_n_times].

    Change: the local [count] reference was incremented on every step but
    never read, so it has been removed.

    NOTE(review): [previous] starts out as [first_in_sequence ()], which is
    also the first entry visited, so the first training pair maps that entry
    onto itself. Confirm whether this duplicated first step is intended. *)
let train_from_sequence () =
    current_net##clear ();
    let previous = ref (first_in_sequence ()) in
    print_endline "NEW TRAINING BATCH";
    traverse_seq (fun i ->
        let inp_arr = output_array !previous in
        let out_arr = output_array i in
        let act_res = current_net##activate inp_arr in
        print_prediction !previous i (derive_guess_from_array act_res);
        propogate_n_times out_arr;
        previous := i
    )

(** Writes [total_guesses_count] into the element with id ["total-count"]
    and [steve_won_count] into the element with id ["steve-won-count"]. *)
let update_counts_in_ui () =
    let show elem_id counter =
        set_text (by_id elem_id) (string_of_int !counter) in
    show "total-count" total_guesses_count;
    show "steve-won-count" steve_won_count

(** Handles one user click [the_value].

    In order: retrains the network on the stored sequence, activates it with
    the encoding of the latest stored click to obtain a prediction, records
    [the_value] with [push_seq], updates the counters, logs the outcome and
    refreshes the UI. *)
let train_sequence (the_value : bool) =
    train_from_sequence ();
    (* Read before [push_seq] so this is the click preceding [the_value]. *)
    let previous_click = last_in_sequence () in
    let predicted =
        derive_guess_from_array
            (current_net##activate (output_array previous_click)) in
    push_seq the_value;
    inc_ref total_guesses_count;
    if predicted = the_value then steve_won ();
    print_endline "CURRENT";
    print_prediction previous_click the_value predicted;
    update_prediction_in_ui predicted;
    update_counts_in_ui ()

(** Entry point for a user click: forwards [user_guess] to
    [train_sequence]. *)
let guess (user_guess : bool) : unit =
    train_sequence user_guess

(* No work is performed at module initialisation. *)
let () = ()

【问题讨论】：

标签： neural-network lstm recurrent-neural-network

【解决方案1】：

在每次训练迭代之前清除网络上下文是解决方法

您的代码中的问题是您的网络是循环训练的。而不是训练1 > 2 > 3 RESET 1 > 2 > 3，而是训练网络1 > 2 > 3 > 1 > 2 > 3。这使您的网络认为3 之后的值应该是1。

其次，没有理由使用 2 个输出神经元。有一个就足够了，输出1 等于正面，输出0 等于反面。我们只会对输出进行四舍五入。

我在这段代码中使用了Neataptic，而不是使用 Synaptic，它是 Synaptic 的改进版本，增加了功能和遗传算法。

代码

代码相当简单。稍微丑化一下，是这样的：

// Single-input, single-output LSTM with 12 memory cells.
var network = new neataptic.Architect.LSTM(1, 12, 1);
// The side clicked on the previous call, or null before the first click.
var previous = null;
// Accumulated { input: [previous], output: [side] } pairs, in click order.
var trainingData = [];

/**
 * Records a click, retrains the network on every pair seen so far and
 * predicts the next click.
 *
 * @param {number} side 1 for heads and 0 for tails
 * @returns {?number} the predicted next side (0 or 1), or null on the first
 *   click, when there is no previous click to learn from
 */
function onSideClick(side){
  var prediction = null;

  if(previous != null){
    trainingData.push({ input: [previous], output: [side] });

    // Train the data
    network.train(trainingData, {
      log: 500,
      iterations: 5000,
      error: 0.03,
      clear: true,
      rate: 0.05,
    });

    // Iterate over previous sets to get into the 'flow'.
    // `input` is already an array, so it is passed as-is instead of being
    // wrapped in a second array.
    for(var i = 0; i < trainingData.length; i++){
      network.activate(trainingData[i].input);
    }

    // Make a prediction from the side that was actually clicked, not from
    // the network's own previous (possibly wrong) output. `activate`
    // returns an array, so round its first element.
    prediction = Math.round(network.activate([side])[0]);
  }

  previous = side;
  return prediction;
}

Run the code here!

这段代码的关键是 clear: true。这基本上确保网络知道它是从第一个训练样本开始，而不是从最后一个训练样本继续。LSTM 的大小、迭代次数和学习率都可以自行调整。

成功了！

请注意，网络大约需要看到模式重复 2 遍才能学会它。

它确实存在非重复模式的问题：

【讨论】：

是的，我也一直在尝试解决这个问题，比如清除上下文和重新训练网络直到猜测的程度以及我忘记的无数其他东西。但至少您提供了很好的见解，很高兴阅读这些内容！
我想我找到了一个效果很好的修复程序。很快就会在某个地方提供新的答案。我在回答this 问题时偶然发现了它。