【问题标题】:Tensorflow JS, custom loss function, putting the pieces together(Tensorflow JS,自定义损失函数,拼凑各个部分)
【发布时间】:2018-12-28 02:22:07
【问题描述】:

我想训练一个模型来玩游戏。我参考了用于强化学习的"Pong from pixels"(像素玩 Pong)示例,并以它为基础编写了我的代码。

但是,与该示例不同,在我的游戏中,无法仅从单帧预测出最佳动作。它更像一场扑克游戏:你需要考虑之前的动作。这就是我选择带有 LSTM 的模型的原因,就像文本生成教程中那样。

我想出了以下几段代码,但很难将这些部分组合在一起:

/* This function should be fine*/
function createModel(lstmLayerSizes) {
  if (!Array.isArray(lstmLayerSizes)) {
    lstmLayerSizes = [lstmLayerSizes];
  }

  _model = tf.sequential();
  for (let i = 0; i < lstmLayerSizes.length; ++i) {
    const lstmLayerSize = lstmLayerSizes[i];
    _model.add(tf.layers.lstm({
      units: lstmLayerSize,
      returnSequences: i < lstmLayerSizes.length - 1,
      inputShape: i === 0 ? [_sampleLength, _indicatorCount] : undefined
    }));
  }
  _model.add(
    tf.layers.dense({
      units: numberOfActions,
      activation: 'softmax'
    }));
}

function compileModel(learningRate) {
  _optimizer = tf.train.rmsprop(learningRate);
  _model.compile({
    optimizer: _optimizer,
    loss: myLossFunction
  });
}

/* Should only have 2 parameters */
function myLossFunction(actions, labels, rewards) {
  var tsActions = tf.oneHot(actions, 3);
  var tsActionsFloat32 = tf.cast(tsActions, 'float32');
  var cross_entropies = tf.losses.softmaxCrossEntropy(tsActionsFloat32, labels);
  var loss = tf.sum(tf.mul(rewards, cross_entropies));
  return loss;
}

async function train(game) {
  const numEpochs = 10;
  const numOfSteps = 50;

  var observations = [];
  var predictions = [];
  var actions = [];

  compileModel(0.01);
  for (let i = 0; i < numEpochs; ++i) {
    console.log(`epoch ${i}, start`)
    var step = game.init();
    for (let s = 0; s < numOfSteps; s++) {
      var observation = step.context;
      var prediction = predict(observation);
      var action = sampleFromProbability(prediction);
      step = game.doAction(action);

      observations.push([observation]);
      predictions = predictions.concat(Array.from(prediction.dataSync()));
      actions.push(action);
    }

    var rewards = calculateRewards(step, referenceFrame, numOfSteps);
    var tsActions = tf.tensor1d(actions);
    var tsPredictions = tf.tensor2d(predictions, [50, 3], 'float32');
    var tsRewards = tf.tensor1d(rewards);
    var tsObservations = tf.tensor3d(observations, [50, 1, 5])


    /* Gives a runtime error saying no variables can be found */
    //_optimizer.minimize(() => { return myLossFunction(actions, tsPredictions, rewards); } );

    /* invallid loss function, and should take the rewards into account */
    //await _model.fit(tsObservations, tsPredictions);

    console.log(`epoch ${i}, stop`)

    actions = [];
    predictions = [];
    rewards = [];
  }
}

关于如何进行的任何建议?

【问题讨论】:

    标签: javascript tensorflow lstm reinforcement-learning tensorflow.js


    【解决方案1】:

    我认为您最好的选择是使用演员 - 评论家网络。我也在尝试类似的实现。 你可以在这里查看: https://sergiuionescu.github.io/esp32-auto-car/sym/sym.html

    【讨论】:

      猜你喜欢
      • 2018-10-28
      • 2017-12-29
      • 2022-06-13
      • 2020-10-21
      • 2019-12-16
      • 2020-07-29
      • 1970-01-01
      • 2021-10-26
      • 2020-11-27
      相关资源
      最近更新 更多