【发布时间】:2018-12-28 02:22:07
【问题描述】:
我想训练一个模型来玩游戏。我参考了强化学习中的“Pong from Pixels”示例,并以它为基础编写了自己的代码。
但是,与该示例不同,在我的游戏中无法仅凭单帧画面预测最佳动作。它更像是一场扑克游戏,需要把之前的动作也考虑进去。这就是我选择带有 LSTM 的模型的原因,就像文本生成教程中所做的那样。
我想出了以下几段代码,但很难将这些部分组合在一起:
/* This function should be fine*/
/**
 * Builds a stacked-LSTM network with a softmax output layer and stores it in
 * the shared `_model` variable.
 *
 * Every LSTM layer except the last one returns its full sequence so the next
 * LSTM layer can consume it. Only the first layer is given an input shape,
 * `[_sampleLength, _indicatorCount]`. The final dense layer has
 * `numberOfActions` units.
 *
 * @param {number|number[]} lstmLayerSizes - Unit count for a single LSTM
 *     layer, or one unit count per stacked LSTM layer.
 * @returns {void} Nothing; the model is written to `_model`.
 */
function createModel(lstmLayerSizes) {
  const layerSizes = Array.isArray(lstmLayerSizes)
    ? lstmLayerSizes
    : [lstmLayerSizes];
  const lastLayerIndex = layerSizes.length - 1;

  _model = tf.sequential();

  for (const [layerIndex, units] of layerSizes.entries()) {
    const isFirstLayer = layerIndex === 0;
    const lstmConfig = {
      units,
      // Intermediate layers must emit the whole sequence for the next LSTM.
      returnSequences: layerIndex < lastLayerIndex,
      inputShape: isFirstLayer ? [_sampleLength, _indicatorCount] : undefined,
    };
    _model.add(tf.layers.lstm(lstmConfig));
  }

  const outputConfig = { units: numberOfActions, activation: 'softmax' };
  _model.add(tf.layers.dense(outputConfig));
}
/**
 * Creates an RMSProp optimizer, stores it in the shared `_optimizer`
 * variable, and compiles `_model` with it and `myLossFunction` as the loss.
 *
 * @param {number} learningRate - Learning rate passed to `tf.train.rmsprop`.
 * @returns {void}
 */
function compileModel(learningRate) {
  const rmspropOptimizer = tf.train.rmsprop(learningRate);
  _optimizer = rmspropOptimizer;

  const compileConfig = {
    optimizer: rmspropOptimizer,
    loss: myLossFunction,
  };
  _model.compile(compileConfig);
}
/* Should only have 2 parameters */
/**
 * Reward-weighted cross-entropy loss: one-hot encodes the chosen actions,
 * computes the softmax cross-entropy of every step against `labels`, scales
 * each step's loss by its reward and sums the result.
 *
 * The cross-entropy is requested with `tf.Reduction.NONE`. With the default
 * reduction `tf.losses.softmaxCrossEntropy` already collapses the batch to a
 * single scalar, so multiplying by `rewards` would only scale the total by
 * `sum(rewards)` instead of weighting each step individually.
 *
 * NOTE(review): a loss given to `model.compile` is invoked as
 * `(yTrue, yPred)`, so `rewards` would be undefined on that path. This
 * function presumably has to be called from an `optimizer.minimize` closure
 * instead - TODO confirm.
 * NOTE(review): `tf.losses.softmaxCrossEntropy` treats its second argument as
 * logits, while the model's output layer already applies softmax. Confirm
 * whether raw logits should be passed here.
 *
 * @param {tf.Tensor|number[]} actions - Index of the chosen action for each
 *     step (assumed to lie in `[0, 3)`).
 * @param {tf.Tensor} labels - Per-step model outputs, forwarded as the
 *     `logits` argument of the cross-entropy (assumed shape `[steps, 3]`).
 * @param {tf.Tensor|number[]} rewards - Reward for each step (assumed to have
 *     one entry per step).
 * @returns {tf.Tensor} Scalar tensor holding the summed, reward-weighted loss.
 */
function myLossFunction(actions, labels, rewards) {
  // Number of discrete actions; must match the width of `labels`.
  const numActions = 3;

  // tf.oneHot only accepts int32 indices; tensors built from plain JS numbers
  // default to float32.
  const actionIndices = tf.cast(actions, 'int32');
  const oneHotActions = tf.cast(tf.oneHot(actionIndices, numActions), 'float32');

  // No weights, default label smoothing, and no reduction: one loss per step.
  const crossEntropies = tf.losses.softmaxCrossEntropy(
    oneHotActions,
    labels,
    undefined,
    undefined,
    tf.Reduction.NONE,
  );

  return tf.sum(tf.mul(rewards, crossEntropies));
}
/**
 * Plays `numEpochs` episodes of `numOfSteps` steps each and, for every
 * episode, collects the observations, the predicted action probabilities,
 * the sampled actions and the rewards into tensors.
 *
 * Calls `compileModel(0.01)` once before the first episode.
 *
 * NOTE: the actual weight update is not wired in yet; see the notes inside
 * the epoch loop for the two approaches that were tried.
 *
 * @param {object} game - Environment exposing `init()` and
 *     `doAction(action)`; both return a step object whose `context` is the
 *     observation fed to `predict`.
 * @returns {Promise<void>} Resolves once all episodes have been played.
 */
async function train(game) {
  const numEpochs = 10;
  const numOfSteps = 50;

  compileModel(0.01);

  for (let i = 0; i < numEpochs; ++i) {
    console.log(`epoch ${i}, start`);

    // Fresh buffers for every episode. Previously `observations` was never
    // reset, so from the second epoch on it no longer matched the tensor
    // shape below.
    const observations = [];
    const predictions = [];
    const actions = [];

    let step = game.init();
    for (let s = 0; s < numOfSteps; s++) {
      const observation = step.context;
      const prediction = predict(observation);
      const action = sampleFromProbability(prediction);
      step = game.doAction(action);

      observations.push([observation]);
      predictions.push(...prediction.dataSync());
      actions.push(action);

      // The values were copied out above; release the tensor.
      // NOTE(review): assumes predict() returns a tensor owned by the caller.
      prediction.dispose();
    }

    const rewards = calculateRewards(step, referenceFrame, numOfSteps);

    // Actions are class indices; tf.oneHot requires int32.
    const tsActions = tf.tensor1d(actions, 'int32');
    // NOTE(review): 3 is presumably the number of actions, and [1, 5] the
    // per-step observation shape - keep in sync with the model.
    const tsPredictions = tf.tensor2d(predictions, [numOfSteps, 3], 'float32');
    const tsRewards = tf.tensor1d(rewards);
    const tsObservations = tf.tensor3d(observations, [numOfSteps, 1, 5]);

    // TODO: perform the policy update. Two attempts that did not work:
    //
    // 1) Gives a runtime error saying no variables can be found:
    //      _optimizer.minimize(() => { return myLossFunction(actions, tsPredictions, rewards); });
    //    NOTE(review): presumably because tsPredictions is rebuilt from
    //    dataSync() values and so has no link to the model's variables; the
    //    forward pass likely has to run inside the closure - TODO confirm.
    //
    // 2) Invalid loss function, and it should take the rewards into account:
    //      await _model.fit(tsObservations, tsPredictions);

    // Release the per-episode tensors so they do not accumulate across epochs.
    tf.dispose([tsActions, tsPredictions, tsRewards, tsObservations]);

    console.log(`epoch ${i}, stop`);
  }
}
关于如何进行的任何建议?
【问题讨论】:
标签: javascript tensorflow lstm reinforcement-learning tensorflow.js