【发布时间】:2021-01-03 19:58:02
【问题描述】:
我一直在尝试用取自本文的 DQN 解决 OpenAI 月球着陆器游戏
https://arxiv.org/pdf/2006.04938v2.pdf
问题是训练 50 集需要 12 个小时,所以肯定有问题。
import os
import random
import gym
import numpy as np
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
ENV_NAME = "LunarLander-v2"
DISCOUNT_FACTOR = 0.9
LEARNING_RATE = 0.001
MEMORY_SIZE = 2000
TRAIN_START = 1000
BATCH_SIZE = 24
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.99
class MyModel(Model):
def __init__(self, input_size, output_size):
super(MyModel, self).__init__()
self.d1 = Dense(128, input_shape=(input_size,), activation="relu")
self.d2 = Dense(128, activation="relu")
self.d3 = Dense(output_size, activation="linear")
def call(self, x):
x = self.d1(x)
x = self.d2(x)
return self.d3(x)
class DQNSolver():
def __init__(self, observation_space, action_space):
self.exploration_rate = EXPLORATION_MAX
self.action_space = action_space
self.memory = deque(maxlen=MEMORY_SIZE)
self.model = MyModel(observation_space,action_space)
self.model.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def act(self, state):
if np.random.rand() < self.exploration_rate:
return random.randrange(self.action_space)
q_values = self.model.predict(state)
return np.argmax(q_values[0])
def experience_replay(self):
if len(self.memory) < BATCH_SIZE:
return
batch = random.sample(self.memory, BATCH_SIZE)
state_batch, q_values_batch = [], []
for state, action, reward, state_next, terminal in batch:
# q-value prediction for a given state
q_values_cs = self.model.predict(state)
# target q-value
max_q_value_ns = np.amax(self.model.predict(state_next)[0])
# correction on the Q value for the action used
if terminal:
q_values_cs[0][action] = reward
else:
q_values_cs[0][action] = reward + DISCOUNT_FACTOR * max_q_value_ns
state_batch.append(state[0])
q_values_batch.append(q_values_cs[0])
# train the Q network
self.model.fit(np.array(state_batch),
np.array(q_values_batch),
batch_size = BATCH_SIZE,
epochs = 1, verbose = 0)
self.exploration_rate *= EXPLORATION_DECAY
self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
def lunar_lander():
env = gym.make(ENV_NAME)
observation_space = env.observation_space.shape[0]
action_space = env.action_space.n
dqn_solver = DQNSolver(observation_space, action_space)
episode = 0
print("Running")
while True:
episode += 1
state = env.reset()
state = np.reshape(state, [1, observation_space])
scores = []
score = 0
while True:
action = dqn_solver.act(state)
state_next, reward, terminal, _ = env.step(action)
state_next = np.reshape(state_next, [1, observation_space])
dqn_solver.remember(state, action, reward, state_next, terminal)
dqn_solver.experience_replay()
state = state_next
score += reward
if terminal:
print("Episode: " + str(episode) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(score))
scores.append(score)
break
if np.mean(scores[-min(100, len(scores)):]) >= 195:
print("Problem is solved in {} episodes.".format(episode))
break
env.close
if __name__ == "__main__":
lunar_lander()
这是日志
root@b11438e3d3e8:~# /usr/bin/python3 /root/test.py
2021-01-03 13:42:38.055593: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.338231: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-01-03 13:42:39.368192: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.368693: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8095GHz coreCount: 20 deviceMemorySize: 7.92GiB deviceMemoryBandwidth: 298.32GiB/s
2021-01-03 13:42:39.368729: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.370269: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-01-03 13:42:39.371430: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-01-03 13:42:39.371704: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2021-01-03 13:42:39.373318: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2021-01-03 13:42:39.374243: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2021-01-03 13:42:39.377939: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2021-01-03 13:42:39.378118: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.378702: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.379127: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2021-01-03 13:42:39.386525: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 3411185000 Hz
2021-01-03 13:42:39.386867: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4fb44c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-01-03 13:42:39.386891: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2021-01-03 13:42:39.498097: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.498786: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4fdf030 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2021-01-03 13:42:39.498814: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): GeForce GTX 1080, Compute Capability 6.1
2021-01-03 13:42:39.498987: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.499416: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8095GHz coreCount: 20 deviceMemorySize: 7.92GiB deviceMemoryBandwidth: 298.32GiB/s
2021-01-03 13:42:39.499448: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.499483: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2021-01-03 13:42:39.499504: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2021-01-03 13:42:39.499523: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2021-01-03 13:42:39.499543: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2021-01-03 13:42:39.499562: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2021-01-03 13:42:39.499581: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2021-01-03 13:42:39.499643: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.500113: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.500730: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2021-01-03 13:42:39.500772: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2021-01-03 13:42:39.915228: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-01-03 13:42:39.915298: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0
2021-01-03 13:42:39.915322: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N
2021-01-03 13:42:39.915568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.916104: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-01-03 13:42:39.916555: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6668 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080, pci bus id: 0000:01:00.0, compute capability: 6.1)
Running
2021-01-03 13:42:40.267699: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
这是 GPU 统计数据
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.66 Driver Version: 450.66 CUDA Version: 11.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 GeForce GTX 1080 Off | 00000000:01:00.0 On | N/A |
| 0% 53C P2 46W / 198W | 7718MiB / 8111MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
如您所见,TensorFlow 不在 GPU 上计算,而是保留内存,所以我假设这是因为神经网络的输入太小,而是使用 CPU。
为了确保 GPU 安装正确,我从他们的文档中运行了一个示例,它使用了 GPU。也可以在google colabhere上重现问题
是算法问题还是代码问题?
在这种情况下有没有办法利用 GPU?
更新
事实证明,代理学习飞行而不是学习着陆,所以我添加了最大步长 150 来限制剧集时间,但它仍然很慢。
【问题讨论】:
-
嗨,主要原因似乎是dqn_solver.experience_replay()的使用,尝试在每集结束后使用它,而不是在每集的每个步骤中调用它。
标签: python tensorflow keras deep-learning openai-gym