ValueError：检查输入时出错：预期的dense_input有2维，但得到了形状为（1、1、2）的数组答案

【问题标题】：ValueError: Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 2)ValueError：检查输入时出错：预期的dense_input有2维，但得到了形状为（1、1、2）的数组
【发布时间】：2021-11-04 14:53:01
【问题描述】：

编辑：问题已解决。解决方案如下。

尝试构建 RL 模型来处理任务。有两个输入：x 和 y，两者都在 1 到 100 的整数范围内测量。基于这两个输入应该有一个输出（要采取的行动，离散（5））和置信度。

另外，我对这个领域很陌生。请随时问我任何问题或纠正我似乎完全愚蠢/错误的事情。

这是我的程序（导入尚未清理......）：

from abc import ABC
import gym
from tensorflow import keras
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, losses, metrics
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.optimizers import Adam
import os
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

steps = 10000
episodes = 100
score_requirement = 1000

class PlantEnv(Env, ABC):
    def __init__(self):
        # Actions = water: 0=(none), 1=(3 seconds), 2=(4 seconds), 3=(5 seconds), 4=(6 seconds)
        self.action_space = Discrete(5)

        # Starting Moisture
        moisture = 20 + random.randint(-10, 10)
        # Starting Chance of Rain
        chance_of_rain = 50 + random.randint(-50, 50)

        # Observations
        self.observation_space = Box(low=np.array([0, 0]), high=np.array([100, 100]), dtype=np.int)
        self.state = moisture, chance_of_rain

        # Number of water steps left
        self.water_length = steps

    def step(self, action):
        # Action section
        water = 0

        if action == 1:
            water = 2
        elif action == 2:
            water = 3
        elif action == 3:
            water = 4
        elif action == 4:
            water = 5

        moisture, chance_of_rain = self.state

        moisture += (water * 5)
        self.water_length -= 1

        # Reward Section
        reward = 0
        if 40 <= moisture <= 60:
            reward = 2
        # If moisture is dry or wet
        elif 60 < moisture <= 80 or 20 <= moisture < 40:
            reward = 0.5
        # If moisture is really dry or really wet
        elif 80 < moisture <= 100 or 0 <= moisture < 20:
            reward = -1
        # If moisture is really dry or really wet
        elif 100 < moisture or moisture < 0:
            reward = -2

        # Check if shower is done
        if self.water_length <= 0:
            done = True
        else:
            done = False

        moistureLoss = random.randint(15, 25)
        moisture -= moistureLoss
        chance_of_rain = 50 + random.randint(-50, 50)
        xfactor = chance_of_rain + random.randint(-50, 50)
        if xfactor > 100:
            moisture += (10 + random.randint(0, 15))

        # Set placeholder for info
        info = {}

        # Save current state
        self.state = moisture, chance_of_rain

        # Return step information
        return self.state, reward, done, info

    def reset(self):
        # Reset test environment
        # Set starting moisture
        moisture = 50 + random.randint(-10, 10)
        # Set starting chance of rain array
        chance_of_rain = 50 + random.randint(-50, 50)
        self.state = moisture, chance_of_rain
        # Reset Test time
        self.water_length = steps
        return self.state


def build_model():
    model = Sequential()
    model.add(Flatten(input_shape=(1, 4)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(2, activation='linear'))
    return model


def build_agent(model):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=2,
                   nb_steps_warmup=10, target_model_update=1e-2)
    return dqn


# Create environment
env = PlantEnv()

accepted_scores = []
training_data = []
scores = []
good_episodes = 0

# Create episodes and initiate simulation
for episode in range(1, episodes + 1):
    observation = env.reset()
    done = False
    score = 0
    history = []
    prev_observation = []

    while not done:
        action = env.action_space.sample()
        if observation[0] > 100:
            action = 0
        elif observation[0] < 0:
            action = 4
        observation, reward, done, info = env.step(action)
        score += reward
        if len(prev_observation) > 0:
            history.append([prev_observation, action])
        prev_observation = observation

    if score >= score_requirement:
        good_episodes += 1
        accepted_scores.append(score)
        for data in history:
            if data[1] == 1:
                output = [1]
            else:
                output = [0]

            training_data.append([data[0], output])

    scores.append(score)

if len(accepted_scores) > 0:
    print("Average accepted score: ", np.mean(accepted_scores))
    print("Median accepted score : ", np.median(accepted_scores))
print("Episodes above accepted score of {}: {}/{}\n".format(score_requirement, good_episodes, episodes))

model = build_model()
model.summary()

dqn = build_agent(model)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

第一个模型在尝试 dqn.fit 时出现此错误： ValueError: 检查输入时出错：预期的 dense_input 有 2 维，但得到的数组形状为 (1, 1, 2)

第二个模型在尝试 build_agent 时出现此错误： AttributeError: 'list' 对象没有属性 'shape'

任何关于我做错了什么或如何纠正它的想法都会有很大的帮助。我对自己的环境设置正确有 95% 的信心。

我最初使用第一个模型只是为了看看我是否可以让程序编译和工作。然后，经过进一步研究，我建立了第二个模型，因为我知道它能够给我一个信心等级的行动。两次都出错。

【问题讨论】：

环境有形状吗？我没有看到可用的属性。

标签： python tensorflow keras neural-network openai-gym

【解决方案1】：

您的模型需要 2D 输入，但您将其定义为 1D。这是一个工作示例：

from abc import ABC
import gym
from tensorflow import keras
from gym import Env
from gym.spaces import Discrete, Box
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, losses, metrics
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.optimizers import Adam
import os
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

steps = 10000
episodes = 100
score_requirement = 1000

class PlantEnv(Env, ABC):
    def __init__(self):
        # Actions = water: 0=(none), 1=(3 seconds), 2=(4 seconds), 3=(5 seconds), 4=(6 seconds)
        self.action_space = Discrete(5)

        # Starting Moisture
        moisture = 20 + random.randint(-10, 10)
        # Starting Chance of Rain
        chance_of_rain = 50 + random.randint(-50, 50)

        # Observations
        self.observation_space = Box(low=np.array([0, 0]), high=np.array([100, 100]), dtype=np.int)
        self.state = moisture, chance_of_rain

        # Number of water steps left
        self.water_length = steps

    def step(self, action):
        # Action section
        water = 0

        if action == 1:
            water = 2
        elif action == 2:
            water = 3
        elif action == 3:
            water = 4
        elif action == 4:
            water = 5

        moisture, chance_of_rain = self.state

        moisture += (water * 5)
        self.water_length -= 1

        # Reward Section
        reward = 0
        if 40 <= moisture <= 60:
            reward = 2
        # If moisture is dry or wet
        elif 60 < moisture <= 80 or 20 <= moisture < 40:
            reward = 0.5
        # If moisture is really dry or really wet
        elif 80 < moisture <= 100 or 0 <= moisture < 20:
            reward = -1
        # If moisture is really dry or really wet
        elif 100 < moisture or moisture < 0:
            reward = -2

        # Check if shower is done
        if self.water_length <= 0:
            done = True
        else:
            done = False

        moistureLoss = random.randint(15, 25)
        moisture -= moistureLoss
        chance_of_rain = 50 + random.randint(-50, 50)
        xfactor = chance_of_rain + random.randint(-50, 50)
        if xfactor > 100:
            moisture += (10 + random.randint(0, 15))

        # Set placeholder for info
        info = {}

        # Save current state
        self.state = moisture, chance_of_rain

        # Return step information
        return self.state, reward, done, info

    def reset(self):
        # Reset test environment
        # Set starting moisture
        moisture = 50 + random.randint(-10, 10)
        # Set starting chance of rain array
        chance_of_rain = 50 + random.randint(-50, 50)
        self.state = moisture, chance_of_rain
        # Reset Test time
        self.water_length = steps
        return self.state


def build_model():
    model = Sequential()
    model.add(Flatten(input_shape=(1, 2)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(5, activation='linear'))
    return model


def build_agent(model):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=5,
                   nb_steps_warmup=10, target_model_update=1e-2)
    return dqn


# Create environment
env = PlantEnv()

accepted_scores = []
training_data = []
scores = []
good_episodes = 0

# Create episodes and initiate simulation
for episode in range(1, episodes + 1):
    observation = env.reset()
    done = False
    score = 0
    history = []
    prev_observation = []

    while not done:
        action = env.action_space.sample()
        if observation[0] > 100:
            action = 0
        elif observation[0] < 0:
            action = 4
        observation, reward, done, info = env.step(action)
        score += reward
        if len(prev_observation) > 0:
            history.append([prev_observation, action])
        prev_observation = observation

    if score >= score_requirement:
        good_episodes += 1
        accepted_scores.append(score)
        for data in history:
            if data[1] == 1:
                output = [1]
            else:
                output = [0]

            training_data.append([data[0], output])

    scores.append(score)

if len(accepted_scores) > 0:
    print("Average accepted score: ", np.mean(accepted_scores))
    print("Median accepted score : ", np.median(accepted_scores))
print("Episodes above accepted score of {}: {}/{}\n".format(score_requirement, good_episodes, episodes))

model = build_model()
model.summary()

dqn = build_agent(model)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

请注意，input_shape 4 等于 states 的数量，输出节点 (2) 等于 actions 的数量。您必须根据数据集更改这些参数。

【讨论】：

感谢您的回复。我很感激。调整我的代码后，我现在得到这个： ValueError: Error when checks input: expected flatten_input to have shape (1, 4) but got array with shape (1, 2)
扁平化层，你的输出层，还有这个参数nb_actions=5
如果我可以添加一个半相关的问题：当尝试使用 dqn.model.predict(np.array([30, 30])) 我得到这个::: ValueError: Error when checks输入：预期 flatten_input 有 3 个维度，但得到的数组形状为 (2, 1)
我不明白我应该使用什么格式来预测。您能提供一些见解吗？
试试dqn.model.predict(np.expand_dims(np.array([30, 30]), axis=0))