【问题标题】:ValueError: No gradients provided for any variable in Tensorflow 2.5ValueError:没有为 Tensorflow 2.5 中的任何变量提供梯度
【发布时间】:2021-10-06 20:30:03
【问题描述】:

我正在执行强化学习,需要在自定义环境中训练演员和评论家神经网络。我的网络和 RL 代理有以下代码:

import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.keras.losses as kls

class critic(tf.keras.Model):
    ## Critic NN
    def __init__(self):
        super().__init__()
        self.d1 = tf.keras.layers.Dense(64,activation='relu')
        self.v = tf.keras.layers.Dense(1, activation = None)

    def call(self, input_data):
        x = self.d1(input_data)
        v = self.v(x)
        return v
    
class actor(tf.keras.Model):
    ## Actor NN
    def __init__(self):
        super().__init__()
        self.d1 = tf.keras.layers.Dense(64,activation='relu')
        self.a = tf.keras.layers.Dense(4,activation='softmax')

    def call(self, input_data):
        x = self.d1(input_data)
        a = self.a(x)
        return a

class rlalgorithm:
    ## RL Agent that trains the above NNs based on data from environment
    def __init__(self, actions, learning_rate=0.1):
        ## Display name for graphing performance
        self.display_name="A2C"
        ## Root Mean Square Optimizer for minimizing A2C losses
        self.a_opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
        self.c_opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
        ## Initialize models
        self.actor = actor()
        self.critic = critic()
        ## Define training constants and variables
        learn_rate = tf.constant(learning_rate, dtype=tf.float32)
        self.reward = tf.Variable(initial_value=0, dtype=tf.float32)
        self.state = tf.Variable(initial_value=tf.zeros(shape=(1, 4)), dtype=tf.float32, shape=(1, 4))
        self.next_state = tf.Variable(initial_value=tf.zeros(shape=(1, 4)), dtype=tf.float32, shape=(1, 4))
        self.action = tf.Variable(initial_value=0, dtype=tf.float32)
        ## The graph that produces the advantage
        advantage = self.reward + learn_rate*self.critic(self.next_state) - self.critic(self.state)
        ## Graph that produces losses
        dist = tfp.distributions.Categorical(probs=self.actor(self.state), dtype=tf.float32)
        self.actor_loss = dist.log_prob(self.action)*advantage
        self.critic_loss = advantage**2

    def choose_action(self, state):
        ## Agent chooses action to proceed to next state
        prob = self.actor(tf.convert_to_tensor([state]))
        dist = tfp.distributions.Categorical(probs=prob, dtype=tf.float32)
        action = dist.sample()
        return int(action.numpy()[0])

    def learn(self, s, a, r, s_):
        ## Based on chosen action, learn from result

        ## Assign training variables for this state-action outcome
        self.reward = self.reward.assign(r)
        self.state = self.state.assign(tf.convert_to_tensor([s]))
        self.next_state = self.next_state.assign(tf.convert_to_tensor([s_]))
        self.action = self.action.assign(a)

        ## Generate the loss gradient for actor
        with tf.GradientTape() as tape:
            actor_grad = tape.gradient(self.actor_loss, self.actor.trainable_variables)
            self.a_opt.apply_gradients(zip(actor_grad, self.actor.trainable_variables))

        ## Generate the loss gradient for critic
        with tf.GradientTape() as tape:
            critic_grad = tape.gradient(self.critic_loss, self.critic.trainable_variables)
            self.c_opt.apply_gradients(zip(critic_grad, self.critic.trainable_variables))

        ## Environment uses this, not relevant to learning
        return s_, self.choose_action(s_)

我收到以下错误:

ValueError: No gradients provided for any variable: ['actor/dense/kernel:0', 'actor/dense/bias:0', 'actor/dense_1/kernel:0', 'actor/dense_1/bias:0'].

我多次看到这个问题被问到,但以前的解决方案似乎都不适用于我的情况。不幸的是,我无法提供此代理运行的环境,但错误仅包含在上述文件中。

我已阅读文档并尝试使用优化器最小化函数进行类似的实现,结果相同。

我怀疑这个问题与张量流图的定义方式有关,但我不确定到底是什么问题。

感谢任何和所有的帮助。

【问题讨论】:

    标签: python keras tensorflow2.0


    【解决方案1】:

    我自己解决了这个问题。

    我不明白的是 tf.GradientTape 的正确用法。在 with 块中,我需要执行计算损失的操作,以便找到梯度。

    这是更新后的学习功能,供其他人参考:

    def learn(self, s, a, r, s_):
            ## Based on chosen action, learn from result
            ## Assign training variables for this state-action outcome
            self.reward = self.reward.assign(r)
            self.state = self.state.assign(tf.convert_to_tensor([s]))
            self.next_state = self.next_state.assign(tf.convert_to_tensor([s_]))
            self.action = self.action.assign(a)
    
            ## Generate the loss gradient for critic
            with tf.GradientTape() as tape:
                advantage = self.reward + self.learn_rate*self.critic(self.next_state) - self.critic(self.state)
                critic_loss = advantage**2
                critic_grad = tape.gradient(critic_loss, self.critic.trainable_variables)
                self.c_opt.apply_gradients(zip(critic_grad, self.critic.trainable_variables))
    
            ## Generate the loss gradient for actor
            with tf.GradientTape() as tape:
                dist = tfp.distributions.Categorical(probs=self.actor(self.state), dtype=tf.float32)
                actor_loss = dist.log_prob(self.action)*self.advantage
                actor_grad = tape.gradient(actor_loss, self.actor.trainable_variables)
                self.a_opt.apply_gradients(zip(actor_grad, self.actor.trainable_variables))
    
            ## Environment uses this, not relevant to learning
            return s_, self.choose_action(s_)
    

    【讨论】:

      猜你喜欢
      • 2020-10-31
      • 2020-09-30
      • 1970-01-01
      • 2021-09-08
      • 2021-04-17
      • 1970-01-01
      • 2020-08-28
      • 2020-07-29
      • 2021-07-11
      相关资源
      最近更新 更多