Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _ac_loss(self):
num_steps = tf.shape(self.rewards_input)[0]
discounts = tf.ones((num_steps, 1)) * self.discount_factor
rewards = tf.expand_dims(self.rewards_input, axis=1)
values = tf.expand_dims(self.agent.train_values(), axis=1)
bootstrap = tf.expand_dims(self.agent.bootstrap_value(), axis=0)
glr = trfl.generalized_lambda_returns(rewards, discounts, values, bootstrap, lambda_=self.td_lambda)
advantage = tf.squeeze(glr - values)
loss_actor = tf.reduce_mean(-tf.stop_gradient(advantage) * self.agent.train_log_probs())
loss_critic = tf.reduce_mean(advantage ** 2)
result = loss_actor + 0.5 * loss_critic
return result