def create_sac_optimizers(self):
    """
    Creates the Adam optimizers and update ops for SAC, including
    the policy, value, and entropy updates, as well as the target network update.
    """
    policy_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    entropy_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    value_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    # Soft (Polyak) update: move each target value variable a fraction tau toward its source.
    self.target_update_op = [
        tf.assign(target, (1 - self.tau) * target + self.tau * source)
        for target, source in zip(
            self.target_network.value_vars, self.policy_network.value_vars
        )
    ]
    LOGGER.debug("value_vars")
    self.print_all_vars(self.policy_network.value_vars)
    LOGGER.debug("targvalue_vars")
    self.print_all_vars(self.target_network.value_vars)
    LOGGER.debug("critic_vars")
    self.print_all_vars(self.policy_network.critic_vars)
    LOGGER.debug("q_vars")
    self.print_all_vars(self.policy_network.q_vars)
    LOGGER.debug("policy_vars")
    self.print_all_vars(self.policy_network.policy_vars)
    # Hard copy used once at startup so the target network starts identical to the value network.
    self.target_init_op = [
        tf.assign(target, source)
        for target, source in zip(
            self.target_network.value_vars, self.policy_network.value_vars
        )
    ]
    self.update_batch_policy = policy_optimizer.minimize(
        self.policy_loss, var_list=self.policy_network.policy_vars
    )
    # Make sure policy is updated first, then value, then entropy.
    with tf.control_dependencies([self.update_batch_policy]):
        self.update_batch_value = value_optimizer.minimize(
            self.total_value_loss, var_list=self.policy_network.critic_vars
        )
        # Add entropy coefficient optimization operation
        with tf.control_dependencies([self.update_batch_value]):
            # The original snippet ends here; a plausible completion minimizes the
            # entropy-coefficient loss with respect to the log entropy coefficient.
            self.update_batch_entropy = entropy_optimizer.minimize(
                self.entropy_loss, var_list=self.log_ent_coef
            )
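
# The two tf.assign-based ops above implement the usual SAC target-network scheme:
# target_init_op hard-copies the value-network weights once at startup, while
# target_update_op performs a soft (Polyak) update controlled by tau after each
# training step. A minimal, self-contained sketch of that pattern on toy variables
# (assuming TF1-style graph mode, as the snippets do; all names here are illustrative):
import numpy as np
import tensorflow.compat.v1 as tf1

tf1.disable_eager_execution()

tau = 0.005
source = tf1.Variable(np.ones(3, dtype=np.float32), name="source_value")
target = tf1.Variable(np.zeros(3, dtype=np.float32), name="target_value")

hard_copy_op = tf1.assign(target, source)                               # like target_init_op
soft_update_op = tf1.assign(target, (1 - tau) * target + tau * source)  # like target_update_op

with tf1.Session() as sess:
    sess.run(tf1.global_variables_initializer())
    sess.run(hard_copy_op)                      # target now equals source
    sess.run(tf1.assign(source, source + 1.0))  # pretend a gradient step moved the source
    print(sess.run(soft_update_op))             # target moves only a fraction tau toward it
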
def make_beta_update(self) -> None:
    """
    Creates the beta parameter and its updater for GAIL
    """
    new_beta = tf.maximum(
        self.beta + self.alpha * (self.kl_loss - self.mutual_information), EPSILON
    )
    with tf.control_dependencies([self.update_batch]):
        self.update_beta = tf.assign(self.beta, new_beta)
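
# make_beta_update is a dual update of the bottleneck coefficient beta used with GAIL:
# beta grows while the discriminator's KL loss exceeds the mutual-information target and
# shrinks (never below EPSILON) once it falls under it, and the control dependency makes
# sure the main discriminator update runs first. A framework-free sketch of the same rule
# with made-up illustrative constants (not ML-Agents defaults):
EPSILON_SKETCH = 1e-7
alpha_sketch = 1e-3              # beta step size (assumed)
mutual_information_sketch = 0.5  # information-constraint target (assumed)
beta_sketch = 0.1

for kl_loss in (0.9, 0.9, 0.3, 0.3):  # pretend KL losses over four updates
    beta_sketch = max(
        beta_sketch + alpha_sketch * (kl_loss - mutual_information_sketch), EPSILON_SKETCH
    )
    print(f"kl={kl_loss:.1f} -> beta={beta_sketch:.5f}")
# beta rises while kl_loss > target and decays (floored at EPSILON) once it drops below.
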
def create_normalizer_update(vector_input, steps, running_mean, running_variance):
    """
    Builds the ops that fold a batch of observations into the running mean/variance
    used for observation normalization (a batched Welford update).
    The signature here is inferred from the variables the body uses.
    """
    steps_increment = tf.shape(vector_input)[0]
    total_new_steps = tf.add(steps, steps_increment)
    # Compute the incremental update and divide by the number of new steps.
    input_to_old_mean = tf.subtract(vector_input, running_mean)
    new_mean = running_mean + tf.reduce_sum(
        input_to_old_mean / tf.cast(total_new_steps, dtype=tf.float32), axis=0
    )
    # Compute difference of input to the new mean for Welford update
    input_to_new_mean = tf.subtract(vector_input, new_mean)
    new_variance = running_variance + tf.reduce_sum(
        input_to_new_mean * input_to_old_mean, axis=0
    )
    update_mean = tf.assign(running_mean, new_mean)
    update_variance = tf.assign(running_variance, new_variance)
    update_norm_step = tf.assign(steps, total_new_steps)
    return tf.group([update_mean, update_variance, update_norm_step])
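
# For intuition, the same batched Welford update in plain NumPy, checked against a
# direct computation. As in the ops above, running_variance accumulates the sum of
# squared deviations (M2); the actual variance is running_variance / steps.
import numpy as np

def welford_batch_update(batch, steps, running_mean, running_variance):
    steps_increment = batch.shape[0]
    total_new_steps = steps + steps_increment
    input_to_old_mean = batch - running_mean
    new_mean = running_mean + (input_to_old_mean / total_new_steps).sum(axis=0)
    input_to_new_mean = batch - new_mean
    new_variance = running_variance + (input_to_new_mean * input_to_old_mean).sum(axis=0)
    return total_new_steps, new_mean, new_variance

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 4))
n, mean, m2 = 0, np.zeros(4), np.zeros(4)
for batch in np.split(data, 10):  # feed the data as ten mini-batches
    n, mean, m2 = welford_batch_update(batch, n, mean, m2)

print(np.allclose(mean, data.mean(axis=0)))   # True
print(np.allclose(m2 / n, data.var(axis=0)))  # True (population variance)
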
def copy_normalization(self, mean, variance, steps):
    """
    Copies the mean, variance, and steps into the normalizers of the
    input of this SACNetwork. Used to copy the normalizer from the policy network
    to the target network.
    :param mean: Tensor containing the mean.
    :param variance: Tensor containing the variance.
    :param steps: Tensor containing the number of steps.
    """
    update_mean = tf.assign(self.running_mean, mean)
    update_variance = tf.assign(self.running_variance, variance)
    update_norm_step = tf.assign(self.normalization_steps, steps)
    return tf.group([update_mean, update_variance, update_norm_step])
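
# copy_normalization only builds the assign ops; the caller still has to run the grouped
# op in a session. In the SAC setup above it copies the policy network's running
# statistics into the target network so both normalize observations the same way.
# A tiny self-contained sketch of that pattern (toy variables, TF1 graph mode):
import numpy as np
import tensorflow.compat.v1 as tf1

tf1.disable_eager_execution()

policy_mean = tf1.Variable(np.array([1.0, 2.0], np.float32), name="policy_running_mean")
policy_var = tf1.Variable(np.array([0.5, 0.5], np.float32), name="policy_running_variance")
policy_steps = tf1.Variable(10, dtype=tf1.int32, name="policy_normalization_steps")

target_mean = tf1.Variable(np.zeros(2, np.float32), name="target_running_mean")
target_var = tf1.Variable(np.ones(2, np.float32), name="target_running_variance")
target_steps = tf1.Variable(1, dtype=tf1.int32, name="target_normalization_steps")

copy_op = tf1.group(
    tf1.assign(target_mean, policy_mean),
    tf1.assign(target_var, policy_var),
    tf1.assign(target_steps, policy_steps),
)

with tf1.Session() as sess:
    sess.run(tf1.global_variables_initializer())
    sess.run(copy_op)
    print(sess.run([target_mean, target_var, target_steps]))  # now matches the policy stats
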
def create_global_steps():
    """Creates TF ops to track and increment global training step."""
    global_step = tf.Variable(
        0, name="global_step", trainable=False, dtype=tf.int32
    )
    steps_to_increment = tf.placeholder(
        shape=[], dtype=tf.int32, name="steps_to_increment"
    )
    increment_step = tf.assign(global_step, tf.add(global_step, steps_to_increment))
    return global_step, increment_step, steps_to_increment
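
# A hedged usage sketch for create_global_steps: the placeholder is fed with the number
# of newly collected steps and increment_step is run to advance the counter (this again
# assumes TF1 graph mode and that create_global_steps above is usable as-is).
import tensorflow.compat.v1 as tf1

tf1.disable_eager_execution()

global_step, increment_step, steps_to_increment = create_global_steps()

with tf1.Session() as sess:
    sess.run(tf1.global_variables_initializer())
    # e.g. after consuming a batch of 64 environment steps:
    print(sess.run(increment_step, feed_dict={steps_to_increment: 64}))  # 64
    print(sess.run(global_step))                                         # 64
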