Secure your code as it's written. Use Snyk Code to scan source code in minutes—no build needed—and fix issues immediately.
hidden_2,
self.z_size,
reuse=reuse,
name="gail_z_mean",
kernel_initializer=ModelUtils.scaled_init(0.01),
)
self.noise = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)
# Sampled latent code
self.z = z_mean + self.z_sigma * self.noise * self.use_noise
estimate_input = self.z
else:
estimate_input = hidden_2
estimate = tf.layers.dense(
estimate_input,
1,
activation=tf.nn.sigmoid,
name="gail_d_estimate",
reuse=reuse,
)
return estimate, z_mean, concat_input
name="gail_d_hidden_1",
reuse=reuse,
)
hidden_2 = tf.layers.dense(
hidden_1,
self.h_size,
activation=ModelUtils.swish,
name="gail_d_hidden_2",
reuse=reuse,
)
z_mean = None
if self.use_vail:
# Latent representation
z_mean = tf.layers.dense(
hidden_2,
self.z_size,
reuse=reuse,
name="gail_z_mean",
kernel_initializer=ModelUtils.scaled_init(0.01),
)
self.noise = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)
# Sampled latent code
self.z = z_mean + self.z_sigma * self.noise * self.use_noise
estimate_input = self.z
else:
estimate_input = hidden_2
estimate = tf.layers.dense(
axis=1,
)
hidden = tf.concat([hidden, prev_action_oh], axis=1)
self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
hidden, memory_out = self.create_recurrent_encoder(
hidden, self.memory_in, self.sequence_length
)
self.memory_out = tf.identity(memory_out, name="recurrent_out")
policy_branches = []
for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden,
size,
activation=None,
use_bias=False,
kernel_initializer=LearningModel.scaled_init(0.01),
)
)
self.all_log_probs = tf.concat(policy_branches, axis=1, name="action_probs")
self.action_masks = tf.placeholder(
shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
)
output, _, normalized_logits = self.create_discrete_action_masking_layer(
self.all_log_probs, self.action_masks, self.act_size
)
self.policy_memory_in,
self.sequence_length,
name="lstm_policy",
)
self.policy_memory_out = memory_out
with tf.variable_scope(scope):
mu = tf.layers.dense(
hidden_policy,
self.act_size[0],
activation=None,
name="mu",
kernel_initializer=LearningModel.scaled_init(0.01),
)
# Policy-dependent log_sigma_sq
log_sigma_sq = tf.layers.dense(
hidden_policy,
self.act_size[0],
activation=None,
name="log_std",
kernel_initializer=LearningModel.scaled_init(0.01),
)
self.log_sigma_sq = tf.clip_by_value(log_sigma_sq, LOG_STD_MIN, LOG_STD_MAX)
sigma_sq = tf.exp(self.log_sigma_sq)
# Do the reparameterization trick
policy_ = mu + tf.random_normal(tf.shape(mu)) * sigma_sq
_gauss_pre = -0.5 * (
((policy_ - mu) / (tf.exp(self.log_sigma_sq) + EPSILON)) ** 2
)
hidden_value, memory_value_out = self.create_recurrent_encoder(
hidden_streams[1],
self.memory_in[:, _half_point:],
self.sequence_length,
name="lstm_value",
)
self.memory_out = tf.concat(
[memory_policy_out, memory_value_out], axis=1, name="recurrent_out"
)
else:
hidden_policy = hidden_streams[0]
hidden_value = hidden_streams[1]
mu = tf.layers.dense(
hidden_policy,
self.act_size[0],
activation=None,
kernel_initializer=LearningModel.scaled_init(0.01),
reuse=tf.AUTO_REUSE,
)
self.log_sigma_sq = tf.get_variable(
"log_sigma_squared",
[self.act_size[0]],
dtype=tf.float32,
initializer=tf.zeros_initializer(),
)
sigma_sq = tf.exp(self.log_sigma_sq)
axis=1,
)
hidden_policy = tf.concat([hidden_policy, prev_action_oh], axis=1)
hidden_policy, memory_out = self.create_recurrent_encoder(
hidden_policy,
self.policy_memory_in,
self.sequence_length,
name="lstm_policy",
)
self.policy_memory_out = memory_out
with tf.variable_scope(scope):
policy_branches = []
for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden_policy,
size,
activation=None,
use_bias=False,
kernel_initializer=tf.initializers.variance_scaling(0.01),
)
)
all_logits = tf.concat(policy_branches, axis=1, name="action_probs")
output, normalized_probs, normalized_logprobs = self.create_discrete_action_masking_layer(
all_logits, self.action_masks, self.act_size
)
self.action_probs = normalized_probs
# Really, this is entropy, but it has an analogous purpose to the log probs in the
:param reuse: Whether or not to reuse variables. Useful for creating Q of policy.
:param num_outputs: Number of outputs of each Q function. If discrete, equal to number of actions.
"""
with tf.variable_scope(self.join_scopes(scope, "q1_encoding"), reuse=reuse):
q1_hidden = self.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q1_encoder", reuse
)
if self.use_recurrent:
q1_hidden, memory_out = self.create_recurrent_encoder(
q1_hidden, self.q1_memory_in, self.sequence_length, name="lstm_q1"
)
self.q1_memory_out = memory_out
q1_heads = {}
for name in stream_names:
_q1 = tf.layers.dense(q1_hidden, num_outputs, name="{}_q1".format(name))
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
q2_hidden = self.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
)
if self.use_recurrent:
q2_hidden, memory_out = self.create_recurrent_encoder(
q2_hidden, self.q2_memory_in, self.sequence_length, name="lstm_q2"
)
self.q2_memory_out = memory_out
q2_heads = {}
for name in stream_names:
_q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)
with tf.variable_scope(self.join_scopes(scope, "q2_encoding"), reuse=reuse):
q2_hidden = self.create_vector_observation_encoder(
hidden_input, h_size, self.activ_fn, num_layers, "q2_encoder", reuse
)
if self.use_recurrent:
q2_hidden, memory_out = self.create_recurrent_encoder(
q2_hidden, self.q2_memory_in, self.sequence_length, name="lstm_q2"
)
self.q2_memory_out = memory_out
q2_heads = {}
for name in stream_names:
_q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
q2_heads[name] = _q2
q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
return q1_heads, q2_heads, q1, q2
self, state_in: tf.Tensor, action_in: tf.Tensor, done_in: tf.Tensor, reuse: bool
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
"""
Creates the encoder for the discriminator
:param state_in: The encoded observation input
:param action_in: The action input
:param done_in: The done flags input
:param reuse: If true, the weights will be shared with the previous encoder created
"""
with tf.variable_scope("GAIL_model"):
if self.use_actions:
concat_input = tf.concat([state_in, action_in, done_in], axis=1)
else:
concat_input = state_in
hidden_1 = tf.layers.dense(
concat_input,
self.h_size,
activation=ModelUtils.swish,
name="gail_d_hidden_1",
reuse=reuse,
)
hidden_2 = tf.layers.dense(
hidden_1,
self.h_size,
activation=ModelUtils.swish,
name="gail_d_hidden_2",
reuse=reuse,
)
z_mean = None