# We assume m_size is divisible by 4
# Create the non-Policy inputs
# Use a default placeholder here so nothing has to be provided during
# Barracuda inference. Note that the default value is just the tiled input
# for the policy, which is thrown away.
three_fourths_m_size = m_size * 3 // 4
self.other_memory_in = tf.placeholder_with_default(
input=tf.tile(self.inference_memory_in, [1, 3]),
shape=[None, three_fourths_m_size],
name="other_recurrent_in",
)
# Concat and use this as the "placeholder"
# for training
self.memory_in = tf.concat(
[self.other_memory_in, self.inference_memory_in], axis=1
)
# Re-split into one memory slice per network
num_mems = 4
mem_ins = []
for i in range(num_mems):
_start = m_size // num_mems * i
_end = m_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
self.value_memory_in = mem_ins[0]
self.q1_memory_in = mem_ins[1]
self.q2_memory_in = mem_ins[2]
self.policy_memory_in = mem_ins[3]
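# A minimal sketch (NumPy, hypothetical sizes) of the tile/concat/slice
# arithmetic above: the policy's quarter-size memory is tiled 3x as the
# default for the value/Q1/Q2 slots, concatenated, then re-split per network.
import numpy as np

m_size = 16                                        # must be divisible by 4
batch = 2
policy_mem = np.random.rand(batch, m_size // 4)    # stands in for inference_memory_in
other_mem = np.tile(policy_mem, (1, 3))            # default value: tiled policy memory
assert other_mem.shape == (batch, m_size * 3 // 4)

memory_in = np.concatenate([other_mem, policy_mem], axis=1)   # (batch, m_size)
num_mems = 4
mem_ins = [
    memory_in[:, m_size // num_mems * i : m_size // num_mems * (i + 1)]
    for i in range(num_mems)
]
value_mem, q1_mem, q2_mem, policy_mem_slice = mem_ins
assert policy_mem_slice.shape == (batch, m_size // 4)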
hidden = hidden_streams[0]
if self.use_recurrent:
self.prev_action = tf.placeholder(
shape=[None, len(self.act_size)], dtype=tf.int32, name="prev_action"
)
prev_action_oh = tf.concat(
[
tf.one_hot(self.prev_action[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
hidden = tf.concat([hidden, prev_action_oh], axis=1)
self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
hidden, memory_out = self.create_recurrent_encoder(
hidden, self.memory_in, self.sequence_length
)
self.memory_out = tf.identity(memory_out, name="recurrent_out")
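# A minimal sketch (NumPy, hypothetical sizes) of the per-branch one-hot
# encoding used above: each column of prev_action indexes one discrete
# branch, and the branch one-hots are concatenated into one feature vector.
import numpy as np

act_size = [3, 2]                                  # two branches: 3 and 2 actions
prev_action = np.array([[2, 0], [1, 1]])           # (batch, num_branches)
one_hots = [np.eye(act_size[i])[prev_action[:, i]] for i in range(len(act_size))]
prev_action_oh = np.concatenate(one_hots, axis=1)  # (batch, sum(act_size)) == (2, 5)
# row 0 -> [0, 0, 1, 1, 0]: action 2 in branch 0, action 0 in branch 1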
policy_branches = []
for size in self.act_size:
policy_branches.append(
tf.layers.dense(
hidden,
size,
activation=None,
use_bias=False,
kernel_initializer=LearningModel.scaled_init(0.01),
)
)
self.all_log_probs = tf.concat(policy_branches, axis=1, name="action_probs")
self.action_masks = tf.placeholder(
shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
)
output, _, normalized_logits = self.create_discrete_action_masking_layer(
self.all_log_probs, self.action_masks, self.act_size
)
self.output = tf.identity(output)
self.normalized_logits = tf.identity(normalized_logits, name="action")
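# A minimal sketch (NumPy, hypothetical values) of what a discrete action
# masking layer does per branch: zero the probability of forbidden actions,
# renormalize, and take the log to get normalized logits.
import numpy as np

def masked_probs(branch_logits, branch_mask, eps=1e-7):
    probs = np.exp(branch_logits - branch_logits.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)      # ordinary softmax
    probs = probs * branch_mask                    # mask: 1 = allowed, 0 = forbidden
    return probs / (probs.sum(axis=1, keepdims=True) + eps)

logits = np.array([[2.0, 1.0, 0.5]])
mask = np.array([[1.0, 0.0, 1.0]])                 # action 1 is unavailable
p = masked_probs(logits, mask)                     # p[0, 1] == 0
normalized_logits = np.log(p + 1e-7)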
self.create_value_heads(self.stream_names, hidden)
self.action_holder = tf.placeholder(
shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
)
self.action_oh = tf.concat(
    [
        tf.one_hot(self.action_holder[:, i], self.act_size[i])
        for i in range(len(self.act_size))
    ],
    axis=1,
)
inputs_to_op_nodes = list(
flatten([list(flatten(n.input)) for n in op_nodes])
)
inputs_to_op_nodes = replace_strings_in_list(
inputs_to_op_nodes, map_ignored_layer_to_its_input
)
inputs_to_op_nodes = [i.split(":")[0] for i in inputs_to_op_nodes]
const_nodes_by_name = {n.name: n for n in const_nodes}
tensors = []
for i in inputs_to_op_nodes:
if i in model_tensors:
src = model_tensors[i]
tensors += [
Struct(
name=i,
obj=src,
shape=get_tensor_dims(src),
data=get_tensor_data(src),
)
]
elif i in const_nodes_by_name:
src = const_nodes_by_name[i].attr["value"].tensor
tensors += [
Struct(
name=i,
obj=src,
shape=get_tensor_dims(src),
data=get_tensor_data(src),
)
]
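# A minimal sketch (NumPy stand-ins, hypothetical names) of the lookup above:
# each op input resolves either to a checkpoint tensor or to a graph Const,
# and both land in the same record layout; runtime activations are skipped.
import numpy as np

model_tensors = {"dense/kernel": np.ones((4, 2))}  # trained checkpoint weights
const_values = {"dense/bias": np.zeros(2)}         # constants baked into the graph

tensors = []
for name in ["dense/kernel", "dense/bias", "dense/BiasAdd"]:
    if name in model_tensors:
        src = model_tensors[name]
    elif name in const_values:
        src = const_values[name]
    else:
        continue                                   # an activation, not a stored tensor
    tensors.append({"name": name, "shape": list(src.shape), "data": src})
# "dense/BiasAdd" is skipped: it is produced at runtime, not stored in the graph.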
hidden_1 = tf.layers.dense(
    concat_input,
    self.h_size,
    activation=ModelUtils.swish,
    name="gail_d_hidden_1",
    reuse=reuse,
)
hidden_2 = tf.layers.dense(
    hidden_1,
    self.h_size,
    activation=ModelUtils.swish,
    name="gail_d_hidden_2",
    reuse=reuse,
)
z_mean = None
if self.use_vail:
    # Latent representation
    z_mean = tf.layers.dense(
        hidden_2,
        self.z_size,
        reuse=reuse,
        name="gail_z_mean",
        kernel_initializer=ModelUtils.scaled_init(0.01),
    )
    self.noise = tf.random_normal(tf.shape(z_mean), dtype=tf.float32)
    # Sampled latent code
    self.z = z_mean + self.z_sigma * self.noise * self.use_noise
    estimate_input = self.z
else:
    estimate_input = hidden_2
estimate = tf.layers.dense(
    estimate_input,
    1,
    activation=tf.nn.sigmoid,
    name="gail_d_estimate",
    reuse=reuse,
)
return estimate, z_mean, concat_input
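# A minimal sketch (NumPy, hypothetical sizes) of the VAIL sampling above,
# assuming use_noise acts as a 0/1 switch: with noise the discriminator sees
# z = z_mean + z_sigma * noise; without it, z collapses to z_mean.
import numpy as np

batch, z_size = 2, 4
z_mean = np.random.randn(batch, z_size)
z_sigma = np.ones(z_size)                          # learned per-unit std in the model
noise = np.random.randn(batch, z_size)

use_noise = 1.0                                    # 0.0 at inference
z = z_mean + z_sigma * noise * use_noise
assert z.shape == (batch, z_size)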
hidden_policy, memory_out = self.create_recurrent_encoder(
    hidden_policy,
    self.policy_memory_in,
self.sequence_length,
name="lstm_policy",
)
self.policy_memory_out = memory_out
with tf.variable_scope(scope):
mu = tf.layers.dense(
hidden_policy,
self.act_size[0],
activation=None,
name="mu",
kernel_initializer=LearningModel.scaled_init(0.01),
)
# Policy-dependent log_sigma_sq
log_sigma_sq = tf.layers.dense(
hidden_policy,
self.act_size[0],
activation=None,
name="log_std",
kernel_initializer=LearningModel.scaled_init(0.01),
)
self.log_sigma_sq = tf.clip_by_value(log_sigma_sq, LOG_STD_MIN, LOG_STD_MAX)
sigma_sq = tf.exp(self.log_sigma_sq)
# Do the reparameterization trick
policy_ = mu + tf.random_normal(tf.shape(mu)) * sigma_sq
_gauss_pre = -0.5 * (
((policy_ - mu) / (tf.exp(self.log_sigma_sq) + EPSILON)) ** 2
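# A minimal sketch (NumPy, hypothetical values) of the diagonal-Gaussian
# log-density the truncated _gauss_pre term expands to, evaluated on the
# reparameterized sample and summed over action dimensions.
import numpy as np

EPSILON = 1e-7
mu = np.zeros((1, 2))
log_sigma = np.full((1, 2), -0.5)                  # clipped log std
policy_ = mu + np.random.randn(1, 2) * np.exp(log_sigma)

gauss_pre = -0.5 * (
    ((policy_ - mu) / (np.exp(log_sigma) + EPSILON)) ** 2
    + 2 * log_sigma
    + np.log(2 * np.pi)
)
log_prob = gauss_pre.sum(axis=1)                   # joint log-prob of the action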