Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
)
# NOTE(review): fragment starts mid-statement — the call closed by the ")"
# above and the enclosing method header are outside this view. Leading
# indentation appears stripped by extraction; code lines kept byte-identical.
# We assume m_size is divisible by 4
# Create the non-Policy inputs
# Use a default placeholder here so nothing has to be provided during
# Barracuda inference. Note that the default value is just the tiled input
# for the policy, which is thrown away.
# Combined width of the three non-policy memory slices.
three_fourths_m_size = m_size * 3 // 4
# Defaults to the policy memory tiled 3x, so inference can run without
# feeding this input; the tiled default is discarded downstream (per the
# comment above).
self.other_memory_in = tf.placeholder_with_default(
input=tf.tile(self.inference_memory_in, [1, 3]),
shape=[None, three_fourths_m_size],
name="other_recurrent_in",
)
# Concat and use this as the "placeholder"
# for training
self.memory_in = tf.concat(
[self.other_memory_in, self.inference_memory_in], axis=1
)
# Re-break-up for each network
# Slice the concatenated memory into num_mems equal chunks along axis 1.
num_mems = 4
mem_ins = []
for i in range(num_mems):
_start = m_size // num_mems * i
_end = m_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
# Slice order follows the concat above: the three "other" slices first,
# policy memory last. Presumably the first three are value/q1/q2 in that
# order — TODO confirm against the code that writes these memories.
self.value_memory_in = mem_ins[0]
self.q1_memory_in = mem_ins[1]
self.q2_memory_in = mem_ins[2]
self.policy_memory_in = mem_ins[3]
)
# NOTE(review): lines below are an exact verbatim duplicate of the fragment
# that precedes them in this file — almost certainly an extraction artifact,
# not intentional code. Kept byte-identical; see the duplicate for context.
# We assume m_size is divisible by 4
# Create the non-Policy inputs
# Use a default placeholder here so nothing has to be provided during
# Barracuda inference. Note that the default value is just the tiled input
# for the policy, which is thrown away.
three_fourths_m_size = m_size * 3 // 4
self.other_memory_in = tf.placeholder_with_default(
input=tf.tile(self.inference_memory_in, [1, 3]),
shape=[None, three_fourths_m_size],
name="other_recurrent_in",
)
# Concat and use this as the "placeholder"
# for training
self.memory_in = tf.concat(
[self.other_memory_in, self.inference_memory_in], axis=1
)
# Re-break-up for each network
num_mems = 4
mem_ins = []
for i in range(num_mems):
_start = m_size // num_mems * i
_end = m_size // num_mems * (i + 1)
mem_ins.append(self.memory_in[:, _start:_end])
self.value_memory_in = mem_ins[0]
self.q1_memory_in = mem_ins[1]
self.q2_memory_in = mem_ins[2]
self.policy_memory_in = mem_ins[3]
:param h_size: Size of hidden linear layers.
:param num_layers: Number of hidden linear layers.
:param vis_encode_type: Type of visual encoder to use if visual input.
"""
# NOTE(review): the lines above are the tail of a docstring whose opening
# quotes (and the function signature) are outside this view.
# Optional LSTM path: feed the previous discrete action (one-hot per branch)
# alongside the encoded observation.
if self.use_recurrent:
self.prev_action = tf.placeholder(
shape=[None, len(self.act_size)], dtype=tf.int32, name="prev_action"
)
# One-hot encode each action branch and concatenate them into one vector.
prev_action_oh = tf.concat(
[
tf.one_hot(self.prev_action[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
hidden_policy = tf.concat([encoded, prev_action_oh], axis=1)
# Recurrent state fed in at inference/training time.
self.memory_in = tf.placeholder(
shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
)
hidden_policy, memory_policy_out = ModelUtils.create_recurrent_encoder(
hidden_policy,
self.memory_in,
self.sequence_length_ph,
name="lstm_policy",
)
# Named identity so the recurrent state can be fetched by name.
self.memory_out = tf.identity(memory_policy_out, "recurrent_out")
else:
hidden_policy = encoded
# NOTE(review): the next line is duplicated verbatim — a syntax error as
# written. Likely a copy/paste artifact in this extract; one of the two
# assignments should be removed once the original file is recovered.
self.action_masks = tf.placeholder(
self.action_masks = tf.placeholder(
shape=[None, sum(self.act_size)], dtype=tf.float32, name="action_masks"
)
# Apply the per-branch masks to the log-probs and sample actions.
output, _, normalized_logits = self.create_discrete_action_masking_layer(
self.all_log_probs, self.action_masks, self.act_size
)
self.output = tf.identity(output)
# Named "action" so exported/inference graphs can fetch it by name.
self.normalized_logits = tf.identity(normalized_logits, name="action")
# NOTE(review): `hidden` and `policy_branches` below are defined outside
# this view — TODO confirm against the full function.
self.create_value_heads(self.stream_names, hidden)
self.action_holder = tf.placeholder(
shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
)
# One-hot encode the held actions per branch; stop_gradient so the
# selected actions are treated as constants by backprop.
self.action_oh = tf.concat(
[
tf.one_hot(self.action_holder[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
self.selected_actions = tf.stop_gradient(self.action_oh)
# Old (pre-update) log-probs fed in for the policy update; masked with the
# same action masks for consistency with the current policy's outputs.
self.all_old_log_probs = tf.placeholder(
shape=[None, sum(self.act_size)], dtype=tf.float32, name="old_probabilities"
)
_, _, old_normalized_logits = self.create_discrete_action_masking_layer(
self.all_old_log_probs, self.action_masks, self.act_size
)
# Cumulative branch offsets into the flattened per-branch action vector.
action_idx = [0] + list(np.cumsum(self.act_size))
# NOTE(review): fragment starts mid-argument-list — the opening of this
# encoder call is outside this view. Scope names show this is the curiosity
# module's visual-observation encoding.
"curiosity_stream_{}_visual_obs_encoder".format(i),
False,
)
# Siamese encoder for the next-step (t+1) visual observation: same scope
# name with reuse=True, so weights are shared with the current-step encoder.
encoded_next_visual = ModelUtils.create_visual_observation_encoder(
self.next_visual_in[i],
self.encoding_size,
ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
True,
)
visual_encoders.append(encoded_visual)
next_visual_encoders.append(encoded_next_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
hidden_next_visual = tf.concat(next_visual_encoders, axis=1)
encoded_state_list.append(hidden_visual)
encoded_next_state_list.append(hidden_next_visual)
if self.policy.vec_obs_size > 0:
# Create the encoder ops for current and next vector input.
# Note that these encoders are siamese.
# Create input op for next (t+1) vector observation.
self.next_vector_in = tf.placeholder(
shape=[None, self.policy.vec_obs_size],
dtype=tf.float32,
name="curiosity_next_vector_observation",
)
encoded_vector_obs = ModelUtils.create_vector_observation_encoder(
self.policy.vector_in,
# NOTE(review): splice seam — the argument list above jumps from a
# curiosity vector-obs encoder call into GAIL visual-encoder code; lines
# from (at least) two different functions were fused here by extraction.
"gail_stream_{}_visual_obs_encoder".format(i),
False,
)
# GAIL expert-side visual encoder: shares weights with the policy-side
# encoder via identical scope name and reuse=True.
encoded_expert_visual = ModelUtils.create_visual_observation_encoder(
self.expert_visual_in[i],
self.encoding_size,
ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
True,
)
visual_policy_encoders.append(encoded_policy_visual)
visual_expert_encoders.append(encoded_expert_visual)
hidden_policy_visual = tf.concat(visual_policy_encoders, axis=1)
hidden_expert_visual = tf.concat(visual_expert_encoders, axis=1)
encoded_policy_list.append(hidden_policy_visual)
encoded_expert_list.append(hidden_expert_visual)
self.encoded_expert = tf.concat(encoded_expert_list, axis=1)
self.encoded_policy = tf.concat(encoded_policy_list, axis=1)
# NOTE(review): another splice seam — the bare arguments below belong to a
# curiosity vector-obs encoder call whose opening is not in view.
2,
"curiosity_vector_obs_encoder",
False,
)
# Siamese next-step vector encoder (reuse=True shares weights).
encoded_next_vector_obs = ModelUtils.create_vector_observation_encoder(
self.next_vector_in,
self.encoding_size,
ModelUtils.swish,
2,
"curiosity_vector_obs_encoder",
True,
)
encoded_state_list.append(encoded_vector_obs)
encoded_next_state_list.append(encoded_next_vector_obs)
# Final state encodings: visual and vector parts concatenated per step.
encoded_state = tf.concat(encoded_state_list, axis=1)
encoded_next_state = tf.concat(encoded_next_state_list, axis=1)
return encoded_state, encoded_next_state
:param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
:param action_size: A list containing the number of possible actions for each branch
:return: The action output dimension [batch_size, num_branches], the concatenated
normalized probs (after softmax)
and the concatenated normalized log probs
"""
# NOTE(review): docstring tail above — its opening quotes and the function
# signature (which defines `branches_logits` and `EPSILON` used below) are
# outside this view.
# Split the flat mask into one mask tensor per action branch.
branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
# Softmax each branch's logits, add EPSILON to keep every entry non-zero,
# then zero out masked actions by multiplying with the branch mask.
raw_probs = [
tf.multiply(tf.nn.softmax(branches_logits[k]) + EPSILON, branch_masks[k])
for k in range(len(action_size))
]
# Renormalize each branch so the surviving (unmasked) probs sum to 1.
normalized_probs = [
tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
for k in range(len(action_size))
]
# Sample one action per branch from the masked distribution;
# EPSILON inside the log guards against log(0).
output = tf.concat(
[
tf.multinomial(tf.log(normalized_probs[k] + EPSILON), 1)
for k in range(len(action_size))
],
axis=1,
)
# Return sampled actions, concatenated normalized probs, and the
# concatenated normalized log probs (see docstring tail above).
return (
output,
tf.concat([normalized_probs[k] for k in range(len(action_size))], axis=1),
tf.concat(
[
tf.log(normalized_probs[k] + EPSILON)
for k in range(len(action_size))
],
axis=1,
# NOTE(review): the closing ")" of this return tuple is missing — the
# fragment is truncated here (splice seam into the next fragment).
),
# NOTE(review): fragment starts mid-call — the `tf.placeholder(` opening
# (and the definition of `policy_branches`) is outside this view.
shape=[None, len(policy_branches)], dtype=tf.int32, name="action_holder"
)
# One-hot encode each action branch and concatenate into one flat vector.
self.output_oh = tf.concat(
[
tf.one_hot(self.action_holder[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
# For Curiosity and GAIL to retrieve selected actions. We don't
# need the mask at this point because it's already stored in the buffer.
self.selected_actions = tf.stop_gradient(self.output_oh)
# Same one-hot encoding, exposed separately for externally supplied actions.
self.external_action_in = tf.concat(
[
tf.one_hot(self.action_holder[:, i], self.act_size[i])
for i in range(len(self.act_size))
],
axis=1,
)
# This is total entropy over all branches
self.entropy = -1 * tf.reduce_sum(self.all_log_probs, axis=1)
# Extract the normalized logprobs for Barracuda
# NOTE(review): `normalized_logprobs` is defined outside this view.
self.normalized_logprobs = tf.identity(normalized_logprobs, name="action")
# We kept the LSTMs at a different scope than the rest, so add them if they exist.
self.policy_vars = self.get_vars(scope)
# NOTE(review): this `if` body is truncated — the fragment ends here.
if self.use_recurrent:
# NOTE(review): fragment starts mid-argument-list — the opening of this
# visual-encoder call (and the loops defining `i`/`j`) are outside this view.
f"{_scope_add}main_graph_{i}_encoder{j}", # scope
False, # reuse
)
visual_encoders.append(encoded_visual)
hidden_visual = tf.concat(visual_encoders, axis=1)
# NOTE(review): the condition tests `vector_in` but the encoder below is
# fed `vector_observation_input` — two different names for what is
# presumably the same tensor; verify against the full signature.
if vector_in.get_shape()[-1] > 0: # Don't encode 0-shape inputs
hidden_state = ModelUtils.create_vector_observation_encoder(
vector_observation_input,
h_size,
activation_fn,
num_layers,
scope=f"{_scope_add}main_graph_{i}",
reuse=False,
)
# Combine whichever of the two encodings (visual / vector) exist.
if hidden_state is not None and hidden_visual is not None:
final_hidden = tf.concat([hidden_visual, hidden_state], axis=1)
elif hidden_state is None and hidden_visual is not None:
final_hidden = hidden_visual
elif hidden_state is not None and hidden_visual is None:
final_hidden = hidden_state
else:
# Neither observation type produced an encoding — the model has no input.
raise Exception(
"No valid network configuration possible. "
"There are no states or observations in this brain"
)
final_hiddens.append(final_hidden)
return final_hiddens