# Opening of this truncated call reconstructed by assumption, mirroring the
# SACTargetNetwork construction below.
self.policy_network = SACPolicyNetwork(
    policy=self.policy,
    m_size=3 * self.policy.m_size,  # 3x policy.m_size (Q1, Q2, and Value)
    h_size=h_size,
    normalize=self.policy.normalize,
    use_recurrent=self.policy.use_recurrent,
    num_layers=num_layers,
    stream_names=stream_names,
    vis_encode_type=vis_encode_type,
)
self.target_network = SACTargetNetwork(
policy=self.policy,
m_size=self.policy.m_size, # 1x policy.m_size
h_size=h_size,
normalize=self.policy.normalize,
use_recurrent=self.policy.use_recurrent,
num_layers=num_layers,
stream_names=stream_names,
vis_encode_type=vis_encode_type,
)
# The optimizer's m_size is 3 times the policy (Q1, Q2, and Value)
self.m_size = 3 * self.policy.m_size
self._create_inputs_and_outputs()
self.learning_rate = ModelUtils.create_schedule(
lr_schedule,
lr,
self.policy.global_step,
int(max_step),
min_value=1e-10,
)
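# Descriptive note: as parameterized here, the schedule anneals the learning
# rate from lr toward min_value as global_step approaches max_step, with the
# decay curve chosen by lr_schedule (e.g. constant or linear).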
self._create_losses(
self.policy_network.q1_heads,
self.policy_network.q2_heads,
lr,
int(max_step),
stream_names,
discrete=not self.policy.use_continuous_act,
)
self._create_sac_optimizer_ops()
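# Illustrative sketch (plain NumPy, not ml-agents code) of the memory layout
# implied above: the optimizer's recurrent memory is three policy-sized
# slices laid end to end, one each for Q1, Q2, and the value head.
import numpy as np

policy_m_size = 4
optimizer_memory = np.zeros((1, 3 * policy_m_size))
q1_mem, q2_mem, value_mem = np.split(optimizer_memory, 3, axis=1)
assert q1_mem.shape == (1, policy_m_size)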
def get_encoder_for_type(encoder_type: EncoderType) -> EncoderFunction:
    """Return the visual-encoder builder for the given type, defaulting to the simple encoder."""
ENCODER_FUNCTION_BY_TYPE = {
EncoderType.SIMPLE: ModelUtils.create_visual_observation_encoder,
EncoderType.NATURE_CNN: ModelUtils.create_nature_cnn_visual_observation_encoder,
EncoderType.RESNET: ModelUtils.create_resnet_visual_observation_encoder,
}
return ENCODER_FUNCTION_BY_TYPE.get(
encoder_type, ModelUtils.create_visual_observation_encoder
)
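# Illustrative usage: dispatch on the configured encoder type. Unknown or
# unsupported types fall back to the simple encoder via dict.get's default.
encoder_fn = get_encoder_for_type(EncoderType.NATURE_CNN)
assert encoder_fn is ModelUtils.create_nature_cnn_visual_observation_encoder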
# Signature reconstructed from the parameter docs below (GAIL discriminator
# encoder; the return annotation is an assumption).
def create_encoder(
    self, state_in: tf.Tensor, action_in: tf.Tensor, done_in: tf.Tensor, reuse: bool
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """
    Creates the encoder for the discriminator.
:param state_in: The encoded observation input
:param action_in: The action input
:param done_in: The done flags input
:param reuse: If true, the weights will be shared with the previous encoder created
"""
with tf.variable_scope("GAIL_model"):
if self.use_actions:
concat_input = tf.concat([state_in, action_in, done_in], axis=1)
else:
concat_input = state_in
hidden_1 = tf.layers.dense(
concat_input,
self.h_size,
activation=ModelUtils.swish,
name="gail_d_hidden_1",
reuse=reuse,
)
hidden_2 = tf.layers.dense(
hidden_1,
self.h_size,
activation=ModelUtils.swish,
name="gail_d_hidden_2",
reuse=reuse,
)
z_mean = None
if self.use_vail:
    # Latent representation; the call below is completed by assumption
    # (the layer name and z_size attribute are reconstructed).
    z_mean = tf.layers.dense(
        hidden_2,
        self.z_size,
        reuse=reuse,
        name="gail_z_mean",
    )
    # ... remainder of the VAIL branch is truncated in this excerpt.
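# Illustrative sketch (plain NumPy, not the library's code) of the VAIL idea
# behind the latent layer above: the discriminator sees a stochastic code
# z = mu + sigma * eps, and a KL penalty toward N(0, I) bottlenecks it.
import numpy as np

rng = np.random.default_rng(0)
mu, sigma = np.zeros(4), np.ones(4)
z = mu + sigma * rng.standard_normal(4)  # reparameterized sample
kl = 0.5 * np.sum(sigma**2 + mu**2 - 1.0 - np.log(sigma**2))  # KL to N(0, I)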
if self.policy.use_recurrent:
    # Recurrent branch truncated in this excerpt; it builds a value-stream
    # LSTM encoder that yields hidden_value and memory_value_out.
    self.memory_out = memory_value_out
else:
    hidden_value = hidden_stream
self.value_heads, self.value = ModelUtils.create_value_heads(
self.stream_names, hidden_value
)
self.all_old_log_probs = tf.placeholder(
shape=[None, sum(self.policy.act_size)],
dtype=tf.float32,
name="old_probabilities",
)
# Break old log probs into separate branches
old_log_prob_branches = ModelUtils.break_into_branches(
self.all_old_log_probs, self.policy.act_size
)
_, _, old_normalized_logits = ModelUtils.create_discrete_action_masking_layer(
old_log_prob_branches, self.policy.action_masks, self.policy.act_size
)
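# The masking layer renormalizes each branch's action probabilities over the
# currently allowed actions, producing old_normalized_logits per branch.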
action_idx = [0] + list(np.cumsum(self.policy.act_size))
self.old_log_probs = tf.reduce_sum(
    (
        tf.stack(
            [
                -tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=self.policy.selected_actions[
                        :, action_idx[i] : action_idx[i + 1]
                    ],
                    # Closing of this expression reconstructed by assumption:
                    logits=old_normalized_logits[
                        :, action_idx[i] : action_idx[i + 1]
                    ],
                )
                for i in range(len(self.policy.act_size))
            ],
            axis=1,
        )
    ),
    axis=1,
    keepdims=True,
)
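# Minimal numeric sketch (plain NumPy) of the trick above: with a one-hot
# label, softmax cross-entropy equals -log p(selected action), so negating
# it recovers the selected action's log-probability.
import numpy as np

logits = np.array([1.0, 2.0, 0.5])
probs = np.exp(logits) / np.exp(logits).sum()
one_hot = np.array([0.0, 1.0, 0.0])
cross_entropy = -(one_hot * np.log(probs)).sum()
assert np.isclose(-cross_entropy, np.log(probs[1]))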
"""
Creates state encoders for current and future observations.
Used for implementation of Curiosity-driven Exploration by Self-supervised Prediction
See https://arxiv.org/abs/1705.05363 for more details.
:return: current and future state encoder tensors.
"""
encoded_state_list = []
encoded_next_state_list = []
if self.policy.vis_obs_size > 0:
self.next_visual_in = []
visual_encoders = []
next_visual_encoders = []
for i in range(self.policy.vis_obs_size):
# Create input ops for next (t+1) visual observations.
next_visual_input = ModelUtils.create_visual_input(
self.policy.brain.camera_resolutions[i],
name="curiosity_next_visual_observation_" + str(i),
)
self.next_visual_in.append(next_visual_input)
# Create the encoder ops for current and next visual input.
# Note that these encoders are siamese.
encoded_visual = ModelUtils.create_visual_observation_encoder(
    self.policy.visual_in[i],
    self.encoding_size,
    ModelUtils.swish,
    1,
    "curiosity_stream_{}_visual_obs_encoder".format(i),
    False,
)
# Continuation reconstructed by assumption: the same scope name with
# reuse=True makes the next-state encoder share weights with the
# current-state encoder (the siamese pairing noted above).
encoded_next_visual = ModelUtils.create_visual_observation_encoder(
    self.next_visual_in[i],
    self.encoding_size,
    ModelUtils.swish,
    1,
    "curiosity_stream_{}_visual_obs_encoder".format(i),
    True,
)
visual_encoders.append(encoded_visual)
next_visual_encoders.append(encoded_next_visual)
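# Illustrative sketch of the TF1 variable sharing behind the siamese pair:
# creating a layer twice under the same name, the second time with
# reuse=True, binds both calls to a single set of weights.
import tensorflow as tf

x1 = tf.placeholder(tf.float32, [None, 8])
x2 = tf.placeholder(tf.float32, [None, 8])
h1 = tf.layers.dense(x1, 16, name="shared_enc")
h2 = tf.layers.dense(x2, 16, name="shared_enc", reuse=True)  # same weights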
# Method name reconstructed by assumption: the policy's encoder builder.
def _create_encoder(
    self,
visual_in: List[tf.Tensor],
vector_in: tf.Tensor,
h_size: int,
num_layers: int,
vis_encode_type: EncoderType,
) -> tf.Tensor:
"""
Creates an encoder for visual and vector observations.
:param visual_in: List of visual observation input tensors.
:param vector_in: Vector observation input tensor.
:param h_size: Size of hidden linear layers.
:param num_layers: Number of hidden linear layers.
:param vis_encode_type: Type of visual encoder to use if there is visual input.
:return: The hidden layer (tf.Tensor) after the encoder.
"""
with tf.variable_scope("policy"):
encoded = ModelUtils.create_observation_streams(
self.visual_in,
self.processed_vector_in,
1,
h_size,
num_layers,
vis_encode_type,
)[0]
return encoded
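# Illustrative usage of the next fragment's create_observation_streams
# (hypothetical placeholder tensor; assumes ml-agents' ModelUtils is
# importable): asking for two streams yields independent policy and value
# encoders over the same observations.
import tensorflow as tf

vector_in = tf.placeholder(tf.float32, [None, 6], name="vector_observation")
policy_hidden, value_hidden = ModelUtils.create_observation_streams(
    [], vector_in, num_streams=2, h_size=128, num_layers=2
)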
# Opening of the signature reconstructed from the parameters documented below.
def create_observation_streams(
    visual_in: List[tf.Tensor],
    vector_in: tf.Tensor,
    num_streams: int,
h_size: int,
num_layers: int,
vis_encode_type: EncoderType = EncoderType.SIMPLE,
stream_scopes: List[str] = None,
) -> List[tf.Tensor]:
"""
Creates encoding streams for observations.
:param visual_in: List of visual observation input tensors.
:param vector_in: Vector observation input tensor.
:param num_streams: Number of streams to create.
:param h_size: Size of hidden linear layers in stream.
:param num_layers: Number of hidden linear layers in stream.
:param stream_scopes: List of strings (length == num_streams), which contains
the scopes for each of the streams. None if all under the same TF scope.
:return: List of encoded streams.
"""
activation_fn = ModelUtils.swish
vector_observation_input = vector_in
final_hiddens = []
for i in range(num_streams):
# Pick the encoder function based on the EncoderType
create_encoder_func = ModelUtils.get_encoder_for_type(vis_encode_type)
visual_encoders = []
hidden_state, hidden_visual = None, None
_scope_add = stream_scopes[i] if stream_scopes else ""
if len(visual_in) > 0:
for j, vis_in in enumerate(visual_in):
ModelUtils._check_resolution_for_encoder(vis_in, vis_encode_type)
encoded_visual = create_encoder_func(
    vis_in,
    h_size,
    activation_fn,
    num_layers,
    f"{_scope_add}main_graph_{i}_encoder{j}",  # scope (reconstructed by assumption)
    False,  # reuse
)
visual_encoders.append(encoded_visual)
# ... remainder of the stream construction is truncated in this excerpt.
# GAIL inputs; opening of this truncated statement reconstructed: one-hot
# encode the expert's discrete actions and concatenate across branches.
self.expert_action = tf.concat(
    [
        tf.one_hot(self.action_in_expert[:, i], act_size)
for i, act_size in enumerate(self.policy.act_size)
],
axis=1,
)
encoded_policy_list = []
encoded_expert_list = []
if self.policy.vec_obs_size > 0:
self.obs_in_expert = tf.placeholder(
shape=[None, self.policy.vec_obs_size], dtype=tf.float32
)
if self.policy.normalize:
encoded_expert_list.append(
ModelUtils.normalize_vector_obs(
self.obs_in_expert,
self.policy.running_mean,
self.policy.running_variance,
self.policy.normalization_steps,
)
)
encoded_policy_list.append(self.policy.processed_vector_in)
else:
encoded_expert_list.append(self.obs_in_expert)
encoded_policy_list.append(self.policy.vector_in)
if self.policy.vis_obs_size > 0:
self.expert_visual_in: List[tf.Tensor] = []
visual_policy_encoders = []
visual_expert_encoders = []
for i in range(self.policy.vis_obs_size):
    # Loop body truncated in this excerpt; it creates expert visual
    # placeholders and siamese policy/expert visual encoders.
    ...
def create_forward_model(
self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor
) -> None:
"""
Creates forward model TensorFlow ops for Curiosity module.
Predicts encoded future state based on encoded current state and given action.
:param encoded_state: Tensor corresponding to encoded current state.
:param encoded_next_state: Tensor corresponding to encoded next state.
"""
combined_input = tf.concat(
[encoded_state, self.policy.selected_actions], axis=1
)
hidden = tf.layers.dense(combined_input, 256, activation=ModelUtils.swish)
pred_next_state = tf.layers.dense(
hidden,
self.encoding_size
* (self.policy.vis_obs_size + int(self.policy.vec_obs_size > 0)),
activation=None,
)
squared_difference = 0.5 * tf.reduce_sum(
tf.squared_difference(pred_next_state, encoded_next_state), axis=1
)
self.intrinsic_reward = squared_difference
self.forward_loss = tf.reduce_mean(
tf.dynamic_partition(squared_difference, self.policy.mask, 2)[1]
)
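# Illustrative sketch (plain NumPy) of the masking above: dynamic_partition
# with a 0/1 mask splits a batch into two groups, and taking partition [1]
# keeps only the active (unmasked) entries before averaging the loss.
import numpy as np

rewards = np.array([0.3, 0.7, 0.1, 0.9])
mask = np.array([0, 1, 1, 0])      # 1 = valid timestep
active = rewards[mask == 1]        # NumPy analogue of partition [1]
forward_loss = active.mean()       # mean over valid entries only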