    variable = collection[0]
else:
    tf_dtype = util.tf_dtype(dtype=dtype)
    # Variable initializer
    if isinstance(initializer, util.py_dtype(dtype=dtype)):
        initializer = tf.constant(value=initializer, dtype=tf_dtype, shape=shape)
    elif isinstance(initializer, np.ndarray):
        if initializer.shape != shape:
            raise TensorforceError(
                "Invalid variable initializer shape: {}.".format(initializer.shape)
            )
        initializer = tf.constant(value=initializer, dtype=tf_dtype)
    elif isinstance(initializer, tf.Tensor):
        if util.shape(x=initializer) != shape:
            raise TensorforceError(
                "Invalid variable initializer shape: {}.".format(util.shape(x=initializer))
            )
    elif not isinstance(initializer, str):
        raise TensorforceError("Invalid variable initializer: {}".format(initializer))
    elif initializer[:6] == 'normal':
        if dtype != 'float':
            raise TensorforceError(
                message="Invalid variable initializer value for non-float variable: {}.".format(
                    initializer
                )
            )
        if initializer[6:] == '-relu':
            # He-style initialization: stddev scaled by fan-in, capped at 0.1
            stddev = min(0.1, sqrt(2.0 / util.product(xs=shape[:-1])))
        else:
            # Assumed continuation (truncated in source): fan-in plus fan-out scaling
            stddev = min(0.1, sqrt(2.0 / (util.product(xs=shape[:-1]) + shape[-1])))
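For context outside the Tensorforce module system, the same scalar / ndarray / string dispatch can be sketched in plain TensorFlow 2.x. This is a minimal illustration, not the library's implementation; make_initializer and its arguments are hypothetical names.

import math
import numpy as np
import tensorflow as tf

def make_initializer(initializer, shape, dtype=tf.float32):
    # Python scalar: broadcast to a constant of the requested shape
    if isinstance(initializer, (int, float)):
        return tf.constant(value=initializer, dtype=dtype, shape=shape)
    # NumPy array: shape must match exactly
    elif isinstance(initializer, np.ndarray):
        if initializer.shape != tuple(shape):
            raise ValueError("Invalid initializer shape: {}.".format(initializer.shape))
        return tf.constant(value=initializer, dtype=dtype)
    # 'normal-relu': He-style stddev scaled by fan-in, capped at 0.1 as above
    elif initializer == 'normal-relu':
        stddev = min(0.1, math.sqrt(2.0 / max(1, int(np.prod(shape[:-1])))))
        return tf.random.normal(shape=shape, stddev=stddev, dtype=dtype)
    else:
        raise ValueError("Invalid initializer: {}.".format(initializer))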
def tf_loss_per_instance(
    self, policy, states, internals, auxiliaries, actions, reward, reference=None
):
    assert self.ratio_based or reference is None
    log_probability = policy.log_probability(
        states=states, internals=internals, auxiliaries=auxiliaries, actions=actions,
        reduced=self.early_reduce
    )
    zero = tf.constant(value=0.0, dtype=util.tf_dtype(dtype='float'))
    one = tf.constant(value=1.0, dtype=util.tf_dtype(dtype='float'))
    clipping_value = self.clipping_value.value()
    if self.ratio_based:
        if reference is None:
            reference = log_probability
        # Likelihood ratio of current vs reference policy; reference treated as constant
        scaling = tf.exp(x=(log_probability - tf.stop_gradient(input=reference)))
        min_value = one / (one + clipping_value)
        max_value = one + clipping_value
    else:
        scaling = log_probability
        min_value = -clipping_value
        max_value = log_probability + one
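The continuation of this objective is not included in the snippet. A standard PPO-style combination of scaling, min_value, max_value and the per-instance reward, which is what such bounds are typically used for, might look as follows; clipped_surrogate is an illustrative name, not a Tensorforce function.

import tensorflow as tf

def clipped_surrogate(scaling, reward, min_value, max_value):
    clipped = tf.clip_by_value(scaling, clip_value_min=min_value, clip_value_max=max_value)
    # Pessimistic bound: take the smaller of the unclipped and clipped surrogates
    return -tf.minimum(x=(scaling * reward), y=(clipped * reward))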
def tf_loss_per_instance(
    self, states, internals, actions, terminal, reward, next_states, next_internals,
    reference=None
):
    embedding = self.network.apply(x=states, internals=internals)
    log_probs = list()
    for name, distribution in self.distributions.items():
        parameters = distribution.parametrize(x=embedding)
        action = actions[name]
        log_prob = distribution.log_probability(parameters=parameters, action=action)
        # Flatten each action component's log-probabilities to (batch, component_size)
        collapsed_size = util.product(xs=util.shape(log_prob)[1:])
        log_prob = tf.reshape(tensor=log_prob, shape=(-1, collapsed_size))
        log_probs.append(log_prob)
    log_probs = tf.concat(values=log_probs, axis=1)
    log_prob_per_instance = tf.reduce_mean(input_tensor=log_probs, axis=1)
    # REINFORCE-style loss: negative log-likelihood weighted by the reward signal
    return -log_prob_per_instance * reward
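Note that a joint log-probability over independent action components would normally be a sum; taking the mean instead rescales the loss by the number of components but leaves the gradient direction unchanged. A tiny NumPy check of the per-instance value (example data only):

import numpy as np

log_probs = np.array([[-0.5, -1.5], [-0.1, -0.3]])  # (batch=2, components=2)
per_instance = log_probs.mean(axis=1)               # [-1.0, -0.2]
reward = np.array([1.0, 2.0])
loss = -per_instance * reward                       # [1.0, 0.4]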
# Overwrite buffer rewards
with tf.control_dependencies(control_inputs=assertions):
    indices = tf.range(
        start=self.buffer_index, limit=(self.buffer_index + num_overwritten)
    )
    # Wrap indices around the circular buffer capacity
    indices = tf.math.mod(x=indices, y=capacity)
    indices = tf.expand_dims(input=indices, axis=1)
    assignment = self.buffers['reward'].scatter_nd_update(
        indices=indices, updates=discounted_sum
    )
    with tf.control_dependencies(control_inputs=(assignment,)):
        return util.no_operation()
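In isolation, the wrap-around write pattern above can be reproduced with a plain tf.Variable; capacity, start, and the update values here are example data, not Tensorforce state.

import tensorflow as tf

capacity = 5
buffer = tf.Variable(tf.zeros(shape=(capacity,)))
start, updates = 3, tf.constant([1.0, 2.0, 3.0])   # writes positions 3, 4, 0
indices = tf.range(start=start, limit=start + tf.size(updates))
indices = tf.math.mod(x=indices, y=capacity)       # [3, 4, 0] after wrap-around
buffer.scatter_nd_update(indices=tf.expand_dims(indices, axis=1), updates=updates)
print(buffer.numpy())  # [3. 0. 0. 1. 2.]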
def tf_retrieve_timesteps(self, n, past_padding, future_padding):
    one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
    capacity = tf.constant(value=self.capacity, dtype=util.tf_dtype(dtype='long'))
    # # Start index of oldest episode
    # oldest_episode_start = self.terminal_indices[0] + one + past_padding
    # # Number of timesteps (minus/plus one to prevent zero but allow capacity)
    # num_timesteps = self.buffer_index - oldest_episode_start - future_padding - one
    # num_timesteps = tf.math.mod(x=num_timesteps, y=capacity) + one
    # Check whether memory contains enough timesteps
    num_timesteps = tf.minimum(x=self.buffer_index, y=capacity) - past_padding - future_padding
    assertion = tf.debugging.assert_less_equal(x=n, y=num_timesteps)
    # Randomly sampled timestep indices
    with tf.control_dependencies(control_inputs=(assertion,)):
        indices = tf.random.uniform(
            shape=(n,), maxval=num_timesteps, dtype=util.tf_dtype(dtype='long')
        )
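Stripped of the Tensorforce module machinery, the validity check and the sampling step reduce to the following; values are examples, and the mapping from sampled offsets to actual buffer positions is omitted in the snippet, so it is not reproduced here.

import tensorflow as tf

n, past_padding, future_padding = 4, 2, 1
buffer_index, capacity = tf.constant(12, dtype=tf.int64), tf.constant(10, dtype=tf.int64)
# Usable timesteps: filled portion of the buffer minus both padding margins
num_timesteps = tf.minimum(buffer_index, capacity) - past_padding - future_padding
tf.debugging.assert_less_equal(tf.constant(n, dtype=tf.int64), num_timesteps)
offsets = tf.random.uniform(shape=(n,), maxval=num_timesteps, dtype=tf.int64)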
super().__init__(
    # MemoryModel
    update_mode=update_mode, memory=memory, optimizer=optimizer, discount=discount,
    # DistributionModel
    network=network, distributions=distributions,
    entropy_regularization=entropy_regularization,
    # QModel
    target_sync_frequency=target_sync_frequency, target_update_weight=target_update_weight,
    double_q_model=double_q_model, huber_loss=huber_loss
)
self.state_values = OrderedDict()
self.l_entries = OrderedDict()
embedding_size = self.network.get_output_spec()['shape'][0]
input_spec = dict(type='float', shape=(embedding_size,))
for name, action_spec in self.actions_spec.items():
    action_size = util.product(xs=action_spec['shape'])
    # Linear head producing per-action state values
    self.state_values[name] = self.add_module(
        name=(name + '-state-value'), module='linear', modules=layer_modules,
        size=action_size, input_spec=input_spec
    )
    # Linear head producing the lower-triangular matrix entries
    self.l_entries[name] = self.add_module(
        name=(name + '-l-entries'), module='linear', modules=layer_modules,
        size=action_size, input_spec=input_spec
    )
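The same "one linear head per named action" pattern, expressed with plain Keras layers; the action names and sizes below are illustrative, not taken from the source.

import numpy as np
import tensorflow as tf

actions_spec = {'steer': {'shape': (1,)}, 'gear': {'shape': (3,)}}
embedding_size = 64
# One Dense head per action, sized to the flattened action shape
heads = {
    name: tf.keras.layers.Dense(units=int(np.prod(spec['shape'])))
    for name, spec in actions_spec.items()
}
embedding = tf.zeros(shape=(1, embedding_size))
outputs = {name: layer(embedding) for name, layer in heads.items()}  # (1, action_size) each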
def __init__(self, name, dtype, shape=(), unit=None, summary_labels=None):
    super().__init__(name=name, summary_labels=summary_labels)
    assert unit in (None, 'timesteps', 'episodes', 'updates')
    spec = dict(type=dtype, shape=shape)
    spec = util.valid_value_spec(value_spec=spec, return_normalized=True)
    self.dtype = spec['type']
    self.shape = spec['shape']
    self.unit = unit
    # Expose the parameter as a globally registered (non-batched) tensor
    Module.register_tensor(name=self.name, spec=spec, batched=False)
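What the unit argument enables, in spirit: the parameter's value can be a function of a timestep, episode, or update counter rather than a fixed constant. A framework-free illustration of one such schedule (decaying_parameter is not a Tensorforce API):

def decaying_parameter(initial, decay_rate, step):
    # Value as a function of the chosen unit counter (e.g. timesteps)
    return initial * (decay_rate ** step)

decaying_parameter(initial=0.1, decay_rate=0.99, step=100)  # ~0.0366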
def first_sequence():
    assignment = self.has_previous.assign(
        value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')), read_value=False
    )
    with tf.control_dependencies(control_inputs=(assignment,)):
        if self.concatenate:
            current = x
        else:
            current = tf.expand_dims(input=x, axis=(self.axis + 1))
        multiples = tuple(
            self.length if dims == self.axis + 1 else 1
            for dims in range(util.rank(x=current))
        )
        # Tile `current` (not `x`): multiples has one entry per dimension of
        # `current`, whose rank differs from `x` when not concatenating
        return tf.tile(input=current, multiples=multiples)
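A self-contained check of the tiling logic, with example values axis=0, length=3, concatenate=False. It also shows why the tile target must be current: multiples has rank(current) entries, so tiling the rank-2 x would fail after the expand_dims.

import tensorflow as tf

x = tf.constant([[1.0, 2.0]])            # shape (1, 2): batch of one feature vector
current = tf.expand_dims(x, axis=1)      # shape (1, 1, 2)
multiples = tuple(3 if dim == 1 else 1 for dim in range(current.shape.rank))
tiled = tf.tile(current, multiples=multiples)   # shape (1, 3, 2)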