def tf_q_delta(self, q_value, next_q_value, terminal, reward):
    """
    Creates the deltas (or advantage) of the Q values.

    :return: A list of deltas per action
    """
    for _ in range(util.rank(x=q_value) - 1):
        terminal = tf.expand_dims(input=terminal, axis=1)
        reward = tf.expand_dims(input=reward, axis=1)
    multiples = (1,) + util.shape(q_value)[1:]
    terminal = tf.tile(input=terminal, multiples=multiples)
    reward = tf.tile(input=reward, multiples=multiples)

    zeros = tf.zeros_like(tensor=next_q_value)
    discount = self.discount.value()
    next_q_value = tf.where(condition=terminal, x=zeros, y=(discount * next_q_value))

    return reward + next_q_value - q_value  # tf.stop_gradient(q_target)
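# A minimal NumPy sketch (not TensorForce code) of the same one-step TD error:
# delta = reward + discount * next_q * (1 - terminal) - q. The values below are
# made-up toy numbers, purely for illustration.
import numpy as np

def q_delta_sketch(q_value, next_q_value, terminal, reward, discount=0.99):
    # Zero the bootstrap term for terminal transitions, mirroring the tf.where above.
    next_q_value = np.where(terminal, 0.0, discount * next_q_value)
    return reward + next_q_value - q_value

print(q_delta_sketch(
    q_value=np.array([1.0, 0.5]),
    next_q_value=np.array([2.0, 3.0]),
    terminal=np.array([False, True]),
    reward=np.array([0.0, 1.0]),
))  # [0.98, 0.5]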
def first_run():
    # Fill the entire states buffer with copies of the first tensor along the buffer axis.
    fill_buffer = (self.length,) + tuple(1 for _ in range(util.rank(tensor) - 1))
    return tf.assign(ref=states_buffer, value=tf.tile(input=tensor, multiples=fill_buffer))
def tf_apply(self, x, update, state):
    if util.rank(x) != 2:
        raise TensorForceError(
            'Invalid input rank for internal lstm layer: {}, must be 2.'.format(util.rank(x))
        )

    # Unpack the carried state tensor of shape (batch, 2, size) into the (c, h) tuple.
    state = tf.contrib.rnn.LSTMStateTuple(c=state[:, 0, :], h=state[:, 1, :])

    self.lstm_cell = tf.contrib.rnn.LSTMCell(num_units=self.size, **self.lstmcell_args)

    if self.dropout is not None:
        # Apply dropout only during updates; keep probability is 1.0 otherwise.
        keep_prob = tf.cond(pred=update, true_fn=(lambda: 1.0 - self.dropout), false_fn=(lambda: 1.0))
        self.lstm_cell = tf.contrib.rnn.DropoutWrapper(cell=self.lstm_cell, output_keep_prob=keep_prob)

    x, state = self.lstm_cell(inputs=x, state=state)

    # Re-pack (c, h) into a single tensor so the state can be carried as an internal.
    state = tf.stack(values=(state.c, state.h), axis=1)

    return x, state
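# Sketch of the state-packing convention used above (NumPy stand-in, assumed shapes):
# the recurrent state is carried as one tensor of shape (batch, 2, size), where
# slice [:, 0, :] is the LSTM cell state c and [:, 1, :] is the hidden state h.
import numpy as np

batch, size = 4, 8
c = np.zeros((batch, size))
h = np.ones((batch, size))
packed = np.stack((c, h), axis=1)                    # like tf.stack(values=(c, h), axis=1)
assert packed.shape == (batch, 2, size)
c_again, h_again = packed[:, 0, :], packed[:, 1, :]  # like the LSTMStateTuple unpacking
assert (c_again == c).all() and (h_again == h).all()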
def tf_process(self, tensor):
    # Normalise over all axes (across the batch) or per instance.
    if self.across_batch:
        axes = tuple(range(util.rank(tensor)))
    else:
        axes = tuple(range(1, util.rank(tensor)))

    mean, variance = tf.nn.moments(x=tensor, axes=axes, keep_dims=True)
    return (tensor - mean) / tf.maximum(x=tf.sqrt(variance), y=util.epsilon)
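# Hedged NumPy sketch of the standardisation above: shift to zero mean and unit
# variance per instance (or across the whole batch when across_batch is set),
# with an epsilon guard against zero variance. Toy values only.
import numpy as np

def standardize(tensor, across_batch=False, epsilon=1e-6):
    axes = tuple(range(tensor.ndim)) if across_batch else tuple(range(1, tensor.ndim))
    mean = tensor.mean(axis=axes, keepdims=True)
    std = tensor.std(axis=axes, keepdims=True)
    return (tensor - mean) / np.maximum(std, epsilon)

x = np.array([[1.0, 2.0, 3.0], [10.0, 10.0, 10.0]])
print(standardize(x))  # first row ~[-1.22, 0.0, 1.22]; constant second row stays 0.0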
def conv1d(x, size, window=3, stride=1, padding='SAME', bias=False, activation='relu',
           l2_regularization=0.0, scope='conv1d', summary_level=0):
    """A 1d convolutional layer.

    Args:
        x: Input tensor. Must be rank 3.
        size: Number of filters (output channels).
        window: Filter window size.
        stride: Filter stride.
        padding: One of 'VALID', 'SAME'.
        bias: Bool, indicates whether a bias is added.
        activation: Non-linearity type, defaults to relu.
        l2_regularization: L2-regularisation value.
        scope: Variable scope name.
        summary_level: Summary verbosity level.

    Returns:
        The convolved output tensor of rank 3.
    """
    input_rank = util.rank(x)
    if input_rank != 3:
        raise TensorForceError('Invalid input rank for conv1d layer: {}, must be 3'.format(input_rank))

    with tf.variable_scope(scope):
        # Filters have shape (window, in_channels, out_channels).
        filters_shape = (window, x.shape[2].value, size)
        stddev = min(0.1, sqrt(2.0 / size))
        filters_init = tf.random_normal_initializer(mean=0.0, stddev=stddev, dtype=tf.float32)
        filters = tf.get_variable(name='W', shape=filters_shape, dtype=tf.float32, initializer=filters_init)
        if l2_regularization > 0.0:
            tf.losses.add_loss(l2_regularization * tf.nn.l2_loss(t=filters))

        x = tf.nn.conv1d(value=x, filters=filters, stride=stride, padding=padding)

        if bias:
            # Zero-initialised bias, one per output channel.
            bias_shape = (size,)
            bias_init = tf.zeros_initializer(dtype=tf.float32)
            bias = tf.get_variable(name='b', shape=bias_shape, dtype=tf.float32, initializer=bias_init)
            x = tf.nn.bias_add(value=x, bias=bias)
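# Hedged NumPy sketch of what tf.nn.conv1d computes above (illustrative only):
# filters of shape (window, in_channels, out_channels) slide along the time axis
# of an input of shape (batch, time, in_channels); 'SAME' padding with stride 1
# preserves the time length.
import numpy as np

def conv1d_same_sketch(x, filters, stride=1):
    window = filters.shape[0]
    pad_total = window - 1
    pad_left = pad_total // 2
    x = np.pad(x, ((0, 0), (pad_left, pad_total - pad_left), (0, 0)))
    out_time = (x.shape[1] - window) // stride + 1
    out = np.zeros((x.shape[0], out_time, filters.shape[2]))
    for t in range(out_time):
        patch = x[:, t * stride:t * stride + window, :]               # (batch, window, in)
        out[:, t, :] = np.tensordot(patch, filters, axes=([1, 2], [0, 1]))
    return out

x = np.random.randn(2, 5, 4)   # batch=2, time=5, in_channels=4
w = np.random.randn(3, 4, 8)   # window=3, in_channels=4, out_channels=8
print(conv1d_same_sketch(x, w).shape)  # (2, 5, 8)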
def body(indices, remaining, current_x, current_aggregates):
    current_x = tf.gather(params=x, indices=indices)
    next_x, next_aggregates = self.iterative_step(
        x=current_x, previous=current_aggregates
    )
    with tf.control_dependencies(control_inputs=(current_x, next_x)):
        is_finished = tf.math.equal(x=remaining, y=zeros)
        if isinstance(next_aggregates, dict):
            for name, current_aggregate, next_aggregate in util.zip_items(
                current_aggregates, next_aggregates
            ):
                condition = is_finished
                for _ in range(util.rank(x=current_aggregate) - 1):
                    condition = tf.expand_dims(input=condition, axis=1)
                next_aggregates[name] = tf.where(
                    condition=condition, x=current_aggregate, y=next_aggregate
                )
        else:
            condition = is_finished
            for _ in range(util.rank(x=current_aggregates) - 1):
                condition = tf.expand_dims(input=condition, axis=1)
            next_aggregates = tf.where(
                condition=condition, x=current_aggregates, y=next_aggregates
            )
        remaining -= tf.where(condition=is_finished, x=zeros, y=ones)
        indices += tf.where(
            condition=tf.math.equal(x=remaining, y=zeros), x=zeros, y=ones
        )
    return indices, remaining, next_x, next_aggregates
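# Plain-Python sketch of the masking idea in the loop body above (assumed toy setup):
# each batch element has its own number of valid steps; once an element is finished,
# tf.where keeps its previous aggregate while the others keep stepping.
import numpy as np

def masked_cumulative_steps(values, lengths):
    # values: (num_steps, batch); lengths: valid steps per batch element.
    aggregates = np.zeros(values.shape[1])
    remaining = np.array(lengths)
    for t in range(values.shape[0]):
        finished = remaining == 0
        stepped = aggregates + values[t]                      # stand-in for iterative_step
        aggregates = np.where(finished, aggregates, stepped)  # freeze finished elements
        remaining = remaining - np.where(finished, 0, 1)
    return aggregates

vals = np.arange(6, dtype=float).reshape(3, 2)  # steps [[0, 1], [2, 3], [4, 5]]
print(masked_cumulative_steps(vals, lengths=[1, 3]))  # [0. 9.]: element 0 only sees step 0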
def tf_q_delta(self, q_value, next_q_value, terminal, reward):
    # n-step variant: replace the reward by the discounted cumulative reward,
    # bootstrapped with the last next-Q value.
    for _ in range(util.rank(x=q_value) - 1):
        terminal = tf.expand_dims(input=terminal, axis=1)
        reward = tf.expand_dims(input=reward, axis=1)
    multiples = (1,) + util.shape(x=q_value)[1:]
    terminal = tf.tile(input=terminal, multiples=multiples)
    reward = tf.tile(input=reward, multiples=multiples)

    reward = self.discounted_cumulative_reward(
        terminal=terminal, reward=reward, final_reward=next_q_value[-1]
    )

    return reward - q_value
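# Hedged NumPy sketch of the discounted cumulative reward used by the n-step variant
# above: accumulate backwards through the trace, bootstrap with the last next-Q value,
# and reset the running return at terminals. Toy values assumed.
import numpy as np

def discounted_cumulative_reward_sketch(terminal, reward, final_reward=0.0, discount=0.99):
    returns = np.zeros_like(reward)
    running = final_reward
    for t in reversed(range(len(reward))):
        running = reward[t] + (0.0 if terminal[t] else discount * running)
        returns[t] = running
    return returns

print(discounted_cumulative_reward_sketch(
    terminal=np.array([False, False, True]),
    reward=np.array([1.0, 0.0, 2.0]),
    final_reward=5.0,
))  # [2.9602, 1.98, 2.0]: the terminal at t=2 drops the 5.0 bootstrap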
"""
Linear layer.
Args:
x: Input tensor. Must be rank 2
size: Neurons in layer
weights: None for random matrix, otherwise given float or array is used.
bias: Bool to indicate whether bias is used, otherwise given float or array is used.
l2_regularization: L2-regularisation value
weights: Weights for layer. If none, initialisation defaults to Xavier (normal with
size/shape dependent standard deviation).
Returns:
"""
input_rank = util.rank(x)
if input_rank != 2:
raise TensorForceError('Invalid input rank for linear layer: {},'
' must be 2.'.format(input_rank))
with tf.variable_scope(scope):
weights_shape = (x.shape[1].value, size)
if weights is None:
stddev = min(0.1, sqrt(2.0 / (x.shape[1].value + size)))
weights_init = tf.random_normal_initializer(mean=0.0, stddev=stddev, dtype=tf.float32)
elif isinstance(weights, float):
if weights == 0.0:
weights_init = tf.zeros_initializer(dtype=tf.float32)
else:
weights_init = tf.constant_initializer(value=weights, dtype=tf.float32)
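# Hedged NumPy sketch of the weight initialisation rule above: a Glorot/Xavier-style
# standard deviation sqrt(2 / (fan_in + fan_out)), capped at 0.1, when no explicit
# weights are given; a float becomes a constant (or zero) matrix. Names are illustrative.
import numpy as np
from math import sqrt

def init_linear_weights(fan_in, fan_out, weights=None):
    if weights is None:
        stddev = min(0.1, sqrt(2.0 / (fan_in + fan_out)))
        return np.random.normal(loc=0.0, scale=stddev, size=(fan_in, fan_out))
    if isinstance(weights, float):
        return np.full((fan_in, fan_out), weights)
    return np.asarray(weights, dtype=float)

w = init_linear_weights(fan_in=64, fan_out=32)
print(w.shape, round(float(w.std()), 3))  # (64, 32), std near the 0.1 cap since sqrt(2/96) > 0.1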
if action_spec['type'] == 'bool':
    # With probability exploration_value, replace the action by a random boolean.
    action = tf.where(
        condition=(tf.random_uniform(shape=action_shape) < exploration_value),
        x=(tf.random_uniform(shape=action_shape) < 0.5),
        y=action
    )

elif action_spec['type'] == 'int':
    # Epsilon-greedy: with probability exploration_value, pick a uniformly random action.
    action = tf.where(
        condition=(tf.random_uniform(shape=action_shape) < exploration_value),
        x=tf.random_uniform(shape=action_shape, maxval=action_spec['num_actions'], dtype=util.tf_dtype('int')),
        y=action
    )

elif action_spec['type'] == 'float':
    # Additive exploration noise, clipped back into the valid range if one is given.
    for _ in range(util.rank(action) - 1):
        exploration_value = tf.expand_dims(input=exploration_value, axis=-1)
    action += exploration_value
    if 'min_value' in action_spec:
        action = tf.clip_by_value(
            t=action,
            clip_value_min=action_spec['min_value'],
            clip_value_max=action_spec['max_value']
        )

return action
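# Hedged NumPy sketch of the exploration scheme above (function name and toy values
# are assumptions): bool/int actions are resampled uniformly with probability
# exploration_value (epsilon-greedy); float actions get the exploration value added
# as noise and are clipped back into [min_value, max_value] if a range is given.
import numpy as np

def explore_sketch(action, action_spec, exploration_value):
    shape = action.shape
    if action_spec['type'] == 'bool':
        mask = np.random.uniform(size=shape) < exploration_value
        return np.where(mask, np.random.uniform(size=shape) < 0.5, action)
    if action_spec['type'] == 'int':
        mask = np.random.uniform(size=shape) < exploration_value
        random_action = np.random.randint(action_spec['num_actions'], size=shape)
        return np.where(mask, random_action, action)
    # 'float': additive noise plus optional clipping.
    action = action + exploration_value
    if 'min_value' in action_spec:
        action = np.clip(action, action_spec['min_value'], action_spec['max_value'])
    return action

print(explore_sketch(np.array([0, 1, 2]), {'type': 'int', 'num_actions': 4}, exploration_value=0.1))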