# ==============================================================================
from collections import OrderedDict
import tensorflow as tf
from tensorforce import TensorforceError, util
import tensorforce.core
from tensorforce.core import Module, parameter_modules
from tensorforce.core.parameters import Parameter
class Layer(Module):
"""
Base class for neural network layers.
Args:
name (string): Layer name
(<span style="color:#00C000"><b>default</b></span>: internally chosen).
input_spec (specification): Input tensor specification
(<span style="color:#00C000"><b>internal use</b></span>).
summary_labels ('all' | iter[string]): Labels of summaries to record
(<span style="color:#00C000"><b>default</b></span>: inherit value of parent module).
l2_regularization (float >= 0.0): Scalar controlling L2 regularization
(<span style="color:#00C000"><b>default</b></span>: inherit value of parent module).
"""
layers = None
# Flag that the moving statistics have been updated at least once
assignment = self.after_first_call.assign(
    value=tf.constant(value=True, dtype=util.tf_dtype(dtype='bool')),
    read_value=False
)
with tf.control_dependencies(control_inputs=(assignment,)):
variance = self.moving_variance.assign(value=variance)
mean = self.moving_mean.assign(value=mean)
return mean, variance
optimization = Module.retrieve_tensor(name='optimization')
update_on_optimization = tf.where(
condition=self.after_first_call, x=self.update_on_optimization, y=optimization
)
update_on_optimization = self.update_on_optimization.assign(value=update_on_optimization)
skip_update = tf.math.logical_or(
x=Module.retrieve_tensor(name='independent'),
y=tf.math.not_equal(x=update_on_optimization, y=optimization)
)
mean, variance = self.cond(pred=skip_update, true_fn=no_update, false_fn=apply_update)
epsilon = tf.constant(value=util.epsilon, dtype=util.tf_dtype(dtype='float'))
reciprocal_stddev = tf.math.rsqrt(x=tf.maximum(x=variance, y=epsilon))
x = (x - tf.stop_gradient(input=mean)) * tf.stop_gradient(input=reciprocal_stddev)
return x
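# A minimal NumPy sketch of the exponential moving normalization above,
# assuming a fixed decay and batch-axis statistics (both illustrative
# choices, not taken from this module):
import numpy as np

def exponential_normalize(x, moving_mean, moving_variance, decay=0.99, epsilon=1e-5):
    # Blend the batch statistics into the moving statistics
    moving_mean = decay * moving_mean + (1.0 - decay) * x.mean(axis=0)
    moving_variance = decay * moving_variance + (1.0 - decay) * x.var(axis=0)
    # Normalize with the moving statistics, like the rsqrt step in tf_apply
    normalized = (x - moving_mean) / np.sqrt(np.maximum(moving_variance, epsilon))
    return normalized, moving_mean, moving_variance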
def tf_core_act(self, states, internals):
embedding, internals = self.network.apply(
x=states, internals=internals, return_internals=True
)
actions = OrderedDict()
for name, distribution in self.distributions.items():
parameters = distribution.parametrize(x=embedding)
deterministic = Module.retrieve_tensor(name='deterministic')
deterministic = tf.math.logical_or(
x=deterministic,
y=tf.constant(value=self.requires_deterministic, dtype=util.tf_dtype(dtype='bool'))
)
action = distribution.sample(parameters=parameters, deterministic=deterministic)
entropy = distribution.entropy(parameters=parameters)
collapsed_size = util.product(xs=util.shape(entropy)[1:])
entropy = tf.reshape(tensor=entropy, shape=(-1, collapsed_size))
entropy = tf.reduce_mean(input_tensor=entropy, axis=1)
actions[name] = self.add_summary(
label='entropy', name=(name + '-entropy'), tensor=entropy, pass_tensors=action
)
return actions, internals
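# How the per-action entropy above collapses to one scalar per batch
# instance; plain NumPy with an assumed (batch, *action_shape) layout:
import numpy as np

entropy = np.random.rand(4, 2, 3)              # (batch, *action_shape)
collapsed_size = int(np.prod(entropy.shape[1:]))
entropy = entropy.reshape(-1, collapsed_size)  # flatten the action dimensions
entropy = entropy.mean(axis=1)                 # one entropy value per instance
assert entropy.shape == (4,)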
def tf_apply(self, x, initial=None):
zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
one = tf.constant(value=1, dtype=util.tf_dtype(dtype='long'))
dependency_starts = Module.retrieve_tensor(name='dependency_starts')
dependency_lengths = Module.retrieve_tensor(name='dependency_lengths')
if util.tf_dtype(dtype='long') in (tf.int32, tf.int64):
batch_size = tf.shape(input=dependency_starts, out_type=util.tf_dtype(dtype='long'))[0]
else:
batch_size = tf.dtypes.cast(
x=tf.shape(input=dependency_starts)[0], dtype=util.tf_dtype(dtype='long')
)
zeros = tf.zeros(shape=(batch_size,), dtype=util.tf_dtype(dtype='long'))
ones = tf.ones(shape=(batch_size,), dtype=util.tf_dtype(dtype='long'))
# maximum_iterations = tf.math.reduce_max(input_tensor=lengths, axis=0)
horizon = self.dependency_horizon.value() + one # including 0th step
starts = dependency_starts + tf.maximum(x=(dependency_lengths - horizon), y=zeros)
lengths = dependency_lengths - tf.maximum(x=(dependency_lengths - horizon), y=zeros)
horizon = tf.minimum(x=horizon, y=tf.math.reduce_max(input_tensor=lengths, axis=0))
if self.processing == 'cumulative':
    ...
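# How the starts/lengths clipping above restricts each dependency window to
# the horizon; NumPy stand-ins with made-up example values:
import numpy as np

dependency_starts = np.array([0, 5, 12])
dependency_lengths = np.array([5, 7, 2])
horizon = 3 + 1                                   # dependency_horizon + 1 (0th step)
overhang = np.maximum(dependency_lengths - horizon, 0)
starts = dependency_starts + overhang             # shift starts past the overhang
lengths = dependency_lengths - overhang           # cap lengths at the horizon
# e.g. the second window (length 7) becomes start 8, length 4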
# ==============================================================================
from collections import OrderedDict
from tensorforce.core import Module
class Objective(Module):
"""
Base class for optimization objectives.
Args:
name (string): Module name
(<span style="color:#0000C0"><b>internal use</b></span>).
summary_labels ('all' | iter[string]): Labels of summaries to record
(<span style="color:#00C000"><b>default</b></span>: inherit value of parent module).
"""
def __init__(self, name, summary_labels=None):
super().__init__(name=name, summary_labels=summary_labels)
def tf_loss_per_instance(self, policy, states, internals, auxiliaries, actions, reward):
raise NotImplementedError
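# A minimal sketch of a concrete objective, assuming the policy exposes a
# log_probability(...) helper as in Tensorforce's policy-gradient objective
# (the exact policy interface here is an assumption):
class PlainPolicyGradient(Objective):

    def tf_loss_per_instance(self, policy, states, internals, auxiliaries, actions, reward):
        # REINFORCE-style score: -log pi(a|s) scaled by the reward
        log_probability = policy.log_probability(
            states=states, internals=internals, auxiliaries=auxiliaries, actions=actions
        )
        return -log_probability * reward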
states_value = self.value.apply(x=x)
if len(self.embedding_shape) == 1:
states_value = tf.reshape(tensor=states_value, shape=value_shape)
# Dueling aggregation: state value plus mean-centered action advantages
action_values = states_value + action_values - tf.math.reduce_mean(
    input_tensor=action_values, axis=-1, keepdims=True
)
states_value = tf.squeeze(input=states_value, axis=-1)
action_values = tf.where(condition=mask, x=action_values, y=min_float)
# Softmax for corresponding probabilities
probabilities = tf.nn.softmax(logits=action_values, axis=-1)
# "Normalized" logits
logits = tf.math.log(x=tf.maximum(x=probabilities, y=epsilon))
Module.update_tensor(name=(self.name + '-probabilities'), tensor=probabilities)
return logits, probabilities, states_value, action_values
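# The dueling aggregation and masked softmax above in plain NumPy; the mask
# and epsilon values are illustrative assumptions:
import numpy as np

states_value = np.array([[0.5]])                  # V(s), shape (batch, 1)
advantages = np.array([[1.0, 2.0, 3.0]])          # per-action values A(s, a)
mask = np.array([[True, True, False]])            # third action is unavailable
epsilon, min_float = 1e-6, float(np.finfo(np.float32).min)
# Q = V + A - mean(A): advantages only carry relative preferences
action_values = states_value + advantages - advantages.mean(axis=-1, keepdims=True)
action_values = np.where(mask, action_values, min_float)
shifted = np.exp(action_values - action_values.max(axis=-1, keepdims=True))
probabilities = shifted / shifted.sum(axis=-1, keepdims=True)
logits = np.log(np.maximum(probabilities, epsilon))  # "normalized" logits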
# Log standard deviation
log_stddev = self.log_stddev.apply(x=x)
if len(self.embedding_shape) == 1:
log_stddev = tf.reshape(tensor=log_stddev, shape=shape)
# Clip log_stddev for numerical stability
# epsilon < 1.0, hence negative
log_stddev = tf.clip_by_value(
t=log_stddev, clip_value_min=log_epsilon, clip_value_max=-log_epsilon
)
# Standard deviation
stddev = tf.exp(x=log_stddev)
Module.update_tensor(name=(self.name + '-mean'), tensor=mean)
Module.update_tensor(name=(self.name + '-stddev'), tensor=stddev)
return mean, stddev, log_stddev
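# The log-stddev clipping above, in NumPy: since epsilon < 1, log(epsilon) is
# negative, so the clip bounds stddev to [epsilon, 1/epsilon]; the epsilon
# value is an illustrative assumption:
import numpy as np

epsilon = 1e-6
log_epsilon = np.log(epsilon)
log_stddev = np.array([-50.0, 0.0, 50.0])
log_stddev = np.clip(log_stddev, log_epsilon, -log_epsilon)
stddev = np.exp(log_stddev)   # [1e-06, 1.0, 1e+06]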
# States value
states_value = logit
# Sigmoid for corresponding probability
probability = tf.sigmoid(x=logit)
# Clip probability for numerical stability
probability = tf.clip_by_value(
t=probability, clip_value_min=epsilon, clip_value_max=(one - epsilon)
)
# "Normalized" logits
true_logit = tf.math.log(x=probability)
false_logit = tf.math.log(x=(one - probability))
Module.update_tensor(name=(self.name + '-probability'), tensor=probability)
return true_logit, false_logit, probability, states_value
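# The Bernoulli clipping and "normalized" logits above in NumPy; the epsilon
# value is an illustrative assumption:
import numpy as np

epsilon, one = 1e-6, 1.0
logit = np.array([-10.0, 0.0, 10.0])
probability = one / (one + np.exp(-logit))               # sigmoid
probability = np.clip(probability, epsilon, one - epsilon)
true_logit = np.log(probability)                         # log p(True)
false_logit = np.log(one - probability)                  # log p(False)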
"""
stored = self.memory.store(
states=states, internals=internals, actions=actions, terminal=terminal, reward=reward
)
# Periodic optimization
with tf.control_dependencies(control_inputs=(stored,)):
zero = tf.constant(value=0, dtype=util.tf_dtype(dtype='long'))
batch_size = self.update_batch_size.value()
frequency = self.update_frequency.value()
start = self.update_start.value()
start = tf.maximum(x=start, y=batch_size)
if self.update_unit == 'timesteps':
# Timestep-based batch
timestep = Module.retrieve_tensor(name='timestep')
is_frequency = tf.math.equal(x=tf.math.mod(x=timestep, y=frequency), y=zero)
at_least_start = tf.math.greater_equal(x=timestep, y=start)
elif self.update_unit == 'sequences':
# Timestep-sequence-based batch
timestep = Module.retrieve_tensor(name='timestep')
sequence_length = self.update_sequence_length.value()
is_frequency = tf.math.equal(x=tf.math.mod(x=timestep, y=frequency), y=zero)
at_least_start = tf.math.greater_equal(x=timestep, y=(start + sequence_length - 1))
elif self.update_unit == 'episodes':
# Episode-based batch
episode = Module.retrieve_tensor(name='episode')
is_frequency = tf.math.equal(x=tf.math.mod(x=episode, y=frequency), y=zero)
# Only update once per episode increment
is_frequency = tf.math.logical_and(x=is_frequency, y=terminal[-1])
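# The timestep-based update condition above, restated in plain Python with
# made-up frequency/start/batch-size values:
def should_update(timestep, frequency=4, start=10, batch_size=8):
    start = max(start, batch_size)          # never update before a full batch
    return timestep % frequency == 0 and timestep >= start

assert not should_update(timestep=8)        # multiple of 4, but before start
assert should_update(timestep=12)           # multiple of 4 and past start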
# Beta
beta = self.beta.apply(x=x)
# epsilon < 1.0, hence negative
beta = tf.clip_by_value(t=beta, clip_value_min=log_epsilon, clip_value_max=-log_epsilon)
beta = tf.math.softplus(features=beta) + one
if len(self.embedding_shape) == 1:
beta = tf.reshape(tensor=beta, shape=shape)
# Alpha + Beta
alpha_beta = tf.maximum(x=(alpha + beta), y=epsilon)
# Log norm
log_norm = tf.math.lgamma(x=alpha) + tf.math.lgamma(x=beta) - tf.math.lgamma(x=alpha_beta)
Module.update_tensor(name=(self.name + '-alpha'), tensor=alpha)
Module.update_tensor(name=(self.name + '-beta'), tensor=beta)
return alpha, beta, alpha_beta, log_norm
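# The Beta parametrization above in NumPy/SciPy: softplus shifts both
# concentrations above 1, and the log norm is log B(alpha, beta) via
# log-gamma; the input values and epsilon are illustrative assumptions:
import numpy as np
from scipy.special import gammaln

epsilon = 1e-6
raw_alpha, raw_beta = np.array([0.3]), np.array([-0.2])
alpha = np.log1p(np.exp(raw_alpha)) + 1.0   # softplus(x) + 1 > 1
beta = np.log1p(np.exp(raw_beta)) + 1.0
alpha_beta = np.maximum(alpha + beta, epsilon)
log_norm = gammaln(alpha) + gammaln(beta) - gammaln(alpha_beta)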