self.baseline_objective = self.add_module(
    name='baseline-objective', module=baseline_objective, modules=objective_modules,
is_trainable=False, is_subscope=True
)
# Entropy regularization
entropy_regularization = 0.0 if entropy_regularization is None else entropy_regularization
self.entropy_regularization = self.add_module(
name='entropy-regularization', module=entropy_regularization,
modules=parameter_modules, is_trainable=False, dtype='float'
)
# Internals initialization
self.internals_init.update(self.policy.internals_init())
self.internals_init.update(self.baseline_policy.internals_init())
if any(internal_init is None for internal_init in self.internals_init.values()):
raise TensorforceError.unexpected()
# Register global tensors
Module.register_tensor(name='update', spec=dict(type='long', shape=()), batched=False)
Module.register_tensor(
name='optimization', spec=dict(type='bool', shape=()), batched=False
)
Module.register_tensor(
name='dependency_starts', spec=dict(type='long', shape=()), batched=True
)
Module.register_tensor(
name='dependency_lengths', spec=dict(type='long', shape=()), batched=True
)
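# Illustrative note (assumption, not shown in this snippet: Module also exposes a matching
# retrieve_tensor lookup): a globally registered tensor such as 'update' could later be
# read back inside the graph, e.g.
#     update_counter = Module.retrieve_tensor(name='update')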
def initialize(self):
"""
Initializes the agent.
"""
if self.is_initialized:
raise TensorforceError.unexpected()
self.is_initialized = True
# Parallel terminal/reward buffers
self.terminal_buffers = np.ndarray(
shape=(self.parallel_interactions, self.buffer_observe),
dtype=util.np_dtype(dtype='long')
)
self.reward_buffers = np.ndarray(
shape=(self.parallel_interactions, self.buffer_observe),
dtype=util.np_dtype(dtype='float')
)
# Recorder buffers if required
if self.recorder_spec is not None:
self.states_buffers = OrderedDict()
if isinstance(buffer_observe, bool):
if not buffer_observe and self.parallel_interactions > 1:
raise TensorforceError.unexpected()
if self.max_episode_timesteps is None and self.parallel_interactions > 1:
raise TensorforceError.unexpected()
if not buffer_observe:
self.buffer_observe = 1
elif self.max_episode_timesteps is None:
self.buffer_observe = 100
else:
self.buffer_observe = self.max_episode_timesteps
elif isinstance(buffer_observe, int):
if buffer_observe <= 0:
raise TensorforceError.value(name='buffer_observe', value=buffer_observe)
if self.parallel_interactions > 1:
raise TensorforceError.unexpected()
if self.max_episode_timesteps is None:
self.buffer_observe = buffer_observe
else:
self.buffer_observe = min(buffer_observe, self.max_episode_timesteps)
else:
raise TensorforceError.type(name='buffer_observe', value=buffer_observe)
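# Standalone sketch (hypothetical helper, not part of the source) mirroring the
# buffer_observe resolution above, without the validation errors; handy for reasoning
# about the resulting buffer sizes.
def _resolve_buffer_observe(buffer_observe, max_episode_timesteps):
    # Boolean flags select a default size; integers are capped at the episode length.
    if isinstance(buffer_observe, bool):
        if not buffer_observe:
            return 1
        return 100 if max_episode_timesteps is None else max_episode_timesteps
    if max_episode_timesteps is None:
        return buffer_observe
    return min(buffer_observe, max_episode_timesteps)

# e.g. _resolve_buffer_observe(True, 500) == 500, _resolve_buffer_observe(64, 50) == 50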
# name
if name in self.variables:
    raise TensorforceError.exists(name='variable', value=name)
# dtype
if not util.is_valid_type(dtype=dtype):
raise TensorforceError.value(name='variable', argument='dtype', value=dtype)
# shape
if not util.is_iterable(x=shape) or not all(isinstance(dims, int) for dims in shape):
raise TensorforceError.type(name='variable', argument='shape', value=shape)
elif not all(dims > 0 for dims in shape):
raise TensorforceError.value(name='variable', argument='shape', value=shape)
# is_trainable
if not isinstance(is_trainable, bool):
raise TensorforceError.type(
name='variable', argument='is_trainable', value=is_trainable
)
elif is_trainable and dtype != 'float':
raise TensorforceError.unexpected()
# initializer
initializer_names = (
'normal', 'normal-relu', 'orthogonal', 'orthogonal-relu', 'zeros', 'ones'
)
if not isinstance(initializer, (util.py_dtype(dtype=dtype), np.ndarray, tf.Tensor)) and \
initializer not in initializer_names:
raise TensorforceError.value(
name='variable', argument='initializer', value=initializer
)
elif isinstance(initializer, np.ndarray) and \
initializer.dtype != util.np_dtype(dtype=dtype):
raise TensorforceError.type(
name='variable', argument='initializer', value=initializer
)
elif isinstance(initializer, tf.Tensor) and util.dtype(x=initializer) != dtype:
raise TensorforceError.type(
    name='variable', argument='initializer', value=initializer
)

def __init__(
    self, name, processing, dependency_horizon, input_spec=None, summary_labels=None,
    l2_regularization=None, **kwargs
):
"""
Temporal layer constructor.
Args:
processing ('cumulative' | 'iterative'): Temporal processing type (**required**).
dependency_horizon (parameter, long >= 0): Number of previous timesteps on which the temporal processing depends (**required**).
kwargs: Additional arguments for potential parent class.
"""
super().__init__(
name=name, input_spec=input_spec, summary_labels=summary_labels,
l2_regularization=l2_regularization, **kwargs
)
if processing not in ('cumulative', 'iterative'):
raise TensorforceError.unexpected()
self.processing = processing
self.dependency_horizon = self.add_module(
name='dependency-horizon', module=dependency_horizon, modules=parameter_modules,
is_trainable=False, dtype='long'
)
# dtype
if not util.is_valid_type(dtype=dtype):
raise TensorforceError.value(name='placeholder', argument='dtype', value=dtype)
# shape
if not util.is_iterable(x=shape) or not all(isinstance(dims, int) for dims in shape):
raise TensorforceError.type(name='placeholder', argument='shape', value=shape)
elif not all(dims > 0 for dims in shape):
raise TensorforceError.value(name='placeholder', argument='shape', value=shape)
# batched
if not isinstance(batched, bool):
raise TensorforceError.type(name='placeholder', argument='batched', value=batched)
# default
if default is not None:
# if batched:
# raise TensorforceError.unexpected()
if not isinstance(default, tf.Tensor):
raise TensorforceError.unexpected()
elif util.dtype(x=default) != dtype:
raise TensorforceError.unexpected()
# Placeholder
if batched:
shape = (None,) + shape
if default is None:
dtype = util.tf_dtype(dtype=dtype)
placeholder = tf.compat.v1.placeholder(dtype=dtype, shape=shape, name=name)
else:
# TODO: check that default's dtype and shape match the placeholder specification
placeholder = tf.compat.v1.placeholder_with_default(
input=default, shape=shape, name=name
)
return placeholder
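# Usage sketch (assumptions: the helper above is a module method, here called
# add_placeholder, as the 'placeholder' error messages suggest); a batched placeholder
# gains a leading None batch dimension:
#     states_input = self.add_placeholder(
#         name='states', dtype='float', shape=(8,), batched=True
#     )
#     # -> tf.compat.v1.placeholder with shape (None, 8)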
def execute(self, actions):
if self.environment.game_over():
raise TensorforceError.unexpected()
reward = self.environment.act(action=self.available_actions[actions])
terminal = self.environment.game_over()
states = self.get_states()
return states, terminal, reward
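# Minimal interaction-loop sketch (assumptions: `environment` is an instance of the wrapper
# above and also exposes reset(), and `agent` is a Tensorforce agent with act/observe):
#     states = environment.reset()
#     terminal = False
#     while not terminal:
#         actions = agent.act(states=states)
#         states, terminal, reward = environment.execute(actions=actions)
#         agent.observe(terminal=terminal, reward=reward)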
if isinstance(update, int):
update = dict(unit='timesteps', batch_size=update)
reward_estimation = dict(reward_estimation)
if reward_estimation['horizon'] == 'episode':
if max_episode_timesteps is None:
raise TensorforceError.unexpected()
reward_estimation['horizon'] = max_episode_timesteps
if 'capacity' not in reward_estimation:
# TODO: Doesn't take network horizon into account, needs to be set internally to max
# if isinstance(reward_estimation['horizon'], int):
# reward_estimation['capacity'] = max(
# self.buffer_observe, reward_estimation['horizon'] + 2
# )
if max_episode_timesteps is None:
raise TensorforceError.unexpected()
if isinstance(reward_estimation['horizon'], int):
reward_estimation['capacity'] = max(
max_episode_timesteps, reward_estimation['horizon']
)
else:
reward_estimation['capacity'] = max_episode_timesteps
self.experience_size = reward_estimation['capacity']
if memory is None:
# predecessor/successor?
if max_episode_timesteps is None or not isinstance(update['batch_size'], int) \
or not isinstance(reward_estimation['horizon'], int):
raise TensorforceError.unexpected()
if update['unit'] == 'timesteps':
memory = update['batch_size'] + max_episode_timesteps + \
reward_estimation['horizon']
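# Worked example of the inferred memory size above (hypothetical values): with
# update=dict(unit='timesteps', batch_size=64), max_episode_timesteps=500 and
# reward_estimation['horizon']=20, memory = 64 + 500 + 20 = 584 timesteps.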
input_spec = dict(type='float', shape=self.embedding_shape)
if len(self.embedding_shape) == 1:
action_size = util.product(xs=self.action_spec['shape'], empty=0)
self.alpha = self.add_module(
name='alpha', module='linear', modules=layer_modules, size=action_size,
input_spec=input_spec
)
self.beta = self.add_module(
name='beta', module='linear', modules=layer_modules, size=action_size,
input_spec=input_spec
)
else:
if len(self.embedding_shape) < 1 or len(self.embedding_shape) > 3:
raise TensorforceError.unexpected()
if self.embedding_shape[:-1] == self.action_spec['shape'][:-1]:
size = self.action_spec['shape'][-1]
elif self.embedding_shape[:-1] == self.action_spec['shape']:
size = 0
else:
raise TensorforceError.unexpected()
self.alpha = self.add_module(
name='alpha', module='linear', modules=layer_modules, size=size,
input_spec=input_spec
)
self.beta = self.add_module(
name='beta', module='linear', modules=layer_modules, size=size,
input_spec=input_spec
)
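# Shape sketch (hypothetical values): with action shape (2, 3) and embedding shape (2, 16),
# embedding_shape[:-1] == action_spec['shape'][:-1] == (2,), so size = 3 and the linear
# alpha/beta layers emit the 3 per-action parameters for each embedding row; with embedding
# shape (2, 3, 16) instead, embedding_shape[:-1] equals the full action shape and size = 0.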
def get_output_tensors(self, function):
"""
Returns the names of output tensors for the given function.
Args:
function (str): Function name
(**required**).
Returns:
list[str]: Names of output tensors.
"""
if function in self.model.output_tensors:
return self.model.output_tensors[function]
else:
raise TensorforceError.unexpected()
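# Usage sketch (assumptions: `agent` is an initialized agent and 'act' is one of the
# function names present in self.model.output_tensors):
#     output_names = agent.get_output_tensors(function='act')
#     print(output_names)  # list of output tensor names for the act function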