# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def execute(self, actions):
    """Advance the mock environment by one step.

    Args:
        actions: Actions to apply; validated against the current states via
            self.is_valid_actions.

    Returns:
        Tuple of (states, terminal, reward): freshly randomized states, a
        terminal flag, and a reward drawn uniformly from [-1.0, 1.0).

    Raises:
        TensorforceError: If the given actions are not valid.
    """
    if not self.is_valid_actions(actions, self._states):
        raise TensorforceError.value(name='actions', value=actions)

    self.timestep += 1
    self._states = self.random_states()

    # Episode can only end once the minimum length is reached; then it ends
    # with probability 0.25 per step.
    terminal = self.timestep >= self.min_timesteps and random() < 0.25

    # Uniform reward in [-1.0, 1.0).
    reward = 2.0 * random() - 1.0

    return self._states, terminal, reward
def normalize_values(value_type, values, values_spec):
    """Normalize user-provided values into the flat '/'-joined layout of `values_spec`.

    Args:
        value_type: Kind of value (e.g. 'state' or 'action'); must satisfy
            is_valid_value_type.
        values: Either a single value (when the spec defines exactly one entry)
            or a possibly nested dict of values.
        values_spec: Spec dict whose keys are normalized '/'-joined names.

    Raises:
        TensorforceError: If value_type is not a valid value type.
    """
    if not is_valid_value_type(value_type=value_type):
        raise TensorforceError.value(name='value_type', value=value_type)
    if len(values_spec) == 1 and next(iter(values_spec)) == value_type:
        # Spec defines only a single value
        if isinstance(values, dict):
            if len(values) != 1 or value_type not in values:
                # NOTE(review): the error is constructed but never raised --
                # presumably a missing `raise`; confirm against the full source.
                TensorforceError.value(name=(value_type + ' spec'), value=values)
            return values
        else:
            # Wrap the bare value under its type name.
            return OrderedDict([(value_type, values)])
    normalized_values = OrderedDict()
    for normalized_name in values_spec:
        # Walk the nested dict along the '/'-separated path to the leaf value.
        value = values
        for name in normalized_name.split('/'):
            value = value[name]
        # NOTE(review): excerpt appears truncated here -- `value` is never stored
        # into `normalized_values` and no return statement is visible in this view.
def unpack_values(value_type, values, values_spec):
    """Expand a flat dict of '/'-joined names back into nested dicts.

    Inverse of value normalization: each key 'a/b/c' in `values_spec` moves
    `values['a/b/c']` into `result['a']['b']['c']`.

    Args:
        value_type: Kind of value (e.g. 'state' or 'action'); must satisfy
            is_valid_value_type.
        values: Flat dict keyed by normalized '/'-joined names; entries are
            consumed via pop().
        values_spec: Spec dict whose keys are the normalized names.

    Raises:
        TensorforceError: If value_type is not a valid value type.
    """
    if not is_valid_value_type(value_type=value_type):
        raise TensorforceError.value(name='value_type', value=value_type)
    if len(values_spec) == 1 and next(iter(values_spec)) == value_type:
        # Spec defines only a single value
        return values[value_type]
    unpacked_values = dict()
    for normalized_name in values_spec:
        # Create intermediate dicts along the path, then place the leaf value.
        unpacked_value = unpacked_values
        names = normalized_name.split('/')
        for name in names[:-1]:
            if name not in unpacked_value:
                unpacked_value[name] = dict()
            unpacked_value = unpacked_value[name]
        # pop() so any leftover (unexpected) entries can be detected afterwards.
        unpacked_value[names[-1]] = values.pop(normalized_name)
    if len(values) > 0:
        # NOTE(review): excerpt truncated here -- the handling of leftover entries
        # (and the return of unpacked_values) continues beyond this view.
def dtype(x):
    """Return the Tensorforce type string for a tensor's TensorFlow dtype.

    Args:
        x: Tensor-like object exposing a `.dtype` attribute.

    Returns:
        Type-name key from tf_dtype_mapping, or 'float' for tf.float32.

    Raises:
        TensorforceError: If the dtype matches no known mapping entry.
    """
    for type_name, tf_type in tf_dtype_mapping.items():
        if x.dtype == tf_type:
            return type_name
    # Not in the mapping: float32 is accepted as the default 'float' type.
    if x.dtype == tf.float32:
        return 'float'
    raise TensorforceError.value(name='dtype', value=x.dtype)
)
# NOTE(review): this excerpt is the interior of an __init__ whose header is not
# visible here; confirm attribute semantics against the full file.
# Parallel buffer indices
self.buffer_indices = np.zeros(
    shape=(self.parallel_interactions,), dtype=util.np_dtype(dtype='int')
)
# Global experience/update counters.
self.timesteps = 0
self.episodes = 0
self.updates = 0
# Recorder
if recorder is None:
    pass
elif not all(key in ('directory', 'frequency', 'max-traces') for key in recorder):
    # Reject unknown keys in the recorder spec dict.
    raise TensorforceError.value(name='recorder', value=list(recorder))
self.recorder_spec = recorder
if self.recorder_spec is not None:
    # Per-name lists that accumulate traces to be recorded.
    self.record_states = OrderedDict(((name, list()) for name in self.states_spec))
    for name, spec in self.actions_spec.items():
        if spec['type'] == 'int':
            # Int actions additionally record their action mask.
            self.record_states[name + '_mask'] = list()
    self.record_actions = OrderedDict(((name, list()) for name in self.actions_spec))
    self.record_terminal = list()
    self.record_reward = list()
    self.num_episodes = 0
def add_variable(
    self, name, dtype, shape, is_trainable, initializer='zeros', is_saved=True, summarize=None,
    shared=None
):
    """Validate arguments for, and (beyond this excerpt) create, a module variable.

    Args:
        name: Unique variable name; must satisfy util.is_valid_name and not
            already exist in self.variables.
        dtype: Type string; must satisfy util.is_valid_type. Only 'float'
            variables may be trainable.
        shape: Iterable of positive ints.
        is_trainable: Whether the variable is trainable (bool).
        initializer: Initializer spec; defaults to 'zeros'.
        is_saved: Presumably whether the variable is included in saving -- not
            used within this excerpt; confirm against the full method.
        summarize: Presumably summary configuration -- not used within this
            excerpt; confirm against the full method.
        shared: Presumably sharing configuration -- not used within this
            excerpt; confirm against the full method.

    Raises:
        TensorforceError: On any invalid argument.
    """
    # name
    if not util.is_valid_name(name=name):
        raise TensorforceError.value(name='variable', argument='name', value=name)
    elif name in self.variables:
        raise TensorforceError.exists(name='variable', value=name)
    # dtype
    if not util.is_valid_type(dtype=dtype):
        raise TensorforceError.value(name='variable', argument='dtype', value=dtype)
    # shape
    if not util.is_iterable(x=shape) or not all(isinstance(dims, int) for dims in shape):
        raise TensorforceError.type(name='variable', argument='shape', value=shape)
    elif not all(dims > 0 for dims in shape):
        raise TensorforceError.value(name='variable', argument='shape', value=shape)
    # is_trainable
    if not isinstance(is_trainable, bool):
        raise TensorforceError.type(
            name='variable', argument='is_trainable', value=is_trainable
        )
    elif is_trainable and dtype != 'float':
        # Only float variables can be trainable.
        raise TensorforceError.unexpected()
    # initializer
    initializer_names = (
        'normal', 'normal-relu', 'orthogonal', 'orthogonal-relu', 'zeros', 'ones'
    )
    # NOTE(review): excerpt truncated here -- initializer validation and the
    # actual variable creation continue beyond this view.
# NOTE(review): this excerpt is the interior of a value-spec validation function
# whose header is not visible here; nesting reconstructed from context -- confirm
# against the full source.
if not is_valid_type(dtype=dtype):
    raise TensorforceError.value(name=(value_type + ' spec'), argument='type', value=dtype)
if return_normalized:
    # Map the dtype back to its canonical name where a reverse mapping exists.
    normalized_spec['type'] = reverse_dtype_mapping.get(dtype, dtype)
if value_type == 'action' and return_normalized:
    # Actions may omit shape; default to scalar shape.
    shape = value_spec.pop('shape', ())
else:
    shape = value_spec.pop('shape')
if accept_underspecified and shape is None:
    if return_normalized:
        normalized_spec['shape'] = None
elif is_iterable(x=shape):
    # A leading None dimension is allowed only when underspecified shapes are accepted.
    start = int(accept_underspecified and len(shape) > 0 and shape[0] is None)
    if not all(isinstance(dims, int) for dims in shape[start:]):
        raise TensorforceError.value(
            name=(value_type + ' spec'), argument='shape', value=shape
        )
    if accept_underspecified:
        # -1 (unknown) and any non-negative dims are acceptable.
        if not all(dims >= -1 for dims in shape[start:]):
            raise TensorforceError.value(
                name=(value_type + ' spec'), argument='shape', value=shape
            )
    else:
        # Only strictly positive dims or the -1 wildcard are acceptable.
        if not all(dims > 0 or dims == -1 for dims in shape):
            raise TensorforceError.value(
                name=(value_type + ' spec'), argument='shape', value=shape
            )
    if return_normalized:
        normalized_spec['shape'] = tuple(shape)
elif return_normalized:
    if not isinstance(shape, int):
        # NOTE(review): excerpt truncated here -- handling of scalar int shapes
        # continues beyond this view.
def np_dtype(dtype):
    """Translates dtype specifications in configurations to numpy data types.

    Args:
        dtype: String describing a numerical type (e.g. 'float') or numerical
            type primitive.

    Returns: Numpy data type

    Raises:
        TensorforceError: If dtype has no entry in np_dtype_mapping.
    """
    # Guard clause: unknown specifications are rejected up front.
    if dtype not in np_dtype_mapping:
        raise TensorforceError.value(name='dtype', value=dtype)
    return np_dtype_mapping[dtype]
# NOTE(review): this excerpt is the interior of a model __init__ whose header is
# not visible here; confirm attribute semantics against the full file.
# Optimizer
self.optimizer = self.add_module(
    name='optimizer', module=optimizer, modules=optimizer_modules, is_trainable=False
)
# Objective
self.objective = self.add_module(
    name='objective', module=objective, modules=objective_modules, is_trainable=False
)
# Estimator
# Reject unknown keys in the reward_estimation dict.
if not all(key in (
    'capacity', 'discount', 'estimate_actions', 'estimate_advantage', 'estimate_horizon',
    'estimate_terminal', 'horizon'
) for key in reward_estimation):
    raise TensorforceError.value(name='reward_estimation', value=list(reward_estimation))
# Default horizon estimation: disabled unless some baseline component is configured.
if baseline_policy is None and baseline_optimizer is None and baseline_objective is None:
    estimate_horizon = False
else:
    estimate_horizon = 'late'
# 'horizon' and 'capacity' are required keys; the rest fall back to defaults.
self.estimator = self.add_module(
    name='estimator', module=Estimator, is_trainable=False, is_saved=False,
    values_spec=self.values_spec, horizon=reward_estimation['horizon'],
    discount=reward_estimation.get('discount', 1.0),
    estimate_horizon=reward_estimation.get('estimate_horizon', estimate_horizon),
    estimate_actions=reward_estimation.get('estimate_actions', False),
    estimate_terminal=reward_estimation.get('estimate_terminal', False),
    estimate_advantage=reward_estimation.get('estimate_advantage', False),
    capacity=reward_estimation['capacity']
)
# Baseline
def __init__(
    self, agent, environment=None, num_parallel=None, environments=None,
    max_episode_timesteps=None, evaluation_environment=None, save_best_agent=None
):
    """Initialize a parallel runner over one or several environments.

    Args:
        agent: Agent to run -- not used within this excerpt; confirm against
            the full method.
        environment: Single environment (spec or Environment instance); mutually
            exclusive with `environments`.
        num_parallel: Number of parallel environment copies; inferred from
            `environments` when those are given.
        environments: Iterable of environments, required when `environment` is
            None; must be non-empty.
        max_episode_timesteps: Maximum episode length passed to
            Environment.create.
        evaluation_environment: Not used within this excerpt; confirm against
            the full method.
        save_best_agent: Not used within this excerpt; confirm against the full
            method.

    Raises:
        TensorforceError: If `environments` is not iterable or is empty.
    """
    self.environments = list()
    if environment is None:
        # No single environment given: a non-empty collection is required instead.
        assert num_parallel is None and environments is not None
        if not util.is_iterable(x=environments):
            raise TensorforceError.type(
                name='parallel-runner', argument='environments', value=environments
            )
        elif len(environments) == 0:
            raise TensorforceError.value(
                name='parallel-runner', argument='environments', value=environments
            )
        num_parallel = len(environments)
        environment = environments[0]
        # Remember whether the caller passed Environment instances (externally owned).
        self.is_environment_external = isinstance(environment, Environment)
        environment = Environment.create(
            environment=environment, max_episode_timesteps=max_episode_timesteps
        )
        states = environment.states()
        actions = environment.actions()
        self.environments.append(environment)
        for environment in environments[1:]:
            # All supplied environments must uniformly be external or spec-based.
            assert isinstance(environment, Environment) == self.is_environment_external
            environment = Environment.create(
                environment=environment, max_episode_timesteps=max_episode_timesteps
            )
            # NOTE(review): excerpt truncated here -- the remainder of the loop and
            # of __init__ continues beyond this view.