# Shared imports for the snippets below; aliases follow the repo's own
# conventions (tasks_lib, trainer_, tfd).
import os

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from planet import control
from planet import tools
from planet import training
from planet.scripts import configs
from planet.scripts import tasks as tasks_lib
from planet.training import trainer as trainer_

tfd = tfp.distributions


def _tasks(config, params):
  """Resolve task names and register the corresponding prediction heads."""
  tasks = params.get('tasks', ['cheetah_run'])
  tasks = [getattr(tasks_lib, name)(config, params) for name in tasks]
  config.isolate_envs = params.get('isolate_envs', 'thread')
  def common_spaces_ctor(task, action_spaces):
    env = task.env_ctor()
    env = control.wrappers.SelectObservations(env, ['image'])
    env = control.wrappers.PadActions(env, action_spaces)
    return env
  if len(tasks) > 1:
    # Multiple tasks require a shared observation format and a padded
    # common action space.
    action_spaces = [task.env_ctor().action_space for task in tasks]
    for index, task in enumerate(tasks):
      env_ctor = tools.bind(common_spaces_ctor, task, action_spaces)
      tasks[index] = tasks_lib.Task(
          task.name, env_ctor, task.max_length, ['reward'])
  for name in tasks[0].state_components:
    if name == 'reward' or params.get('state_diagnostics', False):
      config.heads[name] = tools.bind(
          config.head_network,
          stop_gradient=name not in config.gradient_heads)
      config.loss_scales[name] = 1.0
  config.tasks = tasks
  return config
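# Usage sketch (illustrative, not from the original source): selecting two
# tasks by name. Assumes config already carries heads, head_network,
# gradient_heads, and loss_scales from the other config functions in this
# file.
params = tools.AttrDict(tasks=['cheetah_run', 'walker_walk'])
config = _tasks(config, params)  # config.tasks now holds the wrapped tasks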
def feed_forward(
    state, data_shape, num_layers=2, activation=tf.nn.relu,
    mean_activation=None, stop_gradient=False, trainable=True, units=100,
    std=1.0, low=-1.0, high=1.0, dist='normal'):
  """Create a model returning unnormalized MSE distribution."""
  hidden = state
  if stop_gradient:
    hidden = tf.stop_gradient(hidden)
  for _ in range(num_layers):
    hidden = tf.layers.dense(hidden, units, activation)
  mean = tf.layers.dense(
      hidden, int(np.prod(data_shape)), mean_activation, trainable=trainable)
  mean = tf.reshape(mean, tools.shape(state)[:-1] + data_shape)
  if std == 'learned':
    std = tf.layers.dense(
        hidden, int(np.prod(data_shape)), None, trainable=trainable)
    std = tf.nn.softplus(std + 0.55) + 0.01
    std = tf.reshape(std, tools.shape(state)[:-1] + data_shape)
  if dist == 'normal':
    dist = tfd.Normal(mean, std)
  elif dist == 'truncated_normal':
    # https://www.desmos.com/calculator/3o96eyqxib
    dist = tfd.TruncatedNormal(mean, std, low, high)
  elif dist == 'tanh_normal':
    # https://www.desmos.com/calculator/sxpp7ectjv
    dist = tfd.Normal(mean, std)
    dist = tfd.TransformedDistribution(dist, tfp.bijectors.Tanh())
  elif dist == 'deterministic':
    dist = tfd.Deterministic(mean)
  else:
    raise NotImplementedError(dist)
  return dist
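# Usage sketch (illustrative): feed_forward is typically wrapped with
# tools.bind and used as a head network mapping latent features to a
# distribution over a target; data_shape=[1] is an assumption for a scalar
# target such as reward.
reward_head = tools.bind(feed_forward, data_shape=[1], dist='normal')
# reward_dist = reward_head(features)  # a tfd.Normal over the last axis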
def process(logdir, args):
  with args.params.unlocked:
    args.params.logdir = logdir
  config = tools.AttrDict()
  with config.unlocked:
    config = getattr(configs, args.config)(config, args.params)
  training.utility.collect_initial_episodes(config)
  tf.reset_default_graph()
  dataset = tools.numpy_episodes.numpy_episodes(
      config.train_dir, config.test_dir, config.batch_shape,
      reader=config.data_reader,
      loader=config.data_loader,
      num_chunks=config.num_chunks,
      preprocess_fn=config.preprocess_fn)
  for score in training.utility.train(
      training.define_model, dataset, logdir, config):
    yield score
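# Usage sketch (illustrative): process is a generator driven by an outer
# experiment loop. The args object only needs the fields used above, so a
# minimal driver could look like this (the field values are assumptions):
args = tools.AttrDict(config='default', params=tools.AttrDict())
for score in process(os.path.expanduser('~/logdir/planet'), args):
  print('Test score:', score)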
def train(model_fn, datasets, logdir, config):
  """Build the graph, set up train and test phases, and yield scores."""
  if not config:
    raise KeyError('You must specify a configuration.')
  logdir = logdir and os.path.expanduser(logdir)
  try:
    config = load_config(logdir)
  except RuntimeError:
    print('Failed to load existing config.')
  except IOError:
    config = save_config(config, logdir)
  trainer = trainer_.Trainer(logdir, config=config)
  cleanups = []
  try:
    with tf.variable_scope('graph', use_resource=True):
      data = get_batch(datasets, trainer.phase, trainer.reset)
      score, summary, cleanups = model_fn(data, trainer, config)
      message = 'Graph contains {} trainable variables.'
      tf.logging.info(message.format(tools.count_weights()))
      if config.train_steps:
        trainer.add_phase(
            'train', config.train_steps, score, summary,
            batch_size=config.batch_shape[0],
            report_every=None,
            log_every=config.train_log_every,
            checkpoint_every=config.train_checkpoint_every)
      if config.test_steps:
        trainer.add_phase(
            'test', config.test_steps, score, summary,
            batch_size=config.batch_shape[0],
            report_every=config.test_steps,
            log_every=config.test_steps,
            checkpoint_every=config.test_checkpoint_every)
      for saver in config.savers:
        trainer.add_saver(**saver)
    for score in trainer.iterate(config.max_steps):
      yield score
  finally:
    # Always release simulator resources collected by the model function.
    for cleanup in cleanups:
      cleanup()
def simulate(
    step, env_ctor, duration, num_agents, agent_config, isolate_envs,
    expensive_summaries, gif_summary, name):
  summaries = []
  with tf.variable_scope(name):
    return_, image, action, reward, cleanup = collect_rollouts(
        step=step,
        env_ctor=env_ctor,
        duration=duration,
        num_agents=num_agents,
        agent_config=agent_config,
        isolate_envs=isolate_envs)
    return_mean = tf.reduce_mean(return_)
    summaries.append(tf.summary.scalar('return', return_mean))
    if expensive_summaries:
      summaries.append(tf.summary.histogram('return_hist', return_))
      summaries.append(tf.summary.histogram('reward_hist', reward))
      summaries.append(tf.summary.histogram('action_hist', action))
      summaries.append(tools.image_strip_summary(
          'image', image, max_length=duration))
    if gif_summary:
      summaries.append(tools.gif_summary(
          'animation', image, max_outputs=1, fps=20))
  summary = tf.summary.merge(summaries)
  return summary, return_mean, cleanup
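# Usage sketch (illustrative): building summary ops for evaluation
# rollouts. env_ctor and agent_config stand in for objects constructed
# elsewhere in the repo; the remaining values are assumptions.
summary, return_mean, cleanup = simulate(
    step=tf.train.get_or_create_global_step(),
    env_ctor=env_ctor, duration=500, num_agents=1,
    agent_config=agent_config, isolate_envs='thread',
    expensive_summaries=False, gif_summary=True, name='test_rollouts')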
def add_saver(
    self, include=r'.*', exclude=r'.^', logdir=None, load=True, save=True,
    checkpoint=None):
  """Add a saver to save or load variables.

  Args:
    include: One or more regexes to match variable names to include.
    exclude: One or more regexes to match variable names to exclude.
    logdir: Directory for saver to store and search for checkpoints.
    load: Whether to use the saver to restore variables.
    save: Whether to use the saver to save variables.
    checkpoint: Checkpoint name to load; None for newest.
  """
  variables = tools.filter_variables(include, exclude)
  saver = tf.train.Saver(variables, keep_checkpoint_every_n_hours=2)
  if load:
    self._loaders.append(saver)
  if save:
    self._savers.append(saver)
  self._logdirs.append(logdir or self._logdir)
  if checkpoint is None and self._config.checkpoint_to_load:
    self._checkpoints.append(
        os.path.join(self._logdirs[-1], self._config.checkpoint_to_load))
  else:
    self._checkpoints.append(checkpoint)
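# Usage sketch (illustrative): the default exclude pattern r'.^' is a regex
# that can never match, so all variables are included. Entries of
# config.savers are keyword dicts forwarded to this method by train()
# above; the pattern below is an assumption.
config.savers = [dict(exclude=(r'.*_temporary.*',))]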
def _data_processing(config, params):
  config.batch_shape = params.get('batch_shape', (50, 50))
  config.num_chunks = params.get('num_chunks', 1)
  image_bits = params.get('image_bits', 5)
  config.preprocess_fn = tools.bind(
      tools.preprocess.preprocess, bits=image_bits)
  config.postprocess_fn = tools.bind(
      tools.preprocess.postprocess, bits=image_bits)
  config.open_loop_context = 5
  config.data_reader = tools.numpy_episodes.episode_reader
  config.data_loader = {
      'cache': tools.bind(
          tools.numpy_episodes.cache_loader,
          every=params.get('loader_every', 1000)),
      'recent': tools.bind(
          tools.numpy_episodes.recent_loader,
          every=params.get('loader_every', 1000)),
      'reload': tools.numpy_episodes.reload_loader,
      'dummy': tools.numpy_episodes.dummy_loader,
  }[params.get('loader', 'recent')]
  config.bound_action = tools.bind(
      tools.bound_action,
      strategy=params.get('bound_action', 'clip'))
  return config
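# Usage sketch (illustrative): any of these defaults can be overridden
# through params, e.g. smaller batches and the cache loader:
params = tools.AttrDict(batch_shape=(32, 50), loader='cache', image_bits=5)
config = _data_processing(tools.AttrDict(_unlocked=True), params)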
def default(config, params):
  """Assemble the default configuration from its component functions."""
  config.debug = False
  config.loss_scales = tools.AttrDict(_unlocked=True)
  config = _data_processing(config, params)
  config = _model_components(config, params)
  config = _tasks(config, params)
  config = _loss_functions(config, params)
  config = _training_schedule(config, params)
  return config
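# Usage sketch (illustrative): this mirrors how process() above builds the
# configuration before training.
params = tools.AttrDict(tasks=['cheetah_run'])
config = tools.AttrDict()
with config.unlocked:
  config = default(config, params)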
def _loss_functions(config, params):
  for head in config.gradient_heads:
    assert head in config.heads, head
  config.loss_scales.divergence = params.get('divergence_scale', 1.0)
  config.loss_scales.global_divergence = params.get('global_div_scale', 0.0)
  config.loss_scales.overshooting = params.get('overshooting_scale', 0.0)
  defaults = {'reward': 10.0}
  for head in config.heads:
    scale = defaults.get(head, 1.0)
    config.loss_scales[head] = params.get(head + '_loss_scale', scale)
  config.free_nats = params.get('free_nats', 3.0)
  config.overshooting_distance = params.get('overshooting_distance', 0)
  config.os_stop_posterior_grad = params.get('os_stop_posterior_grad', True)
  config.optimizers = tools.AttrDict(_unlocked=True)
  config.optimizers.main = tools.bind(
      tools.CustomOptimizer,
      optimizer_cls=tools.bind(tf.train.AdamOptimizer, epsilon=1e-4),
      # schedule=tools.bind(tools.schedule.linear, ramp=0),
      learning_rate=params.get('main_learning_rate', 1e-3),
      clipping=params.get('main_gradient_clipping', 1000.0))
  return config
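# Usage sketch (illustrative): per-head loss scales resolve to 10.0 for
# 'reward' and 1.0 otherwise unless overridden through params, e.g.:
params = tools.AttrDict(reward_loss_scale=5.0, divergence_scale=0.1)
config = _loss_functions(config, params)  # assumes heads are already set up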
def dist_from_state(self, state, mask=None):
  """Extract the latent distribution from a prior or posterior state."""
  if mask is not None:
    # Set the stddev of masked-out steps to one so the distribution
    # remains valid at padded time steps.
    stddev = tools.mask(state['stddev'], mask, value=1)
  else:
    stddev = state['stddev']
  dist = tfd.MultivariateNormalDiag(state['mean'], stddev)
  return dist
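# Usage sketch (illustrative): state dicts carry 'mean' and 'stddev'
# tensors; `model` stands in for an instance of the class defining this
# method.
state = {'mean': tf.zeros([8, 30]), 'stddev': tf.ones([8, 30])}
dist = model.dist_from_state(state)          # tfd.MultivariateNormalDiag
log_prob = dist.log_prob(tf.zeros([8, 30]))  # shape [8]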