How to use the planet.tools module in planet

To help you get started, we’ve selected a few planet.tools examples, based on popular ways the module is used in public projects. planet.tools is the utility module of google-research/planet; it provides helpers such as tools.bind, tools.AttrDict, and tools.shape that appear throughout the excerpts below. The snippets are excerpts from the repo and assume its own imports: TensorFlow 1.x as tf, tensorflow_probability distributions as tfd, numpy as np, and the planet tools module.

Example from google-research/planet (planet/scripts/configs.py)
def _tasks(config, params):
  tasks = params.get('tasks', ['cheetah_run'])
  tasks = [getattr(tasks_lib, name)(config, params) for name in tasks]
  config.isolate_envs = params.get('isolate_envs', 'thread')
  def common_spaces_ctor(task, action_spaces):
    env = task.env_ctor()
    env = control.wrappers.SelectObservations(env, ['image'])
    env = control.wrappers.PadActions(env, action_spaces)
    return env
  if len(tasks) > 1:
    action_spaces = [task.env_ctor().action_space for task in tasks]
    for index, task in enumerate(tasks):
      env_ctor = tools.bind(common_spaces_ctor, task, action_spaces)
      tasks[index] = tasks_lib.Task(
          task.name, env_ctor, task.max_length, ['reward'])
  for name in tasks[0].state_components:
    if name == 'reward' or params.get('state_diagnostics', False):
      config.heads[name] = tools.bind(
          config.head_network,
          stop_gradient=name not in config.gradient_heads)
      config.loss_scales[name] = 1.0
  config.tasks = tasks
  return config
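
tools.bind appears in almost every excerpt on this page. Judging by how it is used, it appears to behave like functools.partial: it pre-fills arguments now so that the resulting constructor can be called later with none. A self-contained sketch of the pattern (the names below are illustrative, not from the repo):

import functools

def build_env(task_name, action_repeat):
  # Stand-in for an environment constructor.
  return '{}(action_repeat={})'.format(task_name, action_repeat)

env_ctor = functools.partial(build_env, 'cheetah_run', action_repeat=4)
print(env_ctor())  # -> cheetah_run(action_repeat=4)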

Example from google-research/planet (planet/networks/basic.py)
def feed_forward(
    state, data_shape, num_layers=2, activation=tf.nn.relu,
    mean_activation=None, stop_gradient=False, trainable=True, units=100,
    std=1.0, low=-1.0, high=1.0, dist='normal'):
  """Create a model returning unnormalized MSE distribution."""
  hidden = state
  if stop_gradient:
    hidden = tf.stop_gradient(hidden)
  for _ in range(num_layers):
    hidden = tf.layers.dense(hidden, units, activation)
  mean = tf.layers.dense(
      hidden, int(np.prod(data_shape)), mean_activation, trainable=trainable)
  mean = tf.reshape(mean, tools.shape(state)[:-1] + data_shape)
  if std == 'learned':
    std = tf.layers.dense(
        hidden, int(np.prod(data_shape)), None, trainable=trainable)
    std = tf.nn.softplus(std + 0.55) + 0.01
    std = tf.reshape(std, tools.shape(state)[:-1] + data_shape)
  if dist == 'normal':
    dist = tfd.Normal(mean, std)
  elif dist == 'truncated_normal':
    # https://www.desmos.com/calculator/3o96eyqxib
    dist = tfd.TruncatedNormal(mean, std, low, high)
  elif dist == 'tanh_normal':
    # https://www.desmos.com/calculator/sxpp7ectjv
    dist = tfd.Normal(mean, std)
    dist = tfd.TransformedDistribution(dist, tfp.bijectors.Tanh())
  elif dist == 'deterministic':
    dist = tfd.Deterministic(mean)
  else:
    raise NotImplementedError(dist)
  # The original excerpt is truncated here; restoring a minimal ending that
  # wraps the data_shape dims into the event and returns the distribution.
  dist = tfd.Independent(dist, len(data_shape))
  return dist
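
A rough usage sketch for the head above, assuming the planet package is importable and TensorFlow 1.x with tensorflow_probability is installed (shapes and values are illustrative only):

import tensorflow as tf
from planet.networks import basic

state = tf.zeros([8, 30])                         # [batch, latent features]
dist = basic.feed_forward(state, data_shape=[1])  # Normal head, event shape [1]
log_prob = dist.log_prob(tf.zeros([8, 1]))        # per-example log-likelihood

In the configs, this function is usually not called directly but bound with tools.bind and stored on the config, as the _tasks excerpt above suggests.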

Example from google-research/planet (planet/scripts/train.py)
def process(logdir, args):
  with args.params.unlocked:
    args.params.logdir = logdir
  config = tools.AttrDict()
  with config.unlocked:
    config = getattr(configs, args.config)(config, args.params)
  training.utility.collect_initial_episodes(config)
  tf.reset_default_graph()
  dataset = tools.numpy_episodes.numpy_episodes(
      config.train_dir, config.test_dir, config.batch_shape,
      reader=config.data_reader,
      loader=config.data_loader,
      num_chunks=config.num_chunks,
      preprocess_fn=config.preprocess_fn)
  for score in training.utility.train(
      training.define_model, dataset, logdir, config):
    yield score
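
Note that process() is a generator: it builds the config, collects initial episodes, constructs the episode dataset, and only then lazily yields test scores as training progresses. A minimal, hypothetical driver (the repo's actual entry point is planet.scripts.train, which assembles args from command-line flags):

def run(logdir, args):
  # Iterating the generator drives training; each yielded value is a score.
  for score in process(logdir, args):
    print('Test score:', score)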

Example from google-research/planet (planet/training/utility.py)
# This excerpt begins mid-function; the enclosing signature and guard are
# restored here for context, following train() in planet/training/utility.py.
def train(model_fn, datasets, logdir, config):
  if not config:
    raise KeyError('You must specify a configuration.')
  logdir = logdir and os.path.expanduser(logdir)
  try:
    config = load_config(logdir)
  except RuntimeError:
    print('Failed to load existing config.')
  except IOError:
    config = save_config(config, logdir)
  trainer = trainer_.Trainer(logdir, config=config)
  cleanups = []
  try:
    with tf.variable_scope('graph', use_resource=True):
      data = get_batch(datasets, trainer.phase, trainer.reset)
      score, summary, cleanups = model_fn(data, trainer, config)
      message = 'Graph contains {} trainable variables.'
      tf.logging.info(message.format(tools.count_weights()))
      if config.train_steps:
        trainer.add_phase(
            'train', config.train_steps, score, summary,
            batch_size=config.batch_shape[0],
            report_every=None,
            log_every=config.train_log_every,
            checkpoint_every=config.train_checkpoint_every)
      if config.test_steps:
        trainer.add_phase(
            'test', config.test_steps, score, summary,
            batch_size=config.batch_shape[0],
            report_every=config.test_steps,
            log_every=config.test_steps,
            checkpoint_every=config.test_checkpoint_every)
    for saver in config.savers:
      trainer.add_saver(**saver)
    # The excerpt is truncated here; in the repo the try-block goes on to
    # yield scores from the trainer, and a finally-clause runs the collected
    # cleanups, roughly as follows:
    for score in trainer.iterate(config.max_steps):
      yield score
  finally:
    for cleanup in cleanups:
      cleanup()

Example from google-research/planet (planet/control/simulate.py)
# This excerpt begins mid-function; an enclosing signature consistent with
# the names used below (an assumption, not copied from the repo) would be:
def simulate(
    step, env_ctor, duration, num_agents, agent_config, isolate_envs,
    expensive_summaries=False, gif_summary=False, name='simulate'):
  summaries = []
  with tf.variable_scope(name):
    return_, image, action, reward, cleanup = collect_rollouts(
        step=step,
        env_ctor=env_ctor,
        duration=duration,
        num_agents=num_agents,
        agent_config=agent_config,
        isolate_envs=isolate_envs)
    return_mean = tf.reduce_mean(return_)
    summaries.append(tf.summary.scalar('return', return_mean))
    if expensive_summaries:
      summaries.append(tf.summary.histogram('return_hist', return_))
      summaries.append(tf.summary.histogram('reward_hist', reward))
      summaries.append(tf.summary.histogram('action_hist', action))
      summaries.append(tools.image_strip_summary(
          'image', image, max_length=duration))
    if gif_summary:
      summaries.append(tools.gif_summary(
          'animation', image, max_outputs=1, fps=20))
  summary = tf.summary.merge(summaries)
  return summary, return_mean, cleanup
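
The snippet returns a merged summary op alongside the mean return; a training loop then evaluates and writes them. A minimal sketch, assuming an active TF1 session and a tf.summary.FileWriter (both hypothetical here):

def log_simulation(sess, writer, summary, return_mean, step):
  # Evaluate the merged summary op and write it for TensorBoard.
  summary_str, mean = sess.run([summary, return_mean])
  writer.add_summary(summary_str, global_step=step)
  return mean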

Example from google-research/planet (planet/training/trainer.py)
def add_saver(
      self, include=r'.*', exclude=r'.^', logdir=None, load=True, save=True,
      checkpoint=None):
    """Add a saver to save or load variables.

    Args:
      include: One or more regexes to match variable names to include.
      exclude: One or more regexes to match variable names to exclude.
      logdir: Directory for saver to store and search for checkpoints.
      load: Whether to use the saver to restore variables.
      save: Whether to use the saver to save variables.
      checkpoint: Checkpoint name to load; None for newest.
    """
    variables = tools.filter_variables(include, exclude)
    saver = tf.train.Saver(variables, keep_checkpoint_every_n_hours=2)
    if load:
      self._loaders.append(saver)
    if save:
      self._savers.append(saver)
    self._logdirs.append(logdir or self._logdir)
    if checkpoint is None and self._config.checkpoint_to_load:
      self._checkpoints.append(
          os.path.join(self._logdirs[-1], self._config.checkpoint_to_load))
    else:
      self._checkpoints.append(checkpoint)
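
Given the documented signature, calls like the following would register savers. These are hypothetical examples, mirroring how the config.savers entries are unpacked with trainer.add_saver(**saver) in the utility.py excerpt above:

trainer.add_saver()                        # save and restore all variables
trainer.add_saver(include=r'graph/encoder/.*', save=False)  # restore-only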

Example from google-research/planet (planet/scripts/configs.py)
def _data_processing(config, params):
  config.batch_shape = params.get('batch_shape', (50, 50))
  config.num_chunks = params.get('num_chunks', 1)
  image_bits = params.get('image_bits', 5)
  config.preprocess_fn = tools.bind(
      tools.preprocess.preprocess, bits=image_bits)
  config.postprocess_fn = tools.bind(
      tools.preprocess.postprocess, bits=image_bits)
  config.open_loop_context = 5
  config.data_reader = tools.numpy_episodes.episode_reader
  config.data_loader = {
      'cache': tools.bind(
          tools.numpy_episodes.cache_loader,
          every=params.get('loader_every', 1000)),
      'recent': tools.bind(
          tools.numpy_episodes.recent_loader,
          every=params.get('loader_every', 1000)),
      'reload': tools.numpy_episodes.reload_loader,
      'dummy': tools.numpy_episodes.dummy_loader,
  }[params.get('loader', 'recent')]
  config.bound_action = tools.bind(
      tools.bound_action,
      strategy=params.get('bound_action', 'clip'))
  return config
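
Here image_bits=5 means observations are quantized to 5 bits per channel before entering the model. The actual implementation lives in planet.tools.preprocess; the NumPy sketch below shows that style of bit-depth reduction and is an assumption about the technique, not the repo's code:

import numpy as np

def quantize(image, bits=5):
  # Keep only the top `bits` bits of each 8-bit pixel, then center the range.
  bins = 2 ** bits
  image = np.floor(image / 2 ** (8 - bits))  # [0, 255] -> [0, bins - 1]
  return image / bins - 0.5                  # roughly [-0.5, 0.5)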

Example from google-research/planet (planet/scripts/configs.py)
def default(config, params):
  config.debug = False
  config.loss_scales = tools.AttrDict(_unlocked=True)
  config = _data_processing(config, params)
  config = _model_components(config, params)
  config = _tasks(config, params)
  config = _loss_functions(config, params)
  config = _training_schedule(config, params)
  return config
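
default() simply chains the section builders shown on this page into one config. A hypothetical way to build it directly, mirroring how process() in train.py constructs its config (the _unlocked flag matches its use in the excerpts above):

from planet import tools
from planet.scripts import configs

params = tools.AttrDict(_unlocked=True)
params.tasks = ['cheetah_run']
config = tools.AttrDict()
with config.unlocked:
  config = configs.default(config, params)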

Example from google-research/planet (planet/scripts/configs.py)
def _loss_functions(config, params):
  for head in config.gradient_heads:
    assert head in config.heads, head
  config.loss_scales.divergence = params.get('divergence_scale', 1.0)
  config.loss_scales.global_divergence = params.get('global_div_scale', 0.0)
  config.loss_scales.overshooting = params.get('overshooting_scale', 0.0)
  for head in config.heads:
    defaults = {'reward': 10.0}
    scale = defaults[head] if head in defaults else 1.0
    config.loss_scales[head] = params.get(head + '_loss_scale', scale)
  config.free_nats = params.get('free_nats', 3.0)
  config.overshooting_distance = params.get('overshooting_distance', 0)
  config.os_stop_posterior_grad = params.get('os_stop_posterior_grad', True)
  config.optimizers = tools.AttrDict(_unlocked=True)
  config.optimizers.main = tools.bind(
      tools.CustomOptimizer,
      optimizer_cls=tools.bind(tf.train.AdamOptimizer, epsilon=1e-4),
      # schedule=tools.bind(tools.schedule.linear, ramp=0),
      learning_rate=params.get('main_learning_rate', 1e-3),
      clipping=params.get('main_gradient_clipping', 1000.0))
  return config
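
These scales weight each term of the total objective, with the reward head up-weighted 10x by default. Illustrative arithmetic only; the real combination happens inside planet's model definition:

loss_scales = {'reward': 10.0, 'image': 1.0, 'divergence': 1.0}
losses = {'reward': 0.2, 'image': 3.5, 'divergence': 0.8}
total = sum(loss_scales[k] * losses[k] for k in losses)
print(total)  # 2.0 + 3.5 + 0.8 = 6.3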

Example from google-research/planet (planet/models/rssm.py)
def dist_from_state(self, state, mask=None):
    """Extract the latent distribution from a prior or posterior state."""
    if mask is not None:
      stddev = tools.mask(state['stddev'], mask, value=1)
    else:
      stddev = state['stddev']
    dist = tfd.MultivariateNormalDiag(state['mean'], stddev)
    return dist
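
A hypothetical call pattern for the method above: given a state dict holding diagonal-Gaussian parameters, it yields a distribution you can sample or score. The sketch below mirrors the method body with concrete shapes (illustrative only):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

state = {'mean': tf.zeros([8, 30]), 'stddev': tf.ones([8, 30])}
dist = tfd.MultivariateNormalDiag(state['mean'], state['stddev'])
sample = dist.sample()            # shape [8, 30]
log_prob = dist.log_prob(sample)  # shape [8]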