How to use the lagom.envs.make_vec_env function in lagom

To help you get started, we’ve selected a few lagom examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zuoxingdong / lagom / test / test_vec_env.py View on Github external
assert len(running_avg) == 2 and 'obs_avg' in running_avg and 'r_avg' in running_avg
    assert 'mu' in running_avg['obs_avg'] and 'sigma' in running_avg['obs_avg']
    assert not np.allclose(running_avg['obs_avg']['mu'], 0.0)
    assert not np.allclose(running_avg['obs_avg']['sigma'], 0.0)
    assert 'mu' not in running_avg['r_avg']
    assert 'sigma' in running_avg['r_avg']
    assert running_avg['r_avg']['sigma'] is None

    a = [1]*5
    obs, rewards, _, _ = venv.step(a)
    assert rewards.max() == 0.0001

    del venv, obs, a

    # other settings: turn off use_obs
    venv = make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1, True)
    venv = VecStandardize(venv, 
                          use_obs=False, 
                          use_reward=False, 
                          clip_obs=0.001, 
                          clip_reward=0.0001, 
                          gamma=0.99, 
                          eps=1e-8)
    obs = venv.reset()
    assert np.asarray(obs).max() > 0.001
    a = [1]*5
    obs, rewards, _, _ = venv.step(a)
    assert np.asarray(rewards).max() >= 0.0001

    del venv, obs, a

    # other settings: gamma
github zuoxingdong / lagom / baselines / ppo / logs / default / source_files / experiment.py View on Github external
def make_env(config, seed):
    """Create a vectorized wrapper around a single configured gym environment.

    Args:
        config: mapping with at least 'env.id' and 'env.clip_action' keys.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        The vectorized environment produced by ``make_vec_env``.
    """
    def _build():
        raw = gym.make(config['env.id'])
        # Drop gym's built-in TimeLimit and re-apply it explicitly.
        # TODO: remove until gym update it
        raw = raw.env
        raw = TimeLimit(raw, raw.spec.max_episode_steps)
        if config['env.clip_action'] and isinstance(raw.action_space, Box):
            raw = ClipAction(raw)
        return raw
    return make_vec_env(_build, 1, seed)  # single environment
github zuoxingdong / lagom / baselines / td3 / logs / default / source_files / experiment.py View on Github external
def make_env(config, seed):
    """Build a single action-clipped environment and vectorize it.

    Args:
        config: mapping providing the 'env.id' entry.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        The vectorized environment produced by ``make_vec_env``.
    """
    def _factory():
        # Strip gym's own TimeLimit wrapper before re-wrapping.
        # TODO: remove until gym update it
        base = gym.make(config['env.id']).env
        base = TimeLimit(base, base.spec.max_episode_steps)
        return ClipAction(base)
    return make_vec_env(_factory, 1, seed)  # single environment
github zuoxingdong / lagom / examples / reinforcement_learning / td3 / experiment.py View on Github external
def make_env(config, seed):
    """Construct one clipped-action environment wrapped as a vec-env.

    Args:
        config: mapping providing the 'env.id' entry.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        The vectorized environment produced by ``make_vec_env``.
    """
    def _single():
        inner = gym.make(config['env.id'])
        inner = inner.env  # strip out gym TimeLimit, TODO: remove until gym update it
        limited = TimeLimit(inner, inner.spec.max_episode_steps)
        clipped = ClipAction(limited)
        return clipped
    return make_vec_env(_single, 1, seed)  # single environment
github zuoxingdong / lagom / examples / reinforcement_learning / vpg / experiment.py View on Github external
def make_env(config, seed):
    """Create a serial vec-env holding one optionally-wrapped environment.

    Applies, in order: explicit TimeLimit, optional TimeAwareObservation
    (when 'env.time_aware_obs' is set), and optional ClipAction (when
    'env.clip_action' is set and the action space is a Box).

    Args:
        config: mapping with 'env.id', 'env.time_aware_obs' and
            'env.clip_action' keys.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        The serial vectorized environment produced by ``make_vec_env``.
    """
    def _one():
        # Strip gym's built-in TimeLimit first.
        # TODO: remove until gym update it
        e = gym.make(config['env.id']).env
        e = TimeLimit(e, e.spec.max_episode_steps)
        if config['env.time_aware_obs']:
            e = TimeAwareObservation(e)
        if config['env.clip_action'] and isinstance(e.action_space, Box):
            e = ClipAction(e)
        return e
    return make_vec_env(_one, 1, seed, 'serial')  # single environment
github zuoxingdong / lagom / examples / reinforcement_learning / vpg / logs / default / source_files / experiment.py View on Github external
def make_env(config, seed):
    """Vectorize one environment, clipping actions when configured.

    Args:
        config: mapping with 'env.id' and 'env.clip_action' keys.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        The vectorized environment produced by ``make_vec_env``.
    """
    def _instantiate():
        created = gym.make(config['env.id'])
        created = created.env  # strip out gym TimeLimit, TODO: remove until gym update it
        created = TimeLimit(created, created.spec.max_episode_steps)
        # Clip only continuous (Box) action spaces, and only when requested.
        should_clip = config['env.clip_action'] and isinstance(created.action_space, Box)
        return ClipAction(created) if should_clip else created
    return make_vec_env(_instantiate, 1, seed)  # single environment
github zuoxingdong / lagom / baselines / sac / logs / old_default / source_files / experiment.py View on Github external
def make_env(config, seed):
    """Build a single environment with normalized actions, vectorized.

    Args:
        config: mapping providing the 'env.id' entry.
        seed: seed forwarded to ``make_vec_env``.

    Returns:
        The vectorized environment produced by ``make_vec_env``.
    """
    def _spawn():
        wrapped = gym.make(config['env.id'])
        # Drop gym's built-in TimeLimit and re-apply it explicitly.
        # TODO: remove until gym update it
        wrapped = wrapped.env
        wrapped = TimeLimit(wrapped, wrapped.spec.max_episode_steps)
        wrapped = NormalizeAction(wrapped)
        return wrapped
    return make_vec_env(_spawn, 1, seed)  # single environment
github zuoxingdong / lagom / examples / policy_gradient / reinforce / algo.py View on Github external
def __call__(self, config, seed, device):
        set_global_seeds(seed)
        logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

        env = make_vec_env(vec_env_class=SerialVecEnv, 
                           make_env=make_gym_env, 
                           env_id=config['env.id'], 
                           num_env=config['train.N'],  # batched environment
                           init_seed=seed)
        eval_env = make_vec_env(vec_env_class=SerialVecEnv, 
                                make_env=make_gym_env, 
                                env_id=config['env.id'], 
                                num_env=config['eval.N'], 
                                init_seed=seed)
        if config['env.standardize']:  # running averages of observation and reward
            env = VecStandardize(venv=env, 
                                 use_obs=True, 
                                 use_reward=True, 
                                 clip_obs=10., 
                                 clip_reward=10., 
                                 gamma=0.99,