assert len(running_avg) == 2 and 'obs_avg' in running_avg and 'r_avg' in running_avg
assert 'mu' in running_avg['obs_avg'] and 'sigma' in running_avg['obs_avg']
assert not np.allclose(running_avg['obs_avg']['mu'], 0.0)
assert not np.allclose(running_avg['obs_avg']['sigma'], 0.0)
assert 'mu' not in running_avg['r_avg']
assert 'sigma' in running_avg['r_avg']
assert running_avg['r_avg']['sigma'] is None
a = [1]*5
obs, rewards, _, _ = venv.step(a)
assert rewards.max() == 0.0001
del venv, obs, a
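# Illustrative sketch (not lagom's actual VecStandardize code): a running
# mean/std tracker of the kind such a wrapper typically maintains for
# observations, using batched Welford-style moment updates. All names here
# are hypothetical and only meant to clarify the assertions above.
import numpy as np

class RunningMeanStd:
    def __init__(self, shape, eps=1e-8):
        self.mu = np.zeros(shape, dtype=np.float64)   # running mean
        self.var = np.ones(shape, dtype=np.float64)   # running variance
        self.count = eps                              # avoids division by zero early on

    def update(self, x):
        x = np.asarray(x, dtype=np.float64)
        batch_mu, batch_var, batch_count = x.mean(0), x.var(0), x.shape[0]
        delta = batch_mu - self.mu
        total = self.count + batch_count
        new_mu = self.mu + delta * batch_count / total
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        self.var = (m_a + m_b + delta**2 * self.count * batch_count / total) / total
        self.mu, self.count = new_mu, total

    @property
    def sigma(self):
        return np.sqrt(self.var)

rms = RunningMeanStd(shape=(4,))    # CartPole-v1 observations are 4-dimensional
batch = np.random.randn(5, 4)       # a batch from 5 parallel environments
rms.update(batch)
standardized = np.clip((batch - rms.mu) / (rms.sigma + 1e-8), -10., 10.)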
# other settings: turn off use_obs and use_reward
venv = make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1, True)
venv = VecStandardize(venv,
                      use_obs=False,
                      use_reward=False,
                      clip_obs=0.001,
                      clip_reward=0.0001,
                      gamma=0.99,
                      eps=1e-8)
obs = venv.reset()
assert np.asarray(obs).max() > 0.001
a = [1]*5
obs, rewards, _, _ = venv.step(a)
assert np.asarray(rewards).max() >= 0.0001
del venv, obs, a
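# Illustrative sketch (hypothetical helper, not the lagom API): the behaviour the
# assertions above rely on. When standardization of a signal is switched off, the
# raw values pass through untouched, which is why they can exceed the tiny clip
# thresholds used in this test.
import numpy as np

def standardize_and_clip(x, mu, sigma, enabled, clip, eps=1e-8):
    x = np.asarray(x, dtype=np.float64)
    if not enabled:                      # use_obs=False / use_reward=False: pass through
        return x
    return np.clip((x - mu) / (sigma + eps), -clip, clip)

raw = np.array([0.02, -1.3, 0.5, 2.1])
print(standardize_and_clip(raw, mu=0.0, sigma=1.0, enabled=False, clip=0.001))  # unchanged
print(standardize_and_clip(raw, mu=0.0, sigma=1.0, enabled=True, clip=0.001))   # clipped to ±0.001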
# other settings: gamma
def make_env(config, seed):
    def _make_env():
        env = gym.make(config['env.id'])
        env = env.env  # strip out gym's TimeLimit, TODO: remove once gym updates it
        env = TimeLimit(env, env.spec.max_episode_steps)
        if config['env.clip_action'] and isinstance(env.action_space, Box):
            env = ClipAction(env)
        return env
    env = make_vec_env(_make_env, 1, seed)  # single environment
    return env
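# Usage sketch for the factory above; the config keys come from the code, while
# the values ('Pendulum-v0', clip_action=True) are made up for illustration.
config = {'env.id': 'Pendulum-v0', 'env.clip_action': True}
env = make_env(config, seed=0)   # vectorized wrapper around a single environment
obs = env.reset()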
def make_env(config, seed):
    def _make_env():
        env = gym.make(config['env.id'])
        env = env.env  # strip out gym's TimeLimit, TODO: remove once gym updates it
        env = TimeLimit(env, env.spec.max_episode_steps)
        env = ClipAction(env)
        return env
    env = make_vec_env(_make_env, 1, seed)  # single environment
    return env
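# Illustrative sketch, assuming gym.wrappers.ClipAction and the classic
# 'Pendulum-v0' id: this variant always clips continuous actions into the
# action space's Box bounds before they reach the underlying environment.
import gym
from gym.wrappers import ClipAction

env = ClipAction(gym.make('Pendulum-v0').env)   # .env strips TimeLimit, as above
env.reset()
obs, reward, done, info = env.step([100.0])     # silently clipped to the high bound (2.0)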
def make_env(config, seed):
    def _make_env():
        env = gym.make(config['env.id'])
        env = env.env  # strip out gym's TimeLimit, TODO: remove once gym updates it
        env = TimeLimit(env, env.spec.max_episode_steps)
        if config['env.time_aware_obs']:
            env = TimeAwareObservation(env)
        if config['env.clip_action'] and isinstance(env.action_space, Box):
            env = ClipAction(env)
        return env
    env = make_vec_env(_make_env, 1, seed, 'serial')  # single environment
    return env
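# Illustrative sketch, assuming gym.wrappers.TimeAwareObservation: when
# config['env.time_aware_obs'] is enabled, the current time step is appended to
# each observation, e.g. CartPole's 4-dim observation becomes 5-dim.
import gym
from gym.wrappers import TimeAwareObservation

env = TimeAwareObservation(gym.make('CartPole-v1'))
obs = env.reset()
assert obs.shape == (5,)   # 4 state features + 1 time-step feature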
def make_env(config, seed):
    def _make_env():
        env = gym.make(config['env.id'])
        env = env.env  # strip out gym's TimeLimit, TODO: remove once gym updates it
        env = TimeLimit(env, env.spec.max_episode_steps)
        env = NormalizeAction(env)
        return env
    env = make_vec_env(_make_env, 1, seed)  # single environment
    return env
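# Illustrative sketch (hypothetical helper, not lagom's NormalizeAction code):
# wrappers of this kind typically let the agent act in [-1, 1] and rescale the
# action into the environment's Box bounds before stepping.
import numpy as np

def rescale_action(action, low, high):
    action = np.clip(action, -1.0, 1.0)
    return low + 0.5 * (action + 1.0) * (high - low)

print(rescale_action(np.array([0.0]), low=np.array([-2.0]), high=np.array([2.0])))  # [0.]
print(rescale_action(np.array([1.0]), low=np.array([-2.0]), high=np.array([2.0])))  # [2.]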
def __call__(self, config, seed, device):
    set_global_seeds(seed)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
    env = make_vec_env(vec_env_class=SerialVecEnv,
                       make_env=make_gym_env,
                       env_id=config['env.id'],
                       num_env=config['train.N'],  # batched environment
                       init_seed=seed)
    eval_env = make_vec_env(vec_env_class=SerialVecEnv,
                            make_env=make_gym_env,
                            env_id=config['env.id'],
                            num_env=config['eval.N'],
                            init_seed=seed)
    if config['env.standardize']:  # running averages of observation and reward
        env = VecStandardize(venv=env,
                             use_obs=True,
                             use_reward=True,
                             clip_obs=10.,
                             clip_reward=10.,
                             gamma=0.99,