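The snippets below reference an `args` namespace, an `env`/`test_env` pair, and helpers such as `get_act_dim` and `is_discrete` that are created elsewhere. Assuming they come from the tf2rl library (whose PPO, VPG and OnPolicyTrainer match these call signatures), a minimal setup sketch might look like the following; the import paths, the `get_argument` helpers and the CartPole-v1 default are assumptions rather than part of the original code:

import gym

from tf2rl.algos.ppo import PPO
from tf2rl.algos.vpg import VPG
from tf2rl.envs.utils import get_act_dim, is_discrete
from tf2rl.experiments.on_policy_trainer import OnPolicyTrainer

# tf2rl's trainers and algorithms expose `get_argument` helpers that register
# the command-line flags the snippets read from `args` (assumed here; check
# the installed tf2rl version for the exact flag names).
parser = OnPolicyTrainer.get_argument()
parser = PPO.get_argument(parser)
parser.add_argument("--env-name", type=str, default="CartPole-v1")
args = parser.parse_args()

# Training and evaluation environments built from the same Gym id.
env = gym.make(args.env_name)
test_env = gym.make(args.env_name)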
# PPO on a discrete-action task with a shared actor-critic network.
# `state_shape`, `action_dim` and `actor_critic` are assumed to be built
# beforehand (e.g. from the environment's observation and action spaces).
policy = PPO(
    state_shape=state_shape,
    action_dim=action_dim,
    is_discrete=True,
    actor_critic=actor_critic,
    batch_size=args.batch_size,
    n_epoch=3,
    lr_actor=2.5e-4,
    lr_critic=2.5e-4,
    discount=0.99,
    lam=0.95,
    horizon=args.horizon,
    normalize_adv=args.normalize_adv,
    enable_gae=args.enable_gae,
    gpu=args.gpu)
trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)
trainer()
# Vanilla policy gradient (VPG) with small 32x32 actor/critic networks.
# fix_std=True keeps the policy's standard deviation fixed (only relevant
# for continuous action spaces).
policy = VPG(
    state_shape=env.observation_space.shape,
    action_dim=get_act_dim(env.action_space),
    is_discrete=is_discrete(env.action_space),
    max_action=None if is_discrete(
        env.action_space) else env.action_space.high[0],
    batch_size=args.batch_size,
    actor_units=[32, 32],
    critic_units=[32, 32],
    discount=0.9,
    horizon=args.horizon,
    fix_std=True,
    normalize_adv=args.normalize_adv,
    enable_gae=args.enable_gae,
    gpu=args.gpu)
trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)
trainer()
# PPO with larger 64x64 networks, 10 update epochs for both actor and critic,
# and 3e-4 learning rates. The constructor head mirrors the env-derived
# arguments used in the other examples.
policy = PPO(
    state_shape=env.observation_space.shape,
    action_dim=get_act_dim(env.action_space),
    is_discrete=is_discrete(env.action_space),
    max_action=None if is_discrete(
        env.action_space) else env.action_space.high[0],
    batch_size=args.batch_size,
    actor_units=[64, 64],
    critic_units=[64, 64],
    n_epoch=10,
    n_epoch_critic=10,
    lr_actor=3e-4,
    lr_critic=3e-4,
    discount=0.99,
    lam=0.95,
    horizon=args.horizon,
    normalize_adv=args.normalize_adv,
    enable_gae=args.enable_gae,
    gpu=args.gpu)
trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)
trainer()
# PPO with small 32x32 networks, evaluated on a separate test environment
# created from the same Gym id.
test_env = gym.make(args.env_name)
policy = PPO(
    state_shape=env.observation_space.shape,
    action_dim=get_act_dim(env.action_space),
    is_discrete=is_discrete(env.action_space),
    max_action=None if is_discrete(
        env.action_space) else env.action_space.high[0],
    batch_size=args.batch_size,
    actor_units=[32, 32],
    critic_units=[32, 32],
    discount=0.9,
    horizon=args.horizon,
    normalize_adv=args.normalize_adv,
    enable_gae=args.enable_gae,
    gpu=args.gpu)
trainer = OnPolicyTrainer(policy, env, args, test_env=test_env)
trainer()