def test_run_continuous(self):
    from tf2rl.algos.ddpg import DDPG
    parser = DDPG.get_argument(self.parser)
    parser.set_defaults(n_warmup=1)
    args, _ = parser.parse_known_args()

    def env_fn():
        return gym.make('Pendulum-v0')

    def policy_fn(env, name, memory_capacity=int(1e6), gpu=-1, *args, **kwargs):
        return DDPG(
            state_shape=env.observation_space.shape,
            action_dim=env.action_space.high.size,
            n_warmup=500,
            gpu=-1)

    def get_weights_fn(policy):
        # The snippet was truncated after the second entry; the target-critic
        # weights below are an assumed completion in line with tf2rl's examples.
        return [policy.actor.weights,
                policy.critic.weights,
                policy.critic_target.weights]
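Ape-X style training also needs the inverse operation. Below is a minimal sketch of a matching set_weights_fn, assuming the policy exposes actor, critic, and critic_target networks as above, and using tf2rl's update_target_variables helper (which the later snippet imports):

from tf2rl.misc.target_update_ops import update_target_variables

def set_weights_fn(policy, weights):
    actor_weights, critic_weights, critic_target_weights = weights
    # tau=1.0 makes update_target_variables perform a hard copy
    # of the received weights into the local networks.
    update_target_variables(policy.actor.weights, actor_weights, tau=1.)
    update_target_variables(policy.critic.weights, critic_weights, tau=1.)
    update_target_variables(policy.critic_target.weights, critic_target_weights, tau=1.)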
def get_argument(parser=None):
    parser = DDPG.get_argument(parser)
    parser.add_argument('--eta', type=float, default=0.05)
    return parser
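The --eta flag then rides alongside DDPG's own options. A hypothetical usage, assuming DDPG.get_argument builds a fresh parser when passed None:

parser = get_argument()  # assumption: DDPG.get_argument() creates a parser when given None
args = parser.parse_args(['--eta', '0.1'])
print(args.eta)          # -> 0.1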
import argparse
import numpy as np
import gym
import roboschool
from tf2rl.algos.apex import apex_argument, run
from tf2rl.algos.ddpg import DDPG
from tf2rl.misc.target_update_ops import update_target_variables

if __name__ == '__main__':
    parser = apex_argument()
    parser.add_argument('--env-name', type=str,
                        default="RoboschoolAtlasForwardWalk-v1")
    parser = DDPG.get_argument(parser)
    args = parser.parse_args()

    # Prepare env and policy function
    def env_fn():
        return gym.make(args.env_name)

    def policy_fn(env, name, memory_capacity=int(1e6),
                  gpu=-1, noise_level=0.3):
        return DDPG(
            state_shape=env.observation_space.shape,
            action_dim=env.action_space.high.size,
            max_action=env.action_space.high[0],
            gpu=gpu,
            name=name,
            sigma=noise_level,
            batch_size=100)  # closing parenthesis restored; the snippet was cut off here
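The snippet stops at the policy factory; tf2rl's Ape-X examples close by handing these callables to run. A sketch of that last step, assuming run accepts the parsed args plus the env/policy/weight-transfer callables, with get_weights_fn and set_weights_fn defined as shown earlier:

    # Assumed final wiring, mirroring tf2rl's Ape-X examples:
    run(args, env_fn, policy_fn, get_weights_fn, set_weights_fn)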
import roboschool
import gym
from tf2rl.algos.ddpg import DDPG
from tf2rl.experiments.trainer import Trainer

if __name__ == '__main__':
    parser = Trainer.get_argument()
    parser = DDPG.get_argument(parser)
    parser.add_argument('--env-name', type=str, default="RoboschoolAnt-v1")
    parser.set_defaults(batch_size=100)
    parser.set_defaults(n_warmup=10000)
    args = parser.parse_args()

    env = gym.make(args.env_name)
    test_env = gym.make(args.env_name)
    policy = DDPG(
        state_shape=env.observation_space.shape,
        action_dim=env.action_space.high.size,
        gpu=args.gpu,
        memory_capacity=args.memory_capacity,
        max_action=env.action_space.high[0],
        batch_size=args.batch_size,
        n_warmup=args.n_warmup)
    trainer = Trainer(policy, env, args, test_env=test_env)
    trainer()  # tf2rl's Trainer is callable; this kicks off the training loop
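Saved as, say, run_ddpg.py (hypothetical filename), the script could then be launched with something like python run_ddpg.py --env-name RoboschoolAnt-v1; the exact flag spellings come from Trainer.get_argument and DDPG.get_argument, so check the parser's --help output if in doubt.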