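# The excerpts below appear to be truncated fragments of meta-RL training scripts
# built on learn2learn (l2l), cherry (ch), gym, and PyTorch (th). Each fragment
# starts partway through a function signature; the opening `def ...(` line, the
# imports, and helper definitions such as DiagNormalPolicy and LinearValue are
# cut off in the excerpts. Inferred from the aliases used (an assumption, not
# shown in the original), the fragments rely on imports along these lines:
#
#     import random
#     import numpy as np
#     import torch as th
#     import torch.optim as optim
#     import gym
#     import cherry as ch
#     import learn2learn as l2l
#     from tqdm import tqdm
#
# Fragment 1: a MAML-style meta-RL outer loop. It seeds the RNGs, builds a
# vectorized environment, wraps a diagonal-Gaussian policy in a MAML
# meta-learner, and iterates over sampled task configurations, cloning the
# learner for each task.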
        num_iterations=20,
        meta_bsz=10,
        adapt_bsz=10,
        tau=1.00,
        gamma=0.99,
        num_workers=1,
        seed=42,
):
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)

    def make_env():
        return gym.make(env_name)

    env = l2l.gym.AsyncVectorEnv([make_env for _ in range(num_workers)])
    env.seed(seed)
    env = ch.envs.Torch(env)
    policy = DiagNormalPolicy(env.state_size, env.action_size)
    meta_learner = l2l.algorithms.MAML(policy, lr=meta_lr)  # MAML lives under l2l.algorithms
    baseline = LinearValue(env.state_size, env.action_size)
    opt = optim.Adam(meta_learner.parameters(), lr=meta_lr)
    all_rewards = []

    for iteration in range(num_iterations):
        iteration_reward = 0.0
        iteration_replays = []
        iteration_policies = []

        for task_config in tqdm(env.sample_tasks(meta_bsz), leave=False, desc='Data'):  # Samples a new config
            learner = meta_learner.clone()
            env.reset_task(task_config)
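

# Fragment 2: a variant of the same loop that meta-learns per-parameter
# learning rates with l2l.algorithms.MetaSGD instead of MAML, builds the Adam
# optimizer over policy.parameters(), and collects rollouts through a
# ch.envs.Runner wrapper.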
        num_iterations=200,
        meta_bsz=20,
        adapt_bsz=20,
        tau=1.00,
        gamma=0.99,
        num_workers=2,
        seed=42,
):
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)

    def make_env():
        return gym.make(env_name)

    env = l2l.gym.AsyncVectorEnv([make_env for _ in range(num_workers)])
    env.seed(seed)
    env = ch.envs.Torch(env)
    policy = DiagNormalPolicy(env.state_size, env.action_size)
    meta_learner = l2l.algorithms.MetaSGD(policy, lr=meta_lr)
    baseline = LinearValue(env.state_size, env.action_size)
    opt = optim.Adam(policy.parameters(), lr=meta_lr)
    all_rewards = []

    for iteration in range(num_iterations):
        iteration_loss = 0.0
        iteration_reward = 0.0

        for task_config in tqdm(env.sample_tasks(meta_bsz)):  # Samples a new config
            learner = meta_learner.clone()
            env.set_task(task_config)
            env.reset()
            task = ch.envs.Runner(env)
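

# Fragment 3: an excerpt whose hyperparameters (eta, adaptive_penalty,
# kl_target) suggest a ProMP-style proximal meta-policy update. The raw
# environment is wrapped in ch.envs.ActionSpaceScaler, and the policy is a
# two-layer (64, 64) tanh DiagNormalPolicy managed by l2l.algorithms.MAML.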
        eta=0.0005,
        adaptive_penalty=False,
        kl_target=0.01,
        num_workers=4,
        seed=421,
):
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)

    def make_env():
        env = gym.make(env_name)
        env = ch.envs.ActionSpaceScaler(env)
        return env

    env = l2l.gym.AsyncVectorEnv([make_env for _ in range(num_workers)])
    env.seed(seed)
    env = ch.envs.ActionSpaceScaler(env)
    env = ch.envs.Torch(env)
    policy = DiagNormalPolicy(input_size=env.state_size,
                              output_size=env.action_size,
                              hiddens=[64, 64],
                              activation='tanh')
    meta_learner = l2l.algorithms.MAML(policy, lr=meta_lr)
    baseline = LinearValue(env.state_size, env.action_size)
    opt = optim.Adam(meta_learner.parameters(), lr=meta_lr)

    for iteration in range(num_iterations):
        iteration_reward = 0.0
        iteration_replays = []
        iteration_policies = []
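

# Fragment 4: another MAML-based variant of the training loop. Unlike
# Fragment 1, the Adam optimizer here is built over policy.parameters()
# rather than the meta-learner's parameters, and rollouts are gathered
# through a ch.envs.Runner.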
        num_iterations=200,
        meta_bsz=20,
        adapt_bsz=20,
        tau=1.00,
        gamma=0.99,
        num_workers=2,
        seed=42,
):
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)

    def make_env():
        return gym.make(env_name)

    env = l2l.gym.AsyncVectorEnv([make_env for _ in range(num_workers)])
    env.seed(seed)
    env = ch.envs.Torch(env)
    policy = DiagNormalPolicy(env.state_size, env.action_size)
    meta_learner = l2l.algorithms.MAML(policy, lr=meta_lr)
    baseline = LinearValue(env.state_size, env.action_size)
    opt = optim.Adam(policy.parameters(), lr=meta_lr)
    all_rewards = []

    for iteration in range(num_iterations):
        iteration_loss = 0.0
        iteration_reward = 0.0

        for task_config in tqdm(env.sample_tasks(meta_bsz), leave=False, desc='Data'):  # Samples a new config
            learner = meta_learner.clone()
            env.set_task(task_config)
            env.reset()
            task = ch.envs.Runner(env)
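

# Fragment 5: a CUDA-aware variant. Instead of calling clone() on a MAML
# wrapper, it deep-copies the policy for each sampled task (which implies a
# `from copy import deepcopy` in the elided imports) and moves the policy to
# the GPU when the `cuda` flag is set.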
        num_workers=2,
        cuda=0,
):
    cuda = bool(cuda)
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)
    if cuda:
        th.cuda.manual_seed(seed)

    def make_env():
        env = gym.make(env_name)
        env = ch.envs.ActionSpaceScaler(env)
        return env

    env = l2l.gym.AsyncVectorEnv([make_env for _ in range(num_workers)])
    env.seed(seed)
    env.set_task(env.sample_tasks(1)[0])
    env = ch.envs.Torch(env)
    policy = DiagNormalPolicy(env.state_size, env.action_size)
    if cuda:
        policy.to('cuda')
    baseline = LinearValue(env.state_size, env.action_size)

    for iteration in range(num_iterations):
        iteration_reward = 0.0
        iteration_replays = []
        iteration_policies = []

        for task_config in tqdm(env.sample_tasks(meta_bsz), leave=False, desc='Data'):  # Samples a new config
            clone = deepcopy(policy)
            env.set_task(task_config)
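

# Fragment 6: a distributed variant using torch.distributed (dist). The
# excerpt opens in the middle of what is likely a dist.init_process_group(...)
# call (its opening line is cut off in the excerpt), splits the meta batch
# across WORLD_SIZE processes, and offsets the seed by the process rank so
# each worker samples different tasks.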
        rank=args.local_rank,
        world_size=WORLD_SIZE)
    rank = dist.get_rank()
    meta_bsz //= WORLD_SIZE  # integer division so sample_tasks() receives an int
    seed += rank
    th.set_num_threads(1)
    random.seed(seed)
    np.random.seed(seed)
    th.manual_seed(seed)

    def make_env():
        return gym.make(env_name)

    env = l2l.gym.AsyncVectorEnv([make_env for _ in range(num_workers)])
    env.seed(seed)
    env = ch.envs.Torch(env)
    policy = DiagNormalPolicy(input_size=env.state_size,
                              output_size=env.action_size,
                              hiddens=[64, 64])
    meta_learner = l2l.algorithms.MAML(policy, lr=meta_lr)
    baseline = LinearValue(env.state_size, env.action_size)
    opt = optim.Adam(meta_learner.parameters(), lr=meta_lr)
    all_rewards = []

    for iteration in range(num_iterations):
        iteration_reward = 0.0
        iteration_replays = []
        iteration_policies = []

        # Sample Trajectories