# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): `x` and `y` on the next two lines are left over from a
# preceding test case outside this chunk -- confirm they are defined upstream.
assert np.allclose(x, y)
del x, y
# A 3-D ndarray round-trips through tensorify with values preserved.
x = np.random.randn(10, 20, 30)
y = tensorify(x, 'cpu')
assert np.allclose(x, y)
del x, y
# raw list
# Single-element list: the resulting tensor has one element, read via .item().
x = [2.43]
y = tensorify(x, 'cpu')
assert np.allclose(x, y.item())
del x, y
# Flat list of ints.
x = [1, 2, 3, 4, 5, 6]
y = tensorify(x, 'cpu')
assert np.allclose(x, y)
del x, y
# Nested list -> 2-D tensor, values preserved.
x = [[1, 2], [3, 4], [5, 6]]
y = tensorify(x, 'cpu')
assert np.allclose(x, y)
del x, y
# NOTE(review): fragment of an ES training loop -- the enclosing function and
# generation-loop header are outside this view (indentation was stripped), and
# `es`, `pool`, `t0`, `generation`, `checkpoint_count`, `train_logs`, `logdir`,
# `config`, `seed`, `device`, `agent` are all defined upstream.
solutions = es.ask()
# Bundle everything each worker process needs alongside its candidate solution.
data = [(config, seed, device, solution) for solution in solutions]
out = pool.map(CloudpickleWrapper(fitness), data, chunksize=config['train.worker_chunksize'])
Rs, Hs = zip(*out)
# Returns are negated before es.tell -- presumably the ES minimizes its
# objective, so this maximizes episodic return. TODO confirm against the ES impl.
es.tell(solutions, [-R for R in Rs])
logger = Logger()
logger('generation', generation+1)
logger('num_seconds', round(time.perf_counter() - t0, 1))
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('fbest', es.result.fbest)
train_logs.append(logger.logs)
# Dump to stdout on the first generation and then every `log.freq` generations.
if generation == 0 or (generation+1) % config['log.freq'] == 0:
logger.dump(keys=None, index=0, indent=0, border='-'*50)
# Checkpoint the best-so-far parameter vector at evenly spaced generations.
if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
agent.from_vec(tensorify(es.result.xbest, 'cpu'))
agent.checkpoint(logdir, generation+1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
def choose_action(self, obs, **kwargs):
    """Compute an action for `obs` according to `kwargs['mode']`.

    Modes:
        'train'      -- reparameterized sample plus its log-probability
                        (keeps the graph for pathwise gradients).
        'stochastic' -- plain sample, detached and converted to a float array.
        'eval'       -- tanh of the actor's mean forward pass, as a float array.

    Raises NotImplementedError for any other mode; raises KeyError when
    'mode' is missing from kwargs.
    """
    obs = tensorify(obs, self.device)
    mode = kwargs['mode']
    result = {}
    if mode == 'train':
        policy_dist = self.actor(obs)
        sampled = policy_dist.rsample()
        result['action'] = sampled
        result['action_logprob'] = policy_dist.log_prob(sampled)
        return result
    if mode == 'stochastic':
        with torch.no_grad():
            result['action'] = numpify(self.actor(obs).sample(), 'float')
        return result
    if mode == 'eval':
        with torch.no_grad():
            mean_action = self.actor.mean_forward(obs)
            result['action'] = numpify(torch.tanh(mean_action), 'float')
        return result
    raise NotImplementedError
def fitness(data):
    """Evaluate one ES candidate parameter vector.

    Unpacks (config, seed, device, param), builds a fresh env/agent, loads the
    candidate parameters, rolls out 10 episodes without gradients, and returns
    (mean episodic return, mean episode horizon).
    """
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK
    config, seed, device, param = data
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    agent.from_vec(tensorify(param, 'cpu'))
    runner = EpisodeRunner()
    with torch.no_grad():
        trajectories = runner(agent, env, 10)
    returns = [sum(traj.rewards) for traj in trajectories]
    horizons = [traj.T for traj in trajectories]
    return np.mean(returns), np.mean(horizons)
def choose_action(self, obs, **kwargs):
    """Deterministic actor forward pass, with exploratory Gaussian noise
    added (and the result clipped to the action bounds) when
    kwargs['mode'] == 'train'.

    Returns a dict with a single 'action' entry (float numpy array).
    Raises KeyError when 'mode' is missing from kwargs.
    """
    obs = tensorify(obs, self.device)
    with torch.no_grad():
        action = numpify(self.actor(obs), 'float')
    if kwargs['mode'] == 'train':
        # Zero-mean Gaussian exploration noise, scale taken from config.
        noise = np.random.normal(0.0, self.config['agent.action_noise'], size=action.shape)
        low, high = self.env.action_space.low, self.env.action_space.high
        action = np.clip(action + noise, low, high)
    return {'action': action}
def choose_action(self, x, **kwargs):
    """Actor-critic forward pass for a single timestep.

    Adds a batch dimension to `x.observation`, runs the shared feature
    network, and returns the action distribution, state value, entropy,
    sampled action (tensor and numpy forms), and the action log-probability.
    """
    obs = tensorify(x.observation, self.device).unsqueeze(0)
    features = self.feature_network(obs)
    action_dist = self.action_head(features)
    state_value = self.V_head(features)
    action = action_dist.sample()
    return {
        'action_dist': action_dist,
        'V': state_value,
        'entropy': action_dist.entropy(),
        'action': action,
        # Numpy copy in the env's dtype, batch dimension removed.
        'raw_action': numpify(action, self.env.action_space.dtype).squeeze(0),
        # Detach the sample so log_prob does not backprop through the action.
        'action_logprob': action_dist.log_prob(action.detach()),
    }
# NOTE(review): fragment of an ES training loop (executor variant) -- the
# enclosing function and generation-loop header are outside this view
# (indentation was stripped); `es`, `executor`, `generation`, `config`,
# `checkpoint_count`, `train_logs`, `logdir`, `agent` are defined upstream.
start_time = time.perf_counter()
solutions = es.ask()
# Candidates are evaluated in parallel, two per task submission.
out = list(executor.map(fitness, solutions, chunksize=2))
Rs, Hs = zip(*out)
# Returns are negated before es.tell -- presumably the ES minimizes its
# objective, so this maximizes episodic return. TODO confirm against the ES impl.
es.tell(solutions, [-R for R in Rs])
logger = Logger()
logger('generation', generation+1)
logger('num_seconds', round(time.perf_counter() - start_time, 1))
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('fbest', es.result.fbest)
train_logs.append(logger.logs)
# Dump to stdout on the first generation and then every `log.freq` generations.
if generation == 0 or (generation+1)%config['log.freq'] == 0:
logger.dump(keys=None, index=0, indent=0, border='-'*50)
# Checkpoint the best-so-far parameter vector at evenly spaced generations.
if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
agent.from_vec(tensorify(es.result.xbest, 'cpu'))
agent.checkpoint(logdir, generation+1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
# NOTE(review): fragment of an ES training loop, duplicating the process-pool
# variant that appears earlier in this file -- enclosing function and loop
# header are outside this view (indentation was stripped); `es`, `pool`, `t0`,
# `generation`, `checkpoint_count`, `train_logs`, `logdir`, `config`, `seed`,
# `device`, `agent` are defined upstream.
solutions = es.ask()
# Bundle everything each worker process needs alongside its candidate solution.
data = [(config, seed, device, solution) for solution in solutions]
out = pool.map(CloudpickleWrapper(fitness), data, chunksize=config['train.worker_chunksize'])
Rs, Hs = zip(*out)
# Returns are negated before es.tell -- presumably the ES minimizes its
# objective, so this maximizes episodic return. TODO confirm against the ES impl.
es.tell(solutions, [-R for R in Rs])
logger = Logger()
logger('generation', generation+1)
logger('num_seconds', round(time.perf_counter() - t0, 1))
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('fbest', es.result.fbest)
train_logs.append(logger.logs)
# Dump to stdout on the first generation and then every `log.freq` generations.
if generation == 0 or (generation+1) % config['log.freq'] == 0:
logger.dump(keys=None, index=0, indent=0, border='-'*50)
# Checkpoint the best-so-far parameter vector at evenly spaced generations.
if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
agent.from_vec(tensorify(es.result.xbest, 'cpu'))
agent.checkpoint(logdir, generation+1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
def fitness(data):
    """Evaluate one ES candidate parameter vector.

    Unpacks (config, seed, device, param), builds a fresh env/agent, loads the
    candidate parameters, rolls out 10 episodes without gradients, and returns
    (mean episodic return, mean episode horizon).
    """
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK
    config, seed, device, param = data
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    agent.from_vec(tensorify(param, 'cpu'))
    runner = EpisodeRunner()
    with torch.no_grad():
        trajectories = runner(agent, env, 10)
    returns = [sum(traj.rewards) for traj in trajectories]
    horizons = [traj.T for traj in trajectories]
    return np.mean(returns), np.mean(horizons)