def log_train(self, train_output, **kwargs):
    D = train_output['D']
    out_agent = train_output['out_agent']
    n = train_output['n']
    num_sec = train_output['num_sec']

    logger = Logger()
    logger('train_iteration', n+1)  # starts from 1
    logger('num_seconds', round(num_sec, 1))
    if 'current_lr' in out_agent:
        logger('current_lr', out_agent['current_lr'])
    logger('loss', out_agent['loss'])
    logger('policy_loss', out_agent['policy_loss'])
    logger('policy_entropy', -out_agent['entropy_loss'])  # negated: the loss term is the negative entropy bonus
    logger('value_loss', out_agent['value_loss'])
    logger('explained_variance', out_agent['explained_variance'])

    batch_returns = D.numpy_rewards.sum(1)
    logger('num_trajectories', D.N)
    logger('num_timesteps', D.total_T)
    logger('accumulated_trained_timesteps', self.agent.total_T)
    # The snippet is truncated here; batch_returns is presumably summarized as in log_eval below:
    logger('average_return', batch_returns.mean())
    logger('std_return', batch_returns.std())
    logger('min_return', batch_returns.min())
    logger('max_return', batch_returns.max())
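All of these snippets drive the same small Logger surface: the instance is called with a (key, value) pair, accumulates values per key in .logs, and pretty-prints with .dump. A minimal sketch of that assumed interface, useful for reading the snippets on this page (the name MinimalLogger and the internals are illustrative, not the library's implementation):

from collections import OrderedDict

class MinimalLogger:
    """Sketch of the Logger interface these snippets assume."""
    def __init__(self):
        self.logs = OrderedDict()

    def __call__(self, key, value):
        # Append `value` under `key`, creating the list on first use.
        self.logs.setdefault(key, []).append(value)

    def dump(self, keys=None, index=0, indent=0, border=''):
        # Print the `index`-th logged value for each requested key.
        keys = keys if keys is not None else list(self.logs.keys())
        if border:
            print(border)
        for key in keys:
            print(' '*indent + f'{key}: {self.logs[key][index]}')
        if border:
            print(border)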

def log_eval(self, eval_output, **kwargs):
    D = eval_output['D']
    n = eval_output['n']
    T = eval_output['T']
    num_sec = eval_output['num_sec']

    logger = Logger()
    batch_returns = D.numpy_rewards.sum(1)

    logger('evaluation_iteration', n+1)  # starts from 1
    logger('num_seconds', round(num_sec, 1))
    logger('num_trajectories', D.N)
    logger('max_allowed_horizon', T)
    logger('average_horizon', D.Ts.mean())
    logger('total_timesteps', D.total_T)
    logger('accumulated_trained_timesteps', self.agent.total_T)
    logger('average_return', batch_returns.mean())
    logger('std_return', batch_returns.std())
    logger('min_return', batch_returns.min())
    logger('max_return', batch_returns.max())
    if n == 0 or (n+1) % self.config['log.print_interval'] == 0:
        logger.dump(keys=None, index=0, indent=0, border='-'*50)  # assumed body; the snippet is cut off here

# The first lines of this snippet are cut off; the ES construction is reconstructed
# here (an assumption) following the CMAES example at the bottom of this page.
es = OpenAIES([config['train.mu0']]*agent.num_params, config['train.std0'],
              {'popsize': config['train.popsize'],
               'seed': seed,
               'sigma_scheduler_args': config['train.sigma_scheduler_args'],
               'lr': config['train.lr'],
               'lr_decay': config['train.lr_decay'],
               'min_lr': config['train.min_lr'],
               'antithetic': config['train.antithetic'],
               'rank_transform': config['train.rank_transform']})
train_logs = []
with ProcessPoolExecutor(max_workers=config['train.popsize'],
                         initializer=initializer, initargs=(config, seed, device)) as executor:
    print('Finished initialization. Training starts...')
    for generation in range(config['train.generations']):
        solutions = es.ask()
        out = list(executor.map(fitness, solutions))
        Rs, Hs = zip(*out)
        es.tell(solutions, [-R for R in Rs])  # the ES minimizes, so negate the returns

        logger = Logger()
        logger('generation', generation+1)
        logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('fbest', es.result.fbest)
        train_logs.append(logger.logs)
        if generation == 0 or (generation+1) % config['log.freq'] == 0:
            logger.dump(keys=None, index=0, indent=0, border='-'*50)
        if generation == 0 or (generation+1) % config['checkpoint.freq'] == 0:
            agent.from_vec(torch.from_numpy(es.result.xbest).float())
            agent.checkpoint(logdir, generation+1)
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
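executor.map(fitness, solutions) above assumes a module-level fitness function that scores one flat parameter vector and returns an (episodic return, horizon) pair, with initializer building per-worker state once. A hedged sketch of that contract; initializer, make_env, and the rollout details are assumptions pieced together from the other snippets on this page, not the original code:

import torch

def initializer(config, seed, device):
    # Build the environment and agent once per worker process.
    global _env, _agent
    _env = make_env(config, seed, 'train')   # assumed helper
    _agent = Agent(config, _env, device)     # assumed constructor

def fitness(solution):
    # Load the candidate parameters, roll out one episode,
    # and return (episodic return, episode length).
    _agent.from_vec(torch.from_numpy(solution).float())
    R, H = 0.0, 0
    observation = _env.reset()
    for _ in range(_env.spec.max_episode_steps):
        with torch.no_grad():
            action = _agent.choose_action(observation, mode='eval')['action']
        observation, reward, done, info = _env.step(action)
        R += reward
        H += 1
        if done:
            break
    return R, H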

def eval(self, n=None, **kwargs):
    self.model.eval()

    logger = Logger()
    for data, label in self.test_loader:
        data = data.to(self.model.device)
        with torch.no_grad():
            re_x, mu, logvar = self.model(data)
            out = vae_loss(re_x, data, mu, logvar, 'BCE')
        logger('eval_loss', out['loss'].item())
    mean_loss = np.mean(logger.logs['eval_loss'])
    print(f'====> Test set loss: {mean_loss}')

    # Reconstruct some test images
    data, label = next(iter(self.test_loader))  # get a random batch
    data = data.to(self.model.device)
    m = min(data.size(0), 8)  # number of images to reconstruct
    D = data[:m]
    with torch.no_grad():
        re_x, _, _ = self.model(D)
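The reconstruction snippet stops right after the forward pass. A common way to finish it (an assumption, using torchvision rather than anything shown on this page) is to tile the m originals above their reconstructions and write the grid to disk:

import torch
from torchvision.utils import save_image

# `D` holds m originals and `re_x` their reconstructions; use re_x.view_as(D)
# in case the decoder returns flattened outputs.
comparison = torch.cat([D, re_x.view_as(D)])
save_image(comparison.cpu(), f'reconstruction_{n}.png', nrow=m)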

# This snippet starts mid-branch; the opening `if` is reconstructed (assumed) from the elif below.
if self.config['es.algo'] == 'CMAES':
    es = CMAES(mu0=[self.config['es.mu0']]*self._num_params,
               std0=self.config['es.std0'],
               popsize=self.config['es.popsize'])
elif self.config['es.algo'] == 'OpenAIES':
    es = OpenAIES(mu0=[self.config['es.mu0']]*self._num_params,
                  std0=self.config['es.std0'],
                  popsize=self.config['es.popsize'],
                  std_decay=0.999,
                  min_std=0.01,
                  lr=1e-1,
                  lr_decay=0.99,
                  min_lr=1e-3,
                  antithetic=True,
                  rank_transform=True)
self.logger = Logger()
return es

def eval(self, n=None, **kwargs):
    start_time = perf_counter()
    returns = []
    horizons = []
    for _ in range(self.config['eval.num_episode']):
        observation = self.eval_env.reset()
        for _ in range(self.eval_env.spec.max_episode_steps):
            with torch.no_grad():
                action = self.agent.choose_action(observation, mode='eval')['action']
            next_observation, reward, done, info = self.eval_env.step(action)
            if done[0]:  # [0]: single environment in the vectorized env
                returns.append(info[0]['episode']['return'])
                horizons.append(info[0]['episode']['horizon'])
                break
            observation = next_observation

    logger = Logger()
    logger('num_seconds', round(perf_counter() - start_time, 1))
    logger('accumulated_trained_timesteps', kwargs['accumulated_trained_timesteps'])
    logger('accumulated_trained_episodes', kwargs['accumulated_trained_episodes'])
    logger('online_return', describe(returns, axis=-1, repr_indent=1, repr_prefix='\n'))
    logger('online_horizon', describe(horizons, axis=-1, repr_indent=1, repr_prefix='\n'))

    monitor_env = get_wrapper(self.eval_env, 'VecMonitor')
    logger('running_return', describe(monitor_env.return_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
    logger('running_horizon', describe(monitor_env.horizon_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
    logger.dump(keys=None, index=0, indent=0, border=color_str('+'*50, color='green'))
    return logger.logs
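describe appears throughout these snippets to condense a list of returns or horizons into a readable multi-line summary. A minimal stand-in with the same call shape (the name describe_sketch and the exact statistics are assumptions; the real helper presumably reports more):

import numpy as np

def describe_sketch(x, axis=-1, repr_indent=1, repr_prefix=''):
    # Render mean/std/min/max of a sequence, one statistic per indented line.
    x = np.asarray(x)
    pad = '\t'*repr_indent
    stats = {'mean': x.mean(axis=axis), 'std': x.std(axis=axis),
             'min': x.min(axis=axis), 'max': x.max(axis=axis)}
    return repr_prefix + '\n'.join(f'{pad}{name}: {value}' for name, value in stats.items())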

def log_train(self, train_output, **kwargs):
    D = train_output['D']
    out_agent = train_output['out_agent']
    n = train_output['n']
    num_sec = train_output['num_sec']

    logger = Logger()
    logger('train_iteration', n+1)  # starts from 1
    logger('num_seconds', round(num_sec, 1))
    for key, value in out_agent.items():  # forward every agent output as-is
        logger(key, value)
    logger('num_segments', D.N)
    logger('num_timesteps', D.total_T)
    logger('accumulated_trained_timesteps', self.agent.total_T)

    monitor_env = get_wrapper(self.runner.env, 'VecMonitor')
    infos = list(filter(lambda info: 'episode' in info, chain.from_iterable(D.infos)))
    if len(infos) > 0:
        online_returns = np.asarray([info['episode']['return'] for info in infos])
        online_horizons = np.asarray([info['episode']['horizon'] for info in infos])
        logger('online_N', len(infos))
        logger('online_mean_return', online_returns.mean())
        logger('online_mean_horizon', online_horizons.mean())  # assumed by symmetry; the snippet is cut off here

def eval(self, n=None, **kwargs):
    t0 = time.perf_counter()
    with torch.no_grad():
        D = self.runner(self.agent, self.eval_env, 10, mode='eval')

    logger = Logger()
    logger('eval_iteration', n+1)
    logger('num_seconds', round(time.perf_counter() - t0, 1))
    logger('accumulated_trained_timesteps', self.agent.total_timestep)
    logger('online_return', describe([sum(traj.rewards) for traj in D], axis=-1, repr_indent=1, repr_prefix='\n'))
    logger('online_horizon', describe([traj.T for traj in D], axis=-1, repr_indent=1, repr_prefix='\n'))
    logger('running_return', describe(self.eval_env.return_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
    logger('running_horizon', describe(self.eval_env.horizon_queue, axis=-1, repr_indent=1, repr_prefix='\n'))
    logger.dump(keys=None, index=0, indent=0, border=color_str('+'*50, color='green'))
    return logger.logs

# The first line of this snippet is cut off; the ES construction is reconstructed here
# (an assumption; elite_ratio/noise_scheduler_args suggest a CEM-style ES) following
# the CMAES example below.
es = CEM([config['train.mu0']]*agent.num_params, config['train.std0'],
         {'popsize': config['train.popsize'],
          'seed': seed,
          'elite_ratio': config['train.elite_ratio'],
          'noise_scheduler_args': config['train.noise_scheduler_args']})
train_logs = []
checkpoint_count = 0
with Pool(processes=config['train.popsize']//config['train.worker_chunksize']) as pool:
    print('Finished initialization. Training starts...')
    for generation in range(config['train.generations']):
        t0 = time.perf_counter()
        solutions = es.ask()
        data = [(config, seed, device, solution) for solution in solutions]
        out = pool.map(CloudpickleWrapper(fitness), data, chunksize=config['train.worker_chunksize'])
        Rs, Hs = zip(*out)
        es.tell(solutions, [-R for R in Rs])  # the ES minimizes, so negate the returns

        logger = Logger()
        logger('generation', generation+1)
        logger('num_seconds', round(time.perf_counter() - t0, 1))
        logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('fbest', es.result.fbest)
        train_logs.append(logger.logs)
        if generation == 0 or (generation+1) % config['log.freq'] == 0:
            logger.dump(keys=None, index=0, indent=0, border='-'*50)
        # Spread config['checkpoint.num'] checkpoints evenly across all generations.
        if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
            agent.from_vec(tensorify(es.result.xbest, 'cpu'))
            agent.checkpoint(logdir, generation+1)
            checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
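pool.map needs a picklable callable; wrapping fitness in CloudpickleWrapper is the standard trick when the target function (or its closure) does not survive the stdlib pickler. A sketch of such a wrapper, assuming the cloudpickle package (this mirrors the common pattern, not necessarily the exact class used here):

import pickle
import cloudpickle

class CloudpickleWrapperSketch:
    """Make an arbitrary callable picklable by serializing it with cloudpickle."""
    def __init__(self, fn):
        self.fn = fn

    def __getstate__(self):
        # Use cloudpickle (handles lambdas/closures) when the Pool pickles us.
        return cloudpickle.dumps(self.fn)

    def __setstate__(self, state):
        self.fn = pickle.loads(state)

    def __call__(self, *args, **kwargs):
        return self.fn(*args, **kwargs)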

agent = Agent(config, make_env(config, seed, 'eval'), device)
es = CMAES([config['train.mu0']]*agent.num_params, config['train.std0'],
           {'popsize': config['train.popsize'],
            'seed': seed})
train_logs = []
checkpoint_count = 0
with Pool(processes=config['train.popsize']//config['train.worker_chunksize']) as pool:
    print('Finished initialization. Training starts...')
    for generation in range(config['train.generations']):
        t0 = time.perf_counter()
        solutions = es.ask()
        data = [(config, seed, device, solution) for solution in solutions]
        out = pool.map(CloudpickleWrapper(fitness), data, chunksize=config['train.worker_chunksize'])
        Rs, Hs = zip(*out)
        es.tell(solutions, [-R for R in Rs])  # the ES minimizes, so negate the returns

        logger = Logger()
        logger('generation', generation+1)
        logger('num_seconds', round(time.perf_counter() - t0, 1))
        logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
        logger('fbest', es.result.fbest)
        train_logs.append(logger.logs)
        if generation == 0 or (generation+1) % config['log.freq'] == 0:
            logger.dump(keys=None, index=0, indent=0, border='-'*50)
        # Spread config['checkpoint.num'] checkpoints evenly across all generations.
        if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
            agent.from_vec(tensorify(es.result.xbest, 'cpu'))
            agent.checkpoint(logdir, generation+1)
            checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None