# NOTE: scraper boilerplate removed — the lines below are concatenated training-loop
# snippets from multiple scripts (CMA-ES, RL step/episode runners, supervised loop);
# indentation was lost in extraction and this file is not a runnable module.
# NOTE(review): fragment of a CMA-ES training generation — the enclosing function,
# plus `out`, `es`, `solutions`, `generation`, `t0`, `config`, `agent`, `logdir`,
# `checkpoint_count`, and `train_logs`, are defined outside this snippet.
# Indentation was lost in extraction; the `if` bodies below presumably nest — TODO confirm.
Rs, Hs = zip(*out)
# CMA-ES minimizes, so returns are negated to turn reward maximization into a loss.
es.tell(solutions, [-R for R in Rs])
logger = Logger()
logger('generation', generation+1)
logger('num_seconds', round(time.perf_counter() - t0, 1))
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('fbest', es.result.fbest)
train_logs.append(logger.logs)
# Print to stdout on the first generation and every `log.freq` generations thereafter.
if generation == 0 or (generation+1) % config['log.freq'] == 0:
logger.dump(keys=None, index=0, indent=0, border='-'*50)
# Evenly spaced checkpoints across the total generation budget.
# NOTE(review): the divisor (checkpoint.num - 1) raises ZeroDivisionError when
# checkpoint.num == 1 — confirm the config always uses >= 2.
if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
# Load the best-so-far solution vector into the agent before saving it.
agent.from_vec(tensorify(es.result.xbest, 'cpu'))
agent.checkpoint(logdir, generation+1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
# NOTE(review): byte-identical duplicate of the preceding CMA-ES fragment —
# likely the same snippet captured twice by the code-search tool. All names
# (`out`, `es`, `solutions`, `generation`, `t0`, `config`, `agent`, `logdir`,
# `checkpoint_count`, `train_logs`) come from an enclosing, unseen function.
Rs, Hs = zip(*out)
# Negate returns: CMA-ES minimizes, training maximizes reward.
es.tell(solutions, [-R for R in Rs])
logger = Logger()
logger('generation', generation+1)
logger('num_seconds', round(time.perf_counter() - t0, 1))
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('fbest', es.result.fbest)
train_logs.append(logger.logs)
# Dump on first generation and every `log.freq` generations.
if generation == 0 or (generation+1) % config['log.freq'] == 0:
logger.dump(keys=None, index=0, indent=0, border='-'*50)
# NOTE(review): (checkpoint.num - 1) divisor → ZeroDivisionError if checkpoint.num == 1.
if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
agent.from_vec(tensorify(es.result.xbest, 'cpu'))
agent.checkpoint(logdir, generation+1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
# NOTE(review): fragment of a step-based RL training entry point; `config`, `env`,
# `device`, `logdir`, `Agent`, `StepRunner`, `Engine`, `count`, and `pickle_dump`
# come from the surrounding (unseen) module. The trailing `return None` implies an
# enclosing function whose `def` line is outside this snippet. Indentation lost.
agent = Agent(config, env, device)
runner = StepRunner(reset_on_call=False)
engine = Engine(config, agent=agent, env=env, runner=runner)
train_logs = []
checkpoint_count = 0
# Unbounded counter; the loop exits once the timestep budget is consumed.
for i in count():
if agent.total_timestep >= config['train.timestep']:
break
train_logger = engine.train(i)
train_logs.append(train_logger.logs)
# Print on the first iteration and every `log.freq` iterations thereafter.
if i == 0 or (i+1) % config['log.freq'] == 0:
train_logger.dump(keys=None, index=0, indent=0, border='-'*50)
# Evenly spaced checkpoints over the timestep budget.
# NOTE(review): (checkpoint.num - 1) divisor raises ZeroDivisionError for
# checkpoint.num == 1 — confirm config guarantees >= 2.
if agent.total_timestep >= int(config['train.timestep']*(checkpoint_count/(config['checkpoint.num'] - 1))):
agent.checkpoint(logdir, i + 1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
# NOTE(review): byte-identical duplicate of the preceding StepRunner fragment —
# same snippet captured twice. All unbound names come from the unseen module.
agent = Agent(config, env, device)
runner = StepRunner(reset_on_call=False)
engine = Engine(config, agent=agent, env=env, runner=runner)
train_logs = []
checkpoint_count = 0
# Loop until the agent has consumed the configured timestep budget.
for i in count():
if agent.total_timestep >= config['train.timestep']:
break
train_logger = engine.train(i)
train_logs.append(train_logger.logs)
if i == 0 or (i+1) % config['log.freq'] == 0:
train_logger.dump(keys=None, index=0, indent=0, border='-'*50)
# NOTE(review): (checkpoint.num - 1) divisor → ZeroDivisionError if checkpoint.num == 1.
if agent.total_timestep >= int(config['train.timestep']*(checkpoint_count/(config['checkpoint.num'] - 1))):
agent.checkpoint(logdir, i + 1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
# NOTE(review): same training-loop fragment as above but with an EpisodeRunner
# (episode-wise rollouts) instead of a StepRunner — presumably from a different
# algorithm's experiment script; verify against the source repository.
# All unbound names come from the surrounding (unseen) module.
agent = Agent(config, env, device)
runner = EpisodeRunner(reset_on_call=False)
engine = Engine(config, agent=agent, env=env, runner=runner)
train_logs = []
checkpoint_count = 0
# Loop until the timestep budget is exhausted.
for i in count():
if agent.total_timestep >= config['train.timestep']:
break
train_logger = engine.train(i)
train_logs.append(train_logger.logs)
# Print on the first iteration and every `log.freq` iterations thereafter.
if i == 0 or (i+1) % config['log.freq'] == 0:
train_logger.dump(keys=None, index=0, indent=0, border='-'*50)
# NOTE(review): (checkpoint.num - 1) divisor → ZeroDivisionError if checkpoint.num == 1.
if agent.total_timestep >= int(config['train.timestep']*(checkpoint_count/(config['checkpoint.num'] - 1))):
agent.checkpoint(logdir, i + 1)
checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None
# NOTE(review): fragment from inside a training loop — `i`, `engine`, `config`,
# `train_logs`, `eval_logs`, and `logdir` are defined outside this snippet.
train_output = engine.train(i)
# Build logs only on iterations that will either be recorded or printed.
if i == 0 or (i+1) % config['log.record_interval'] == 0 or (i+1) % config['log.print_interval'] == 0:
train_log = engine.log_train(train_output)
with torch.no_grad(): # disable grad, save memory
eval_output = engine.eval(n=i)
eval_log = engine.log_eval(eval_output)
# Persist logs only at record intervals (a subset of the branch above).
if i == 0 or (i+1) % config['log.record_interval'] == 0:
train_logs.append(train_log)
eval_logs.append(eval_log)
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
return None
# NOTE(review): fragment beginning mid-`if` chain — the preceding branch(es)
# (likely an iteration-count stopping condition) are outside this snippet;
# `agent`, `engine`, `config`, `train_logs`, `eval_logs`, `logdir`, and `i`
# are defined in the enclosing (unseen) function.
elif 'train.timestep' in config and agent.total_T >= config['train.timestep']: # enough timesteps
break
train_output = engine.train(i)
# Record logs on the first iteration and every `log.interval` iterations.
if i == 0 or (i+1) % config['log.interval'] == 0:
train_log = engine.log_train(train_output)
train_logs.append(train_log)
# Optional separate evaluation phase, gated by config.
if config['eval.independent']:
with torch.no_grad(): # disable grad, save memory
eval_output = engine.eval(n=i)
eval_log = engine.log_eval(eval_output)
eval_logs.append(eval_log)
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
return None
# NOTE(review): fragment of a supervised-learning entry point; `model`, `config`,
# `train_loader`, `test_loader`, `logdir`, `Engine`, `optim`, and `pickle_dump`
# come from the surrounding (unseen) module. Indentation lost in extraction.
optimizer = optim.Adam(model.parameters(), lr=config['lr'])
engine = Engine(config,
model=model,
optimizer=optimizer,
train_loader=train_loader,
test_loader=test_loader)
train_logs = []
eval_logs = []
# One train + eval pass per epoch; logs accumulate across epochs.
for epoch in range(config['train.num_epoch']):
train_logger = engine.train(epoch, logdir=logdir)
train_logs.append(train_logger.logs)
eval_logger = engine.eval(epoch, logdir=logdir)
eval_logs.append(eval_logger.logs)
# Overwrite serialized logs each epoch so progress survives interruption.
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
return None
def checkpoint(self, logdir, num_iter):
    """Save the agent's weights, plus the running observation moments
    (mean, var) when observation normalization is enabled in the config.
    """
    # Weights go to agent_<iter>.pth under the log directory.
    self.save(logdir/f'agent_{num_iter}.pth')
    normalizing = 'env.normalize_obs' in self.config and self.config['env.normalize_obs']
    if normalizing:
        # Persist the normalizer state so evaluation can reproduce the
        # same observation scaling. NOTE(review): serialized with pickle
        # despite the '.pth' extension — matches the sibling checkpoint().
        stats = (self.env.obs_moments.mean, self.env.obs_moments.var)
        pickle_dump(obj=stats, f=logdir/f'obs_moments_{num_iter}', ext='.pth')
def checkpoint(self, logdir, num_iter):
    """Save the agent's weights; if the env is wrapped in a
    VecStandardizeObservation, also save its (mean, var) moments.
    """
    # Weights go to agent_<iter>.pth under the log directory.
    self.save(logdir/f'agent_{num_iter}.pth')
    wrapper = get_wrapper(self.env, 'VecStandardizeObservation')
    if wrapper is None:
        # No observation standardization in the wrapper stack — nothing else to save.
        return
    # NOTE(review): serialized with pickle despite the '.pth' extension —
    # matches the sibling checkpoint() implementation.
    pickle_dump(obj=(wrapper.mean, wrapper.var), f=logdir/f'obs_moments_{num_iter}', ext='.pth')