# Assumes `from tqdm import tqdm`, `import numpy as np`, and project-level
# objects (env, test_env, agent, rpm, args, tensorboard, logger,
# MEMORY_WARMUP_SIZE) defined earlier in the script.

# warm up the replay memory before training starts
with tqdm(
        total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar:
    while rpm.size() < MEMORY_WARMUP_SIZE:
        total_reward, steps, _ = run_train_episode(env, agent, rpm)
        pbar.update(steps)

# train
test_flag = 0
pbar = tqdm(total=args.train_total_steps)
total_steps = 0
max_reward = None
while total_steps < args.train_total_steps:
    # start epoch
    total_reward, steps, loss = run_train_episode(env, agent, rpm)
    total_steps += steps
    pbar.set_description('[train]exploration:{}'.format(agent.exploration))
    tensorboard.add_scalar('dqn/score', total_reward, total_steps)
    tensorboard.add_scalar('dqn/loss', loss,
                           total_steps)  # mean of total loss
    tensorboard.add_scalar('dqn/exploration', agent.exploration,
                           total_steps)
    pbar.update(steps)

    # evaluate every `test_every_steps` training steps; the inner loop
    # catches up when one episode spans several evaluation intervals
    if total_steps // args.test_every_steps >= test_flag:
        while total_steps // args.test_every_steps >= test_flag:
            test_flag += 1
        pbar.write("testing")
        eval_rewards = []
        for _ in tqdm(range(3), desc='eval agent'):
            eval_reward = run_evaluate_episode(test_env, agent)
            eval_rewards.append(eval_reward)
        logger.info(
            "eval_agent done, (steps, eval_reward): ({}, {})".format(
                total_steps, np.mean(eval_rewards)))
        eval_test = np.mean(eval_rewards)
        tensorboard.add_scalar('dqn/eval', eval_test, total_steps)

pbar.close()
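# The loop above relies on two helpers that are not part of this snippet,
# `run_train_episode` and `run_evaluate_episode`. As a rough sketch only, the
# evaluation helper could look like the function below; it assumes a
# Gym-style `env.reset()`/`env.step()` API and an agent with a greedy
# `predict(obs)` method, neither of which is confirmed by the snippet itself.

import numpy as np


def run_evaluate_episode(env, agent):
    """Roll out one greedy episode and return its undiscounted return."""
    obs = env.reset()
    episode_reward = 0.0
    while True:
        action = agent.predict(np.expand_dims(obs, axis=0))  # assumed API
        obs, reward, done, _ = env.step(action)
        episode_reward += reward
        if done:
            break
    return episode_reward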
# The lines below come from a different, distributed-training snippet: a
# learner process that collects episodes from remote actors. `self` refers to
# that learner object; its class definition is not part of this snippet.

def log_metrics(self, metrics):
    logger.info(metrics)
    for k, v in metrics.items():
        if v is not None:
            tensorboard.add_scalar(k, v, self.sample_total_steps)

# Episode bookkeeping (the header of the enclosing method is not shown):
with self.memory_lock:
    self.total_steps += n
    self.add_episode_rpm(episode_rpm)

    if actor_state.ident % 3 == 2:  # trajectory without noise
        self.env_reward_stat.add(episode_env_reward)
        self.shaping_reward_stat.add(episode_shaping_reward)
        self.max_env_reward = max(self.max_env_reward,
                                  episode_env_reward)

        if self.env_reward_stat.count > 500:
            tensorboard.add_scalar('recent_env_reward',
                                   self.env_reward_stat.mean,
                                   self.total_steps)
            tensorboard.add_scalar('recent_shaping_reward',
                                   self.shaping_reward_stat.mean,
                                   self.total_steps)
        if self.critic_loss_stat.count > 500:
            tensorboard.add_scalar('recent_critic_loss',
                                   self.critic_loss_stat.mean,
                                   self.total_steps)
        tensorboard.add_scalar('episode_length', n, self.total_steps)
        tensorboard.add_scalar('max_env_reward', self.max_env_reward,
                               self.total_steps)
        tensorboard.add_scalar('ready_actor_num',
                               self.ready_actor_queue.qsize(),
                               self.total_steps)
        tensorboard.add_scalar('episode_time', episode_time,
                               self.total_steps)

    self.noiselevel = self.noiselevel * NOISE_DECAY
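# The learner snippet keeps running statistics in objects exposing `add`,
# `count`, and `mean` (`env_reward_stat`, `shaping_reward_stat`,
# `critic_loss_stat`), but their class is not shown. A minimal illustrative
# tracker with that interface might look as follows (the original project may
# well average over a sliding window of recent values instead):

class RunningStat(object):
    """Tracks the count and mean of every value passed to `add`."""

    def __init__(self):
        self.count = 0
        self._sum = 0.0

    def add(self, value):
        self.count += 1
        self._sum += value

    @property
    def mean(self):
        return self._sum / self.count if self.count else 0.0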