# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
make_obs_ph=make_obs_ph,
q_func=q_func,
num_actions=DotaEnvironment.get_action_space().n,
optimizer=tf.train.AdamOptimizer(learning_rate=lr),
gamma=gamma,
grad_norm_clipping=10,)
if prioritized_replay:
replay_buffer = PrioritizedReplayBuffer(buffer_size, alpha=prioritized_replay_alpha)
if prioritized_replay_beta_iters is None:
prioritized_replay_beta_iters = total_timesteps
beta_schedule = LinearSchedule(prioritized_replay_beta_iters,
initial_p=prioritized_replay_beta0,
final_p=1.0)
else:
replay_buffer = ReplayBuffer(buffer_size)
beta_schedule = None
U.initialize()
update_target()
reward_shaper = ActionAdviceRewardShaper(config=config)
reward_shaper.load()
reward_shaper.generate_merged_demo()
full_exp_name = '{}-{}'.format(date.today().strftime('%Y%m%d'), experiment_name)
experiment_dir = os.path.join('experiments', full_exp_name)
os.makedirs(experiment_dir, exist_ok=True)
learning_dir = os.path.join(experiment_dir, 'learning')
learning_summary_writer = tf.summary.FileWriter(learning_dir)
batch of observations
act_batch: np.array
batch of actions executed given obs_batch
rew_batch: np.array
rewards received as results of executing act_batch
next_obs_batch: np.array
next set of observations seen after executing act_batch
done_mask: np.array
done_mask[i] = 1 if executing act_batch[i] resulted in
the end of an episode and 0 otherwise.
"""
idxes = [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
return self._encode_sample(idxes)
class PrioritizedReplayBuffer(ReplayBuffer):
    # Extends ReplayBuffer with priority-weighted sampling; `alpha` controls
    # how strongly priorities influence sampling (presumably per
    # "Prioritized Experience Replay", Schaul et al. 2016 — confirm against
    # the sampling code below this view).
    def __init__(self, size: int, alpha: float):
        """Create Prioritized Replay buffer.

        Drop-in extension of ``ReplayBuffer``: same capacity semantics,
        plus a prioritization exponent ``alpha``.

        Parameters
        ----------
        size: int
            Max number of transitions to store in the buffer. When the buffer
            overflows the old memories are dropped.
        alpha: float
            How much prioritization is used
            (0 - no prioritization, 1 - full prioritization).

        See Also
        --------
        ReplayBuffer.__init__
        """