if args.debug:
    logger.info("-" * 16)
    logger.info("Configuration:")
    logger.info(agent_config)
if args.save:
    save_dir = os.path.dirname(args.save)
    if not os.path.isdir(save_dir):
        try:
            os.mkdir(save_dir, 0o755)
        except OSError:
            raise OSError("Cannot save agent to dir {}".format(save_dir))
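    # A hedged alternative (Python 3.2+), sketching the same intent without
    # the isdir()/mkdir() race:
    # os.makedirs(save_dir, mode=0o755, exist_ok=True)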
runner = Runner(
    agent=agent,
    environment=environment,
    repeat_actions=1
)
report_episodes = max(args.episodes // 1000, 1)  # avoid modulo-by-zero for runs under 1000 episodes
if args.debug:
    report_episodes = 1
def episode_finished(r):
    if r.episode % report_episodes == 0:
        sps = r.timestep / (time.time() - r.start_time)
        logger.info("Finished episode {ep} after {ts} timesteps. Steps per second: {sps}".format(
            ep=r.episode, ts=r.timestep, sps=sps))
        logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
        logger.info("Average of last 500 rewards: {}".format(
            sum(r.episode_rewards[-500:]) / min(len(r.episode_rewards), 500)))
        logger.info("Average of last 100 rewards: {}".format(
            sum(r.episode_rewards[-100:]) / min(len(r.episode_rewards), 100)))
    return True
for i in range(args.experiments):
    config = original_config.copy()
    environment = OpenAIGym(args.gym_id)
    agent = agents[config.agent](
        states_spec=environment.states,
        actions_spec=environment.actions,
        network_spec=config.network,
        config=config
    )
    if i == 0 and args.load_model:
        logger.info("Loading model data from file: {}".format(args.load_model))
        agent.load_model(args.load_model)
    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        history=history_data
        # save_path=args.model,
        # save_episodes=args.save_model
    )
    environment.reset()
    agent.reset()
    logger.info("Starting experiment {}".format(i + 1))
    experiment_start_time = int(time.time())
    runner.run(episodes=config.episodes, max_episode_timesteps=config.max_timesteps, episode_finished=episode_finished)
    experiment_end_time = int(time.time())
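    # Hedged follow-up (not in the original snippet): the two timestamps above
    # make a natural per-experiment duration report.
    logger.info("Experiment {} took {}s".format(i + 1, experiment_end_time - experiment_start_time))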
agent = Agent.from_spec(
    spec=agent,
    kwargs=dict(
        states=environment.states,
        actions=environment.actions,
        network=network
    )
)
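# Hedged example of the `network` spec consumed above: in Tensorforce 0.x it is
# a list of layer dicts (sizes and activations here are illustrative).
network = [
    dict(type='dense', size=64, activation='tanh'),
    dict(type='dense', size=64, activation='tanh')
]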
logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(gym_id=args.gym_id))
logger.info("Config:")
logger.info(agent)
runner = Runner(
    agent=agent,
    environment=environment,
    repeat_actions=1
)
if args.debug:  # TODO: Timestep-based reporting
    report_episodes = 1
else:
    report_episodes = 100
def episode_finished(r):
    if r.episode % report_episodes == 0:
        steps_per_second = r.timestep / (time.time() - r.start_time)
        logger.info("Finished episode {} after overall {} timesteps. Steps per second: {}".format(
            r.agent.episode,
            r.agent.timestep,
            steps_per_second
        ))
    return True
    likelihood_ratio_clipping=0.2,
    # PPOAgent
    step_optimizer=dict(
        type='adam',
        learning_rate=1e-4
    ),
    subsampling_fraction=0.2,  # 0.1
    optimization_steps=10,
    execution=dict(
        type='single',
        session_config=None,
        distributed_spec=None
    )
)
train_runner = Runner(agent=agent, environment=environment)
test_runner = Runner(
    agent=agent,
    environment=test_environment,
)

train_runner.run(episodes=100, max_episode_timesteps=16000, episode_finished=episode_finished)
print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
    ep=train_runner.episode,
    ar=np.mean(train_runner.episode_rewards[-100:])
))
test_runner.run(num_episodes=1, deterministic=True, testing=True, episode_finished=print_simple_log)
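# For comparison, a minimal self-contained sketch of the same train-then-test
# pattern, assuming the Tensorforce 0.6-style API (Agent.create /
# Environment.create); the environment and hyperparameters are illustrative,
# not taken from the snippet above.
from tensorforce import Agent, Environment, Runner

environment = Environment.create(environment='gym', level='CartPole-v1')
agent = Agent.create(agent='ppo', environment=environment, batch_size=10)
runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=100)                  # training
runner.run(num_episodes=10, evaluation=True)  # deterministic evaluation
runner.close()
agent.close()
environment.close()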
    # Critic
    critic_network='auto',
    critic_optimizer=dict(optimizer='adam', multi_step=10, learning_rate=1e-3),
    # Preprocessing
    preprocessing=None,
    # Exploration
    exploration=0.0, variable_noise=0.0,
    # Regularization
    l2_regularization=0.0, entropy_regularization=0.0,
    # TensorFlow etc
    name='agent', device=None, parallel_interactions=1, seed=None, execution=None, saver=None,
    summarizer=None, recorder=None
)
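# Hedged note: in the Tensorforce 0.5-style API, the `saver` and `summarizer`
# arguments above accept dicts instead of None, along the lines of (directory
# names illustrative):
# saver=dict(directory='model-checkpoints', frequency=100)
# summarizer=dict(directory='summaries')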
# Initialize the runner
runner = Runner(agent=agent, environment=environment)
# Start the runner
runner.run(num_episodes=300)
runner.close()
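# Hedged follow-up sketch: before close(), aggregate statistics are available
# on the runner (the 0.x runners keep episode_rewards as a plain list).
# mean_last_100 = sum(runner.episode_rewards[-100:]) / min(len(runner.episode_rewards), 100)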
        env.set_training_agent(agent.agent_id)
        break

if args.record_pngs_dir:
    assert not os.path.isdir(args.record_pngs_dir)
    os.makedirs(args.record_pngs_dir)
if args.record_json_dir:
    assert not os.path.isdir(args.record_json_dir)
    os.makedirs(args.record_json_dir)
# Create a Proximal Policy Optimization agent
agent = training_agent.initialize(env)

atexit.register(functools.partial(clean_up_agents, agents))
wrapped_env = WrappedEnv(env, visualize=args.render)
runner = Runner(agent=agent, environment=wrapped_env)
runner.run(episodes=10, max_episode_timesteps=2000)
print("Stats: ", runner.episode_rewards, runner.episode_timesteps, runner.episode_times)
try:
    runner.close()
except AttributeError:
    pass
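# Hedged sketch of what a WrappedEnv like the one above typically looks like in
# Tensorforce 0.x, where Environment.execute() returns (state, terminal,
# reward); the pass-through logic here is illustrative, not the original
# implementation.
class WrappedEnv(OpenAIGym):
    def __init__(self, gym, visualize=False):
        self.gym = gym
        self.visualize = visualize

    def reset(self):
        return self.gym.reset()

    def execute(self, action):
        if self.visualize:
            self.gym.render()
        state, reward, terminal, _ = self.gym.step(action)
        return state, terminal, reward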
    baseline_mode=None,
    baseline=None,
    baseline_optimizer=None,
    gae_lambda=None,
    # PGLRModel
    actions_exploration=dict(
        type='epsilon_decay',
        initial_epsilon=1.0,
        final_epsilon=0.05,
        timesteps=500000
    ),
    likelihood_ratio_clipping=0.2,
)
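# Hedged illustration of the epsilon_decay schedule configured above, assuming
# a linear anneal from initial_epsilon to final_epsilon over `timesteps` steps.
def epsilon_at(t, initial=1.0, final=0.05, horizon=500000):
    frac = min(t / float(horizon), 1.0)
    return initial + frac * (final - initial)

# epsilon_at(0) == 1.0; epsilon_at(250000) == 0.525; epsilon_at(500000) == 0.05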
# Create the runner
runner = Runner(agent=agent, environment=env)
# Callback function printing episode statistics
def episode_finished(r):
    reward = r.episode_rewards[-1]
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(
        ep=r.episode, ts=r.episode_timestep, reward=reward))
    rewards.append(reward)
    with open('saved_rewards.pkl', 'wb') as output_file:
        pickle.dump(obj=rewards, file=output_file)
    return True
# Start learning
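# The original snippet is truncated here; a hedged completion would be e.g.:
# runner.run(episodes=600, max_episode_timesteps=1000, episode_finished=episode_finished)

# Hedged companion sketch: reading back the rewards pickled by the callback
# above for offline analysis (same file name as in the snippet).
import pickle

with open('saved_rewards.pkl', 'rb') as input_file:
    saved_rewards = pickle.load(input_file)
print("Episodes recorded: {}".format(len(saved_rewards)))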