Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Resolve the experiment definition from the example module, then launch it
# under Ray Tune.
variant_spec = example_module.get_variant_spec(example_args)
trainable_class = example_module.get_trainable_class(example_args)
experiment_id, experiment = generate_experiment(
    trainable_class, variant_spec, example_args)
experiments = {experiment_id: experiment}

# Bring up Ray with the CPU/GPU/custom resources requested on the CLI.
ray_init_kwargs = dict(
    num_cpus=example_args.cpus,
    num_gpus=example_args.gpus,
    resources=example_args.resources or {},
    local_mode=local_mode,
    include_webui=example_args.include_webui,
    temp_dir=example_args.temp_dir,
)
ray.init(**ray_init_kwargs)

# Run with no custom trial scheduler; the Tune server (if enabled)
# listens on port 4321.
tune.run_experiments(
    experiments,
    scheduler=None,
    with_server=example_args.with_server,
    server_port=4321)
alg_run = 'PPO'

# Start from RLlib's PPO defaults and overlay the experiment settings.
config = ppo.DEFAULT_CONFIG.copy()
config.update({
    'num_workers': N_CPUS,
    'train_batch_size': HORIZON * N_ROLLOUTS,
    'gamma': 0.999,  # discount rate
    'clip_actions': True,
    'horizon': HORIZON,
    'vf_share_layers': True,
})
config['model'].update({'fcnet_hiddens': [64, 64]})

# Hyperparameter grid: learning rate and SGD passes per train batch.
config['lr'] = tune.grid_search([5e-5, 5e-4])
config['num_sgd_iter'] = tune.grid_search([10, 30])

# Also sweep over whether the policy network uses an LSTM.
config['model']['use_lstm'] = tune.grid_search([True, False])

# Serialize the flow params into the env config so rollouts can be replayed.
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

create_env, env_name = make_create_env(params=flow_params, version=0)
alg_run = 'PPO'

# Overlay the experiment settings on top of RLlib's PPO defaults.
config = ppo.DEFAULT_CONFIG.copy()
config.update({
    'num_workers': N_CPUS,
    'train_batch_size': HORIZON * N_ROLLOUTS,
    'gamma': 0.999,  # discount rate
    'clip_actions': True,
    'horizon': HORIZON,
    'vf_share_layers': True,
})
config['model'].update({'fcnet_hiddens': [64, 64]})

# Grid-searched hyperparameters.
config['lr'] = tune.grid_search([5e-5, 5e-4])
config['num_sgd_iter'] = tune.grid_search([10, 30])

# LSTM settings: feed the previous action/reward into the recurrent cell and
# fix its size. (use_lstm itself is left at the RLlib default here.)
config['model']['lstm_use_prev_action_reward'] = True
config['model']['lstm_cell_size'] = 64

# Save the flow params inside the config for later replay.
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

create_env, env_name = make_create_env(params=flow_params, version=0)
# Every 500 iterations checkpoint the trainer; otherwise build the
# multi-agent IMPALA config.
# NOTE(review): `iter` shadows the builtin — presumably a loop counter
# defined outside this span; confirm at the call site.
if iter % 500 == 0:
    trainer.save("saved_models/multi-carla/" + args.model_arch)
    pprint(results)
else:
    # One shared policy ("def_policy") over an 84x84x3 image observation
    # space and a 9-way discrete action space; every agent id maps to it.
    shared_policy = (VTracePolicyGraph,
                     Box(0.0, 255.0, shape=(84, 84, 3)),
                     Discrete(9),
                     {"gamma": 0.99})
    config = {
        "env": "dm-" + env_name,
        "log_level": "DEBUG",
        "multiagent": {
            "policy_graphs": {"def_policy": shared_policy},
            "policy_mapping_fn": tune.function(lambda agent_id: "def_policy"),
        },
        "env_config": env_actor_configs,
        "num_workers": args.num_workers,
        "num_envs_per_worker": args.envs_per_worker,
        "sample_batch_size": args.sample_bs_per_worker,
        "train_batch_size": args.train_bs,
    }
# NOTE(review): this tune.Experiment(...) call is truncated in this span —
# its argument list is never closed before the assignments below; the text
# appears to splice two different snippets together. Left byte-identical.
experiment_spec = tune.Experiment(
"multi-carla/" + args.model_arch,
"IMPALA",
# timesteps_total is init with None (not 0) which causes issue
# stop={"timesteps_total": args.num_steps},
stop={"timesteps_since_restore": args.num_steps},
config=config,
checkpoint_freq=1000,
# NOTE(review): the two resource assignments below belong to a separate
# snippet (driver/worker CPU-GPU split), not to the Experiment call above.
num_gpus_per_worker = 0
num_cpus_per_worker = int(spare_cpus / num_workers)
# hyperparams
# NOTE(review): this if/else overlays trainer resources and (when
# tune_hparams is set) grid-searched hyperparameters onto `config`.
# The else branch's config.update({...}) dict is never closed in this
# span — the snippet is truncated. Left byte-identical.
if tune_hparams:
config.update({
"train_batch_size": 128,
"horizon": 1000,
"lr_schedule": [[0, tune.grid_search([5e-4, 5e-3])],
[20000000, tune.grid_search([5e-4, 5e-5])]],
"num_workers": num_workers,
"num_gpus": gpus_for_driver,  # The number of GPUs for the driver
"num_cpus_for_driver": cpus_for_driver,
"num_gpus_per_worker": num_gpus_per_worker,  # Can be a fraction
"num_cpus_per_worker": num_cpus_per_worker,  # Can be a fraction
"entropy_coeff": tune.grid_search([0, -1e-1, -1e-2]),
"multiagent": {
"policy_graphs": policy_graphs,
"policy_mapping_fn": tune.function(policy_mapping_fn),
},
"model": {"custom_model": "conv_to_fc_net", "use_lstm": True,
"lstm_cell_size": 128}
})
else:
# NOTE(review): truncated — this update dict has no closing brace here.
config.update({
#"train_batch_size": 128,
"horizon": 1000,
# "lr_schedule": [[0, hparams['lr_init']],
# [20000000, hparams['lr_final']]],
"num_workers": num_workers,
"num_gpus": gpus_for_driver,  # The number of GPUs for the driver
"num_cpus_for_driver": cpus_for_driver,
alg_run = 'PPO'

# Base config: RLlib PPO defaults plus the experiment overrides.
config = ppo.DEFAULT_CONFIG.copy()
for key, value in {
        'num_workers': N_CPUS,
        'train_batch_size': HORIZON * N_ROLLOUTS,
        'gamma': 0.999,  # discount rate
        'clip_actions': True,
        'horizon': HORIZON,
        'vf_share_layers': True,
}.items():
    config[key] = value
config['model'].update({'fcnet_hiddens': [64, 64]})

# Grid-searched hyperparameters.
config['lr'] = tune.grid_search([5e-5, 5e-4])
config['num_sgd_iter'] = tune.grid_search([10, 30])

# LSTM settings: condition the cell on the previous action/reward and fix
# its size. (use_lstm itself is left at the RLlib default here.)
config['model']['lstm_use_prev_action_reward'] = True
config['model']['lstm_cell_size'] = 64

# Persist the flow params inside the config so rollouts can be replayed.
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

create_env, env_name = make_create_env(params=flow_params, version=0)
alg_run = 'PPO'

# Base config: RLlib PPO defaults plus the experiment overrides.
config = ppo.DEFAULT_CONFIG.copy()
for key, value in {
        'num_workers': N_CPUS,
        'train_batch_size': HORIZON * N_ROLLOUTS,
        'gamma': 0.999,  # discount rate
        'clip_actions': True,
        'horizon': HORIZON,
        'vf_share_layers': True,
}.items():
    config[key] = value
config['model'].update({'fcnet_hiddens': [64, 64]})

# Grid-searched hyperparameters: learning rate and SGD passes per batch.
config['lr'] = tune.grid_search([5e-5, 5e-4])
config['num_sgd_iter'] = tune.grid_search([10, 30])

# Also sweep over whether the policy network uses an LSTM.
config['model']['use_lstm'] = tune.grid_search([True, False])

# Persist the flow params inside the config so rollouts can be replayed.
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

create_env, env_name = make_create_env(params=flow_params, version=0)