# --- Example 1: evaluate two saved models head-to-head ---
import time

import numpy as np
import magent


def play_wrapper(model_names, n_rounds):
    # Relies on `env`, `handles`, `map_size`, and `play` from the enclosing script.
    time_stamp = time.time()

    # start one model worker per side; each item indexes as
    # (savedir, name, epoch, ..., model_class)
    models = []
    for i, item in enumerate(model_names):
        models.append(magent.ProcessingModel(env, handles[i], item[1], 0, item[-1]))

    # restore saved weights
    for i, item in enumerate(model_names):
        models[i].load(item[0], item[2])

    leftID, rightID = 0, 1
    result = 0
    total_num = np.zeros(2)
    for _ in range(n_rounds):
        round_num = play(env, handles, models, map_size, leftID, rightID)
        total_num += round_num
        leftID, rightID = rightID, leftID  # swap sides to cancel positional bias
        result += 1 if round_num[0] > round_num[1] else 0
    result = 1.0 * result  # promote the win count to float

    # shut down the model worker processes
    for model in models:
        model.quit()
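# Usage sketch (assumption): judging by the indexing above, each entry of
# `model_names` carries (savedir, name, epoch, ..., model_class). The paths,
# names, and epoch numbers below are hypothetical placeholders.
#
# from magent.builtin.tf_model import DeepQNetwork
# model_names = [
#     ("data/battle_model", "battle-l", 1000, DeepQNetwork),
#     ("data/battle_model", "battle-r", 2000, DeepQNetwork),
# ]
# play_wrapper(model_names, n_rounds=10)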
# --- Example 2: one ProcessingModel per group, with shared hyperparameters ---
# (fragment: the if/elif chain below continues an algorithm-selection block
#  that binds RLModel and its hyperparameters)
        base_args = {'batch_size': batch_size // unroll_step, 'unroll_step': unroll_step,
                     'memory_size': 8 * 300,
                     'target_update': target_update, 'train_freq': train_freq}
    elif args.alg == 'a2c':
        raise NotImplementedError
    else:
        raise NotImplementedError

    # create one model worker per group, each with its own name and port
    names = [args.name + "-l0", args.name + "-l1", args.name + "-r0", args.name + "-r1"]
    models = []
    for i in range(len(names)):
        model_args = {'eval_obs': eval_obs[i]}
        model_args.update(base_args)
        models.append(magent.ProcessingModel(env, handles[i], names[i], 20000 + i, 1000, RLModel, **model_args))

    # load checkpoints if requested
    savedir = 'save_model'
    if args.load_from is not None:
        start_from = args.load_from
        print("load ... %d" % start_from)
        for model in models:
            model.load(savedir, start_from)
    else:
        start_from = 0

    # print state info
    print(args)
    print("view_size", env.get_view_space(handles[0]))
    print("feature_size", env.get_feature_space(handles[0]))
# --- Example 3: train one side against a fixed opponent (cf. train_against.py) ---
# (fragment: assumes env, handles, names, eval_obs, batch_size, unroll_step,
#  and RandomActor are defined earlier in the script)
    train_freq = 5

    models = []

    # load opponent: a saved DeepQNetwork checkpoint, or a random actor
    if args.opponent >= 0:
        from magent.builtin.tf_model import DeepQNetwork
        models.append(magent.ProcessingModel(env, handles[1], names[1], 20000, 0, DeepQNetwork))
        models[0].load("data/battle_model", args.opponent)
    else:
        models.append(magent.ProcessingModel(env, handles[1], names[1], 20000, 0, RandomActor))

    # load our model (the one being trained)
    if args.alg == 'dqn':
        from magent.builtin.tf_model import DeepQNetwork
        models.append(magent.ProcessingModel(env, handles[0], names[0], 20001, 1000, DeepQNetwork,
                                             batch_size=batch_size,
                                             learning_rate=3e-4,
                                             memory_size=2 ** 20, train_freq=train_freq,
                                             eval_obs=eval_obs[0]))
        step_batch_size = None
    elif args.alg == 'drqn':
        from magent.builtin.tf_model import DeepRecurrentQNetwork
        models.append(magent.ProcessingModel(env, handles[0], names[0], 20001, 1000, DeepRecurrentQNetwork,
                                             batch_size=batch_size // unroll_step, unroll_step=unroll_step,
                                             learning_rate=3e-4,
                                             memory_size=4 * 625, train_freq=train_freq,
                                             eval_obs=eval_obs[0]))
        step_batch_size = None
    elif args.alg == 'a2c':
        from magent.builtin.mx_model import AdvantageActorCritic
        step_batch_size = int(10 * args.map_size * args.map_size * 0.04)  # cast: used as a step count
        models.append(magent.ProcessingModel(env, handles[0], names[0], 20001, 1000, AdvantageActorCritic,
                                             learning_rate=1e-3))

    # load checkpoint if requested; resume our model (models[1]), not the opponent
    savedir = 'save_model'
    if args.load_from is not None:
        start_from = args.load_from
        print("load ... %d" % start_from)
        models[1].load(savedir, start_from)
    else:
        start_from = 0

    # print debug info
    print(args)
    print("view_size", env.get_view_space(handles[0]))
    print("feature_size", env.get_feature_space(handles[0]))
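# Usage sketch (assumption): a resume-friendly loop over the models built above;
# `train_round` and `n_round` are hypothetical stand-ins for the script's real
# per-round train/play step and round count.
#
# for k in range(start_from, start_from + n_round):
#     train_round(env, models, k)       # hypothetical per-round training step
#     if k % 5 == 0:
#         models[1].save(savedir, k)    # checkpoint our model, not the opponent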
# --- Example 4: two-sided self-play with a shared hyperparameter dict ---
# (fragment: as above, the first branch of the algorithm-selection chain is cut off)
        base_args = {'batch_size': batch_size // unroll_step, 'unroll_step': unroll_step,
                     'memory_size': 8 * 625, 'learning_rate': 1e-4,
                     'target_update': target_update, 'train_freq': train_freq}
    elif args.alg == 'a2c':
        raise NotImplementedError
    else:
        raise NotImplementedError

    # init models
    names = [args.name + "-l", args.name + "-r"]
    models = []
    for i in range(len(names)):
        model_args = {'eval_obs': eval_obs[i]}
        model_args.update(base_args)
        models.append(magent.ProcessingModel(env, handles[i], names[i], 20000 + i, 1000, RLModel, **model_args))

    # load checkpoints if requested
    savedir = 'save_model'
    if args.load_from is not None:
        start_from = args.load_from
        print("load ... %d" % start_from)
        for model in models:
            model.load(savedir, start_from)
    else:
        start_from = 0

    # print state info
    print(args)
    print("view_space", env.get_view_space(handles[0]))
    print("feature_space", env.get_feature_space(handles[0]))
# --- Example 5: self-play training setup (cf. train_battle.py) ---
# (fragment: the 'dqn' branch of the chain is cut off above)
        RLModel = DeepRecurrentQNetwork
        base_args = {'batch_size': batch_size // unroll_step, 'unroll_step': unroll_step,
                     'memory_size': 8 * 625, 'learning_rate': 1e-4,
                     'target_update': target_update, 'train_freq': train_freq}
    elif args.alg == 'a2c':
        # see train_against.py to know how to use a2c
        raise NotImplementedError

    # init models
    names = [args.name + "-l", args.name + "-r"]
    models = []
    for i in range(len(names)):
        model_args = {'eval_obs': eval_obs[i]}
        model_args.update(base_args)
        models.append(magent.ProcessingModel(env, handles[i], names[i], 20000 + i, 1000, RLModel, **model_args))

    # load checkpoints if requested
    savedir = 'save_model'
    if args.load_from is not None:
        start_from = args.load_from
        print("load ... %d" % start_from)
        for model in models:
            model.load(savedir, start_from)
    else:
        start_from = 0

    # print state info
    print(args)
    print("view_space", env.get_view_space(handles[0]))
    print("feature_space", env.get_feature_space(handles[0]))