import time

import magent.gridworld as gw
from magent.builtin.rule_model import RandomActor

if __name__ == "__main__":
    n_step = 10

    # init the game "forest" (or "battle" here);
    # load_forest() builds the config and is defined elsewhere in the script
    env = gw.GridWorld(load_forest())
    env.reset()

    # add two groups of animals
    deer_handle, tiger_handle = env.get_handles()
    env.add_agents(deer_handle, method="random", n=1000000)
    env.add_agents(tiger_handle, method="random", n=1000000)

    # init two random-policy models, one per group
    model1 = RandomActor(env, deer_handle)
    model2 = RandomActor(env, tiger_handle)

    total_reward = 0
    print(env.get_view_space(deer_handle))
    print(env.get_view_space(tiger_handle))

    for i in range(n_step):
        print("===== step %d =====" % i)
        start_time = time.time()

        obs_1 = measure_time("get obs 1", env.get_observation, deer_handle)
        acts_1 = measure_time("infer act 1", model1.infer_action, obs_1)
        measure_time("set act 1", env.set_action, deer_handle, acts_1)

        obs_2 = measure_time("get obs 2", env.get_observation, tiger_handle)
        acts_2 = measure_time("infer act 2", model2.infer_action, obs_2)
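Every timed call above goes through a measure_time helper that the snippet does not define. A minimal sketch of what it plausibly does, assuming it just wraps a call with wall-clock timing and passes the result through (the name and output format are inferred, not taken from the script):

import time

def measure_time(label, func, *args, **kwargs):
    # run func, report how long it took under the given label, return its result
    start = time.time()
    result = func(*args, **kwargs)
    print("%-12s: %.4f s" % (label, time.time() - start))
    return result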
# init models
names = [args.name + "-a", "battle"]   # args, handles, and eval_obs come from earlier in the script

batch_size = 512
unroll_step = 16
train_freq = 5

models = []

# load opponent: a pretrained DQN if a checkpoint index is given, else a random policy
if args.opponent >= 0:
    from magent.builtin.tf_model import DeepQNetwork
    models.append(magent.ProcessingModel(env, handles[1], names[1], 20000, 0, DeepQNetwork))
    models[0].load("data/battle_model", args.opponent)
else:
    models.append(magent.ProcessingModel(env, handles[1], names[1], 20000, 0, RandomActor))

# load our model
if args.alg == 'dqn':
    from magent.builtin.tf_model import DeepQNetwork
    models.append(magent.ProcessingModel(env, handles[0], names[0], 20001, 1000, DeepQNetwork,
                                         batch_size=batch_size,
                                         learning_rate=3e-4,
                                         memory_size=2 ** 20, train_freq=train_freq,
                                         eval_obs=eval_obs[0]))
    step_batch_size = None
elif args.alg == 'drqn':
    from magent.builtin.tf_model import DeepRecurrentQNetwork
    # integer division: the recurrent model batches whole unrolled sequences
    models.append(magent.ProcessingModel(env, handles[0], names[0], 20001, 1000, DeepRecurrentQNetwork,
                                         batch_size=batch_size // unroll_step, unroll_step=unroll_step,
                                         learning_rate=3e-4,
                                         memory_size=4 * 625, train_freq=train_freq,
                                         eval_obs=eval_obs[0]))
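Several of these snippets fall back to RandomActor when no trained model is wanted. A rough sketch of what such a random policy looks like, assuming the MAGent convention that infer_action returns one int32 action per living agent (SimpleRandomActor is a hypothetical stand-in, not the library's implementation):

import numpy as np

class SimpleRandomActor:
    """Hypothetical stand-in for magent.builtin.rule_model.RandomActor."""

    def __init__(self, env, handle, name=None):
        self.env = env
        self.handle = handle
        self.n_action = env.get_action_space(handle)[0]

    def infer_action(self, obs, *args, **kwargs):
        # one uniformly random action per agent currently in the group
        n_agents = self.env.get_num(self.handle)
        return np.random.randint(self.n_action, size=n_agents, dtype=np.int32)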
import argparse

import magent
from magent.builtin.rule_model import RandomActor

parser = argparse.ArgumentParser()
parser.add_argument("--greedy", action="store_true")
parser.add_argument("--map_size", type=int, default=500)
parser.add_argument("--name", type=str, default="tiger")
parser.add_argument('--alg', default='dqn', choices=['dqn', 'drqn', 'a2c'])
args = parser.parse_args()

# init the game
env = magent.GridWorld("double_attack", map_size=args.map_size)
env.set_render_dir("build/render")

# two groups of animals
deer_handle, tiger_handle = env.get_handles()

# init two models: a random policy for the deer, a learned one for the tigers
models = [
    RandomActor(env, deer_handle, tiger_handle),
]

batch_size = 512
unroll = 8
if args.alg == 'dqn':
    from magent.builtin.tf_model import DeepQNetwork
    models.append(DeepQNetwork(env, tiger_handle, "tiger",
                               batch_size=batch_size,
                               memory_size=2 ** 20, learning_rate=4e-4))
    step_batch_size = None
elif args.alg == 'drqn':
    from magent.builtin.tf_model import DeepRecurrentQNetwork
    # integer division: the recurrent model batches whole unrolled sequences
    models.append(DeepRecurrentQNetwork(env, tiger_handle, "tiger",
                                        batch_size=batch_size // unroll, unroll_step=unroll,
                                        memory_size=20000, learning_rate=4e-4))
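With both models built, an episode of the double_attack game follows the same observe-act-step pattern as the first snippet on this page. A minimal sketch using only the GridWorld calls that appear elsewhere here (the agent counts are illustrative, and the full script's reward and training bookkeeping is omitted):

env.reset()
env.add_agents(deer_handle, method="random", n=100)
env.add_agents(tiger_handle, method="random", n=20)

done = False
while not done:
    # each group observes, its model picks actions, actions are queued
    for handle, model in zip([deer_handle, tiger_handle], models):
        obs = env.get_observation(handle)
        ids = env.get_agent_id(handle)
        acts = model.infer_action(obs, ids)
        env.set_action(handle, acts)
    done = env.step()
    env.render()      # writes frames into build/render
    env.clear_dead()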
env.set_render_dir("build/render")

# init models
batch_size = 256
unroll_step = 8
target_update = 1000
train_freq = 5

handles = [0]
models = []
# models.append(DeepQNetwork(env, handles[0], "cars",
#                            batch_size=batch_size,
#                            memory_size=2 ** 20, target_update=target_update,
#                            train_freq=train_freq))
models.append(RandomActor(env, handles[0], "cars"))

# load a checkpoint if one was requested
savedir = 'save_model'
if args.load_from is not None:
    start_from = args.load_from
    print("load ... %d" % start_from)
    for model in models:
        model.load(savedir, start_from)
else:
    start_from = 0

# print debug info
print(args)
print("view_space", env.get_view_space(handles[0]))
print("feature_space", env.get_feature_space(handles[0]))
def sample_observation(env, handles, n_obs=-1, step=-1):
    """Sample observations by random actors.

    Parameters
    ----------
    env : environment
    handles : list of handle
    n_obs : int
        number of observations to sample
    step : int
        maximum number of steps to run

    Returns
    -------
    ret : list of raw observation
        raw observation for every group;
        the format of a raw observation is tuple(view, feature)
    """
    models = [RandomActor(env, handle) for handle in handles]
    n = len(handles)

    views = [[] for _ in range(n)]
    features = [[] for _ in range(n)]

    done = False
    step_ct = 0
    while not done:
        obs = [env.get_observation(handle) for handle in handles]
        ids = [env.get_agent_id(handle) for handle in handles]

        for i in range(n):
            act = models[i].infer_action(obs[i], ids[i])
            env.set_action(handles[i], act)

        done = env.step()
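        # --- the original snippet is cut off here; the remainder below is a
        # reconstruction of the bookkeeping the docstring promises (assumes
        # `import copy` and `import numpy as np`; treating n_obs as a subsample
        # size and step as a step cap is an inferred reading) ---

        # record a copy of every group's raw observation at this step
        for i in range(n):
            views[i].append(copy.copy(obs[i][0]))
            features[i].append(copy.copy(obs[i][1]))

        env.clear_dead()
        step_ct += 1
        if step != -1 and step_ct >= step:
            break

    # flatten the per-step batches and optionally subsample n_obs of them
    ret = []
    for i in range(n):
        v = np.concatenate(views[i])
        f = np.concatenate(features[i])
        if n_obs != -1:
            idx = np.random.choice(len(v), size=n_obs, replace=False)
            v, f = v[idx], f[idx]
        ret.append((v, f))
    return ret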