Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
print_every=10, record=False, render=False, eps=None):
# NOTE(review): this excerpt begins mid-signature and has lost its
# indentation; the function name and its leading parameters are not visible.

# Reset the environment, then build the map through generate_map.
env.reset()
generate_map(env, map_size, food_handle, handles)
# Per-round bookkeeping.
step_ct = 0
total_reward = 0
done = False
pos_reward_ct = set()
n = len(handles)
# One slot per handle for observations, agent ids and actions.
obs = [None for _ in range(n)]
ids = [None for _ in range(n)]
acts = [None for _ in range(n)]
# env.get_num(...) for each handle; only printed in the visible portion.
nums = [env.get_num(handle) for handle in handles]
# Episode buffer with capacity 5000 (its use is outside this excerpt).
sample_buffer = magent.utility.EpisodesBuffer(capacity=5000)
# Floor-half of map_size for both coordinates.
center_x, center_y = map_size // 2, map_size // 2
print("===== sample =====")
# eps is formatted with %s, so the default eps=None prints without error.
print("eps %s number %s" % (eps, nums))
start_time = time.time()
# Initialised here; where they are updated is outside this excerpt.
new_rule_ct = 0
last_base_reward = {}
# Loop until done becomes truthy (the update of done is not visible here).
while not done:
# take actions for every model
for i in range(n):
# Refresh the observation and agent ids for handle i.
obs[i] = env.get_observation(handles[i])
ids[i] = env.get_agent_id(handles[i])
# rule modification signal
print_every=10, record=False, render=False, eps=None):
# NOTE(review): this excerpt begins mid-signature and has lost its
# indentation; the function name and its leading parameters are not visible.

# Reset the environment, then build the map through generate_map.
env.reset()
# NOTE(review): rnd is passed through to generate_map but is not defined in
# the visible lines — presumably a parameter from the cut-off signature.
generate_map(env, map_size, food_handle, handles, rnd)
# Per-round bookkeeping.
step_ct = 0
total_reward = 0
done = False
pos_reward_ct = set()
n = len(handles)
# One slot per handle for observations, agent ids and actions.
obs = [None for _ in range(n)]
ids = [None for _ in range(n)]
acts = [None for _ in range(n)]
# env.get_num(...) for each handle; only printed in the visible portion.
nums = [env.get_num(handle) for handle in handles]
# Episode buffer with capacity 5000 (its use is outside this excerpt).
sample_buffer = magent.utility.EpisodesBuffer(capacity=5000)
# Floor-half of map_size for both coordinates.
center_x, center_y = map_size // 2, map_size // 2
print("===== sample =====")
# eps is formatted with %s, so the default eps=None prints without error.
print("eps %s number %s" % (eps, nums))
start_time = time.time()
# Initialised here; where they are updated is outside this excerpt.
new_rule_ct = 0
last_base_reward = {}
# Loop until done becomes truthy (the update of done is not visible here).
while not done:
# take actions for every model
for i in range(n):
# Refresh the observation and agent ids for handle i.
obs[i] = env.get_observation(handles[i])
ids[i] = env.get_agent_id(handles[i])
# rule modification signal
# Dispatch on the command tag in cmd[0]; the remaining elements of cmd carry
# that command's arguments. Replies go back over conn.
# NOTE(review): the enclosing function/loop is outside this excerpt and the
# original indentation has been lost.
if cmd[0] == 'act':
# cmd = ('act', policy, eps, array_info)
policy = cmd[1]
eps = cmd[2]
array_info = cmd[3]
# Receive the arrays described by array_info from conn.
view, feature, ids = NDArrayPackage(array_info).recv_from(conn)
obs = (view, feature)
acts = model.infer_action(obs, ids, policy=policy, eps=eps)
# Reply in two parts: first package.info, then the payload via send_to.
package = NDArrayPackage(acts)
conn.send(package.info)
package.send_to(conn)
elif cmd[0] == 'train':
# cmd = ('train', print_every)
print_every = cmd[1]
total_loss, value = model.train(sample_buffer, print_every=print_every)
# Replace the buffer with a fresh one after training.
sample_buffer = magent.utility.EpisodesBuffer(sample_buffer_capacity)
conn.send((total_loss, value))
elif cmd[0] == 'sample':
# cmd = ('sample', array_info)
array_info = cmd[1]
rewards, alives = NDArrayPackage(array_info).recv_from(conn)
# ids, obs and acts are the names assigned in the 'act' branch, so this
# presumably relies on an 'act' command having been handled first — verify
# against the caller.
sample_buffer.record_step(ids, obs, acts, rewards, alives)
conn.send("done")
elif cmd[0] == 'save':
# cmd = ('save', savedir, n_iter)
savedir = cmd[1]
n_iter = cmd[2]
model.save(savedir, n_iter)
conn.send("done")
elif cmd[0] == 'load':
# cmd = ('load', savedir, n_iter, name)
savedir = cmd[1]
n_iter = cmd[2]
name = cmd[3]
# NOTE(review): unlike 'save', no reply on conn is visible after load — the
# excerpt may be truncated here; confirm against the full file.
model.load(savedir, n_iter, name)
# Run one round: reset the environment, build the map, then repeatedly have
# each model choose actions for its handle and step the environment.
# NOTE(review): only the start of this function is visible (it is cut off
# after env.step()) and the original indentation has been lost. print_every,
# train and render are not used in the visible portion.
def play_a_round(env, map_size, handles, models, print_every, train=True, render=False, eps=None):
env.reset()
generate_map(env, map_size, handles)
step_ct = 0
done = False
n = len(handles)
# One slot per handle for observations, agent ids and actions.
obs = [[] for _ in range(n)]
ids = [[] for _ in range(n)]
acts = [[] for _ in range(n)]
# env.get_num(...) for each handle; only printed in the visible portion.
nums = [env.get_num(handle) for handle in handles]
# Episode buffer with capacity 1000 (its use is outside this excerpt).
sample_buffer = magent.utility.EpisodesBuffer(capacity=1000)
# One reward accumulator per handle.
total_reward = [0 for _ in range(n)]
print("===== sample =====")
# NOTE(review): "%.2f" needs a number; with the default eps=None this line
# raises TypeError, so callers must pass a numeric eps.
print("eps %.2f number %s" % (eps, nums))
start_time = time.time()
# Loop until env.step() returns a truthy done flag.
while not done:
# take actions for every model
for i in range(n):
obs[i] = env.get_observation(handles[i])
ids[i] = env.get_agent_id(handles[i])
acts[i] = models[i].infer_action(obs[i], ids[i], 'e_greedy', eps=eps)
# NOTE(review): slice-assigns the constant 3 over every inferred action
# (assumes acts[i] is a NumPy-style array), so the model's choices are
# discarded before set_action — confirm this override is intentional.
acts[i][:] = 3
env.set_action(handles[i], acts[i])
# simulate one step
done = env.step()
print_every=10, record=False, render=False, eps=None):
# NOTE(review): this excerpt begins mid-signature, has lost its indentation,
# and is cut off after the "# sample" marker; the function name and its
# leading parameters are not visible.

# Reset the environment, then build the map through generate_map.
env.reset()
generate_map(env, map_size, food_handle, handles)
# Per-round bookkeeping.
step_ct = 0
total_reward = 0
done = False
pos_reward_ct = set()
n = len(handles)
# One slot per handle for observations, agent ids and actions.
obs = [None for _ in range(n)]
ids = [None for _ in range(n)]
acts = [None for _ in range(n)]
# env.get_num(...) for each handle; only printed in the visible portion.
nums = [env.get_num(handle) for handle in handles]
# Episode buffer with capacity 5000 (its use is outside this excerpt).
sample_buffer = magent.utility.EpisodesBuffer(capacity=5000)
print("===== sample =====")
# eps is formatted with %s, so the default eps=None prints without error.
print("eps %s number %s" % (eps, nums))
start_time = time.time()
# Loop until env.step() returns a truthy done flag.
while not done:
# take actions for every model
for i in range(n):
obs[i] = env.get_observation(handles[i])
ids[i] = env.get_agent_id(handles[i])
# models is not defined in the visible lines — presumably a parameter from
# the cut-off signature. Actions come from the 'e_greedy' policy with eps.
acts[i] = models[i].infer_action(obs[i], ids[i], policy='e_greedy', eps=eps)
env.set_action(handles[i], acts[i])
# simulate one step
done = env.step()
# sample