How to use the magent.utility.EpisodesBuffer class in magent

To help you get started, we’ve selected a few magent examples, based on popular ways it is used in public projects.

Secure your code as it's written — use Snyk Code to scan source code in minutes, with no build needed, and fix issues immediately.

Source: geek-ai/MAgent — train_arrange.py (view on GitHub, external)
print_every=10, record=False, render=False, eps=None):
    env.reset()
    generate_map(env, map_size, food_handle, handles)

    step_ct = 0
    total_reward = 0
    done = False

    pos_reward_ct = set()

    n = len(handles)
    obs  = [None for _ in range(n)]
    ids  = [None for _ in range(n)]
    acts = [None for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    sample_buffer = magent.utility.EpisodesBuffer(capacity=5000)

    center_x, center_y = map_size // 2, map_size // 2

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()

    new_rule_ct = 0

    last_base_reward = {}
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # rule modification signal
Source: geek-ai/MAgent — examples/train_arrange.py (view on GitHub, external)
print_every=10, record=False, render=False, eps=None):
    env.reset()
    generate_map(env, map_size, food_handle, handles, rnd)

    step_ct = 0
    total_reward = 0
    done = False

    pos_reward_ct = set()

    n = len(handles)
    obs  = [None for _ in range(n)]
    ids  = [None for _ in range(n)]
    acts = [None for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    sample_buffer = magent.utility.EpisodesBuffer(capacity=5000)

    center_x, center_y = map_size // 2, map_size // 2

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()

    new_rule_ct = 0

    last_base_reward = {}
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # rule modification signal
Source: geek-ai/MAgent — python/magent/model.py (view on GitHub, external)
if cmd[0] == 'act':
            policy = cmd[1]
            eps = cmd[2]
            array_info = cmd[3]

            view, feature, ids = NDArrayPackage(array_info).recv_from(conn)
            obs = (view, feature)

            acts = model.infer_action(obs, ids, policy=policy, eps=eps)
            package = NDArrayPackage(acts)
            conn.send(package.info)
            package.send_to(conn)
        elif cmd[0] == 'train':
            print_every = cmd[1]
            total_loss, value = model.train(sample_buffer, print_every=print_every)
            sample_buffer = magent.utility.EpisodesBuffer(sample_buffer_capacity)
            conn.send((total_loss, value))
        elif cmd[0] == 'sample':
            array_info = cmd[1]
            rewards, alives = NDArrayPackage(array_info).recv_from(conn)
            sample_buffer.record_step(ids, obs, acts, rewards, alives)
            conn.send("done")
        elif cmd[0] == 'save':
            savedir = cmd[1]
            n_iter = cmd[2]
            model.save(savedir, n_iter)
            conn.send("done")
        elif cmd[0] == 'load':
            savedir = cmd[1]
            n_iter = cmd[2]
            name = cmd[3]
            model.load(savedir, n_iter, name)
Source: geek-ai/MAgent — examples/train_city.py (view on GitHub, external)
def play_a_round(env, map_size, handles, models, print_every, train=True, render=False, eps=None):
    env.reset()

    generate_map(env, map_size, handles)

    step_ct = 0
    done = False

    n = len(handles)
    obs  = [[] for _ in range(n)]
    ids  = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    sample_buffer = magent.utility.EpisodesBuffer(capacity=1000)
    total_reward = [0 for _ in range(n)]

    print("===== sample =====")
    print("eps %.2f number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            acts[i] = models[i].infer_action(obs[i], ids[i], 'e_greedy', eps=eps)
            acts[i][:] = 3
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()
Source: geek-ai/MAgent — examples/train_gather.py (view on GitHub, external)
print_every=10, record=False, render=False, eps=None):
    env.reset()
    generate_map(env, map_size, food_handle, handles)

    step_ct = 0
    total_reward = 0
    done = False

    pos_reward_ct = set()

    n = len(handles)
    obs  = [None for _ in range(n)]
    ids  = [None for _ in range(n)]
    acts = [None for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    sample_buffer = magent.utility.EpisodesBuffer(capacity=5000)

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            acts[i] = models[i].infer_action(obs[i], ids[i], policy='e_greedy', eps=eps)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample