How to use the openai.deepq.deepq.ActWrapper function in openai

To help you get started, we’ve selected a few openai examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github niksaz / dota2-expert-demo / openai / deepq / deepq.py View on Github external
def load_act(path):
        with open(path, "rb") as f:
            model_data, act_params = cloudpickle.load(f)
        act = deepq.build_act(**act_params)
        sess = tf.Session()
        sess.__enter__()
        with tempfile.TemporaryDirectory() as td:
            arc_path = os.path.join(td, "packed.zip")
            with open(arc_path, "wb") as f:
                f.write(model_data)

            zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
            load_variables(os.path.join(td, "model"))

        return ActWrapper(act, act_params)
github niksaz / dota2-expert-demo / openai / deepq / deepq.py View on Github external
act, _, _, debug = deepq.build_train(
        scope='deepq_act',
        make_obs_ph=make_obs_ph,
        q_func=q_func,
        num_actions=env.action_space.n,
        optimizer=tf.train.AdamOptimizer(learning_rate=lr),
        gamma=gamma,
        grad_norm_clipping=10, )

    act_params = {
        'make_obs_ph': make_obs_ph,
        'q_func': q_func,
        'num_actions': env.action_space.n, }

    act = ActWrapper(act, act_params)

    exploration = LinearSchedule(schedule_timesteps=int(exploration_fraction * total_timesteps),
                                 initial_p=exploration_initial_eps,
                                 final_p=exploration_final_eps)

    U.initialize()

    reward_shaper = ActionAdviceRewardShaper(config=config)
    reward_shaper.load()
    reward_shaper.generate_merged_demo()

    full_exp_name = '{}-{}'.format(date.today().strftime('%Y%m%d'), experiment_name)
    experiment_dir = os.path.join('experiments', full_exp_name)
    os.makedirs(experiment_dir, exist_ok=True)

    summary_dir = os.path.join(experiment_dir, 'summaries')