How to use the gym.wrappers.Monitor function in gym

To help you get started, we’ve selected a few gym.wrappers.Monitor examples, based on popular ways it is used in public projects.

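Before the project snippets, here is a minimal, self-contained sketch of the wrapper (assuming a classic gym release where gym.wrappers.Monitor still exists; later gym/gymnasium versions replace it with RecordVideo and RecordEpisodeStatistics):

import gym

# Create an environment and wrap it so episode statistics (and optionally
# videos) are written to the given directory.
env = gym.make("CartPole-v1")
env = gym.wrappers.Monitor(
    env,
    "/tmp/cartpole-monitor",   # output directory for the monitor files
    force=True,                # overwrite results of a previous run
    video_callable=False,      # disable video recording, keep statistics
)

obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())

env.close()  # flushes the monitor's result files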

github google-research / batch-ppo / agents / scripts / visualize.py View on Github external
NotImplementedError: For action spaces other than Box and Discrete.

  Returns:
    Wrapped OpenAI Gym environment.
  """
  if isinstance(config.env, str):
    env = gym.make(config.env)
  else:
    env = config.env()
  # Ensure that the environment has the specification attribute set as expected
  # by the monitor wrapper.
  if not hasattr(env, 'spec'):
    setattr(env, 'spec', getattr(env, 'spec', None))
  if config.max_length:
    env = tools.wrappers.LimitDuration(env, config.max_length)
  env = gym.wrappers.Monitor(
      env, outdir, lambda unused_episode_number: True)
  if isinstance(env.action_space, gym.spaces.Box):
    env = tools.wrappers.RangeNormalize(env)
    env = tools.wrappers.ClipAction(env)
  elif isinstance(env.action_space, gym.spaces.Discrete):
    env = tools.wrappers.RangeNormalize(env, action=False)
  else:
    message = "Unsupported action space '{}'".format(type(env.action_space))
    raise NotImplementedError(message)
  env = tools.wrappers.ConvertTo32Bit(env)
  env = tools.wrappers.CacheSpaces(env)
  return env
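The third positional argument to gym.wrappers.Monitor above is video_callable; a function that always returns True records a video of every episode. A hedged sketch of the common alternatives, under classic gym semantics, with a hypothetical output directory (recording video also requires a working render backend):

import gym

outdir = "/tmp/monitor-demo"   # hypothetical output directory
env = gym.make("CartPole-v1")

# Pick exactly one schedule per environment instance; classic gym refuses to
# wrap an already-monitored env a second time.

# 1) Record every episode (what the snippet above does):
env = gym.wrappers.Monitor(env, outdir, lambda episode_id: True, force=True)

# 2) Log statistics but never record video:
# env = gym.wrappers.Monitor(env, outdir, video_callable=False, force=True)

# 3) Record every 100th episode:
# env = gym.wrappers.Monitor(
#     env, outdir, video_callable=lambda episode_id: episode_id % 100 == 0,
#     force=True)

# 4) Default (video_callable=None): a capped cubic schedule that records
#    episodes 0, 1, 8, 27, ... up to 1000, then every 1000th episode.
# env = gym.wrappers.Monitor(env, outdir, force=True)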
github joschu / modular_rl / run_pg.py View on Github external
import shutil, os, logging
import gym

if __name__ == "__main__":
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    update_argument_parser(parser, GENERAL_OPTIONS)
    parser.add_argument("--env",required=True)
    parser.add_argument("--agent",required=True)
    parser.add_argument("--plot",action="store_true")
    args,_ = parser.parse_known_args([arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])
    env = make(args.env)
    env_spec = env.spec
    mondir = args.outfile + ".dir"
    if os.path.exists(mondir): shutil.rmtree(mondir)
    os.mkdir(mondir)
    env = gym.wrappers.Monitor(env, mondir, video_callable=None if args.video else VIDEO_NEVER)
    agent_ctor = get_agent_cls(args.agent)
    update_argument_parser(parser, agent_ctor.options)
    args = parser.parse_args()
    if args.timestep_limit == 0:
        args.timestep_limit = env_spec.timestep_limit
    cfg = args.__dict__
    np.random.seed(args.seed)
    agent = agent_ctor(env.observation_space, env.action_space, cfg)
    if args.use_hdf:
        hdf, diagnostics = prepare_h5_file(args)
    gym.logger.setLevel(logging.WARN)

    COUNTER = 0
    def callback(stats):
        global COUNTER
        COUNTER += 1
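VIDEO_NEVER is defined elsewhere in run_pg.py; with classic gym, video_callable=None selects the default recording schedule, so the Monitor line above records video only when --video is given. A hypothetical stand-in if you adapt the snippet on its own:

# Hypothetical stand-in for VIDEO_NEVER: a schedule that never records video,
# so the Monitor writes episode statistics only.
VIDEO_NEVER = lambda episode_id: False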
github xpharry / dqn-adaptive-cruise / gym-vehicle / experiments / vehicle / circle_track3_vehicle_lcc_dqn_fcnn.py View on Github external
        updateTargetNetwork = 10000
        explorationRate = 1
        minibatch_size = 64
        learnStart = 64
        learningRate = 0.00025
        discountFactor = 0.99
        memorySize = 1000000
        network_inputs = 17
        network_outputs = 3
        network_structure = [300, 300]
        current_epoch = 0

        deepQ = DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
        deepQ.initNetworks(network_structure)
        # env.monitor.start(outdir, force=True, seed=None)
        # Assign the Monitor wrapper back to env; the original call discarded
        # the return value, leaving the environment unmonitored.
        env = gym.wrappers.Monitor(env, outdir, force=True)
    else:
        # Load weights, monitor info, and parameter info.
        # TODO: add a try/except around this else branch.
        with open(params_json) as outfile:
            d = json.load(outfile)
            epochs = d.get('epochs') * 10
            steps = d.get('steps')
            updateTargetNetwork = d.get('updateTargetNetwork')
            explorationRate = d.get('explorationRate')
            minibatch_size = d.get('minibatch_size')
            learnStart = d.get('learnStart')
            learningRate = d.get('learningRate')
            discountFactor = d.get('discountFactor')
            memorySize = d.get('memorySize')
            network_inputs = d.get('network_inputs')
            network_outputs = d.get('network_outputs')
github chainer / chainerrl / examples / mujoco / train_ddpg_gym.py View on Github external
def make_env(test):
        env = gym.make(args.env)
        # Use different random seeds for train and test envs
        env_seed = 2 ** 32 - 1 - args.seed if test else args.seed
        env.seed(env_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = gym.wrappers.Monitor(env, args.outdir)
        if isinstance(env.action_space, spaces.Box):
            misc.env_modifiers.make_action_filtered(env, clip_action_filter)
        if not test:
            # Scale rewards (and thus returns) to a reasonable range so that
            # training is easier
            env = chainerrl.wrappers.ScaleReward(env, args.reward_scale_factor)
        if args.render and not test:
            env = chainerrl.wrappers.Render(env)
        return env
github steveKapturowski / tensorflow-rl / algorithms / actor_learner.py View on Github external
def monitored_environment(self):
        if self.use_monitor:
            self.log_dir = tempfile.mkdtemp()
            self.emulator.env = gym.wrappers.Monitor(self.emulator.env, self.log_dir)

        yield
        self.emulator.env.close()
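The bare yield above suggests monitored_environment is used as a context manager (presumably decorated with contextlib.contextmanager in the original file). A standalone sketch of the same pattern, with hypothetical names and assuming classic gym:

import contextlib
import tempfile

import gym


@contextlib.contextmanager
def monitored(env_id, video_callable=False):
    """Yield a monitored env whose logs go to a fresh temporary directory."""
    log_dir = tempfile.mkdtemp()
    env = gym.wrappers.Monitor(gym.make(env_id), log_dir,
                               video_callable=video_callable, force=True)
    try:
        yield env
    finally:
        env.close()  # flush the monitor files even if the caller raises


# Usage: the monitor is closed automatically when the block exits.
with monitored("CartPole-v1") as env:
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())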
github qqiang00 / ReinforcemengLearningPractice / reinforce / agents.py View on Github external
def testApproxQAgent():
    env = gym.make("PuckWorld-v0")
    #env = SimpleGridWorld()
    directory = "/home/qiang/workspace/reinforce/python/monitor"
    
    env = gym.wrappers.Monitor(env, directory, force=True)
    agent = ApproxQAgent(env,
                         trans_capacity = 50000, 
                         hidden_dim = 32)
    env.reset()
    print("Learning...")  
    agent.learning(gamma=0.99, 
                   learning_rate = 1e-3,
                   batch_size = 64,
                   max_episodes=5000,   # maximum number of training episodes
                   min_epsilon = 0.2,   # minimum epsilon
                   epsilon_factor = 0.3,# fraction of the total episode count at
                                        # which min_epsilon starts being used; the
                                        # smaller the value, the more episodes run
                                        # with min_epsilon
                   epochs = 2           # number of training passes per batch
                   )
github chainer / chainerrl / examples / atari / train_dqn_batch_ale.py View on Github external
def make_env(idx, test):
        # Use different random seeds for train and test envs
        process_seed = int(process_seeds[idx])
        env_seed = 2 ** 32 - 1 - process_seed if test else process_seed
        env = atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
            episode_life=not test,
            clip_rewards=not test,
            frame_stack=False,
        )
        if test:
            # Randomize actions like epsilon-greedy in evaluation as well
            env = chainerrl.wrappers.RandomizeAction(env, args.eval_epsilon)
        env.seed(env_seed)
        if args.monitor:
            env = gym.wrappers.Monitor(
                env, args.outdir,
                mode='evaluation' if test else 'training')
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env
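The mode argument tags recorded episodes as training or evaluation runs in the monitor's statistics. A minimal hedged sketch of the same idea, using CartPole instead of Atari so it runs without ALE, with hypothetical directories:

import gym

# Separate monitors for training and evaluation rollouts; classic gym's
# Monitor accepts mode='training' or mode='evaluation'.
train_env = gym.wrappers.Monitor(
    gym.make("CartPole-v1"), "/tmp/monitor-train",
    mode='training', video_callable=False, force=True)
eval_env = gym.wrappers.Monitor(
    gym.make("CartPole-v1"), "/tmp/monitor-eval",
    mode='evaluation', video_callable=False, force=True)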
github chainer / chainerrl / examples / atari / train_acer_ale.py View on Github external
def make_env(process_idx, test):
        # Use different random seeds for train and test envs
        process_seed = process_seeds[process_idx]
        env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
        env = atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
            episode_life=not test,
            clip_rewards=not test)
        env.seed(int(env_seed))
        if args.monitor:
            env = gym.wrappers.Monitor(
                env, args.outdir,
                mode='evaluation' if test else 'training')
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env
github Shmuma / ptan / samples / reinforce.py View on Github external
GAMMA = 0.99


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--runfile", required=True, help="Name of the runfile to use")
    parser.add_argument("-m", "--monitor", help="Use monitor and save it's data into given dir")
    args = parser.parse_args()

    run = runfile.RunFile(args.runfile)

    cuda_enabled = run.getboolean("defaults", "cuda", fallback=False)
    env = gym.make(run.get("defaults", "env")).env
    if args.monitor:
        env = gym.wrappers.Monitor(env, args.monitor)

    # model returns probability of actions
    model = nn.Sequential(
        nn.Linear(env.observation_space.shape[0], 50),
        nn.ReLU(),
        # nn.Linear(100, 50),
        # nn.ReLU(),
        nn.Linear(50, env.action_space.n),
        nn.Softmax()
    )
    if cuda_enabled:
        model.cuda()

    agent = ptan.agent.PolicyAgent(model, cuda=cuda_enabled)
    exp_source = ptan.experience.ExperienceSource(env=env, agent=agent, steps_count=run.getint("defaults", "n_steps"))
    exp_buffer = ptan.experience.ExperienceReplayBuffer(exp_source, run.getint("exp_buffer", "size"))
github buaazhangfan / CS294-112-Deep-Reinforcement-Learning / hw3 / run_dqn_lander.py View on Github external
def get_env(seed):
    env = gym.make('LunarLander-v2')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True, video_callable=False)

    return env
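Here video_callable=False disables video recording entirely, so the Monitor only logs episode statistics. A short usage sketch for the helper above, assuming the original file's imports (gym, from gym import wrappers, os.path as osp, a set_global_seeds helper) and Box2D installed for LunarLander-v2:

# Run one random-policy episode through the monitored env, then close it so
# the Monitor flushes its statistics files.
env = get_env(seed=0)
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()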