Raises:
  NotImplementedError: For action spaces other than Box and Discrete.

Returns:
  Wrapped OpenAI Gym environment.
"""
if isinstance(config.env, str):
env = gym.make(config.env)
else:
env = config.env()
# Ensure that the environment has the specification attribute set as expected
# by the monitor wrapper.
if not hasattr(env, 'spec'):
    setattr(env, 'spec', None)
if config.max_length:
env = tools.wrappers.LimitDuration(env, config.max_length)
env = gym.wrappers.Monitor(
env, outdir, lambda unused_episode_number: True)
if isinstance(env.action_space, gym.spaces.Box):
env = tools.wrappers.RangeNormalize(env)
env = tools.wrappers.ClipAction(env)
elif isinstance(env.action_space, gym.spaces.Discrete):
env = tools.wrappers.RangeNormalize(env, action=False)
else:
message = "Unsupported action space '{}'".format(type(env.action_space))
raise NotImplementedError(message)
env = tools.wrappers.ConvertTo32Bit(env)
env = tools.wrappers.CacheSpaces(env)
return env
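# Hedged usage sketch (not from the original snippet): assuming the factory
# above is exposed as `create_env(config, outdir)`, it could be driven with a
# simple namespace config; the names and values below are illustrative only.
from types import SimpleNamespace

config = SimpleNamespace(
    env='Pendulum-v0',  # Gym id string, or a zero-argument callable
    max_length=200)     # per-episode step limit for LimitDuration
env = create_env(config, outdir='/tmp/monitor')
observation = env.reset()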
import argparse, shutil, os, sys, logging
import gym
import numpy as np
if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
update_argument_parser(parser, GENERAL_OPTIONS)
parser.add_argument("--env",required=True)
parser.add_argument("--agent",required=True)
parser.add_argument("--plot",action="store_true")
args,_ = parser.parse_known_args([arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])
env = make(args.env)
env_spec = env.spec
mondir = args.outfile + ".dir"
if os.path.exists(mondir): shutil.rmtree(mondir)
os.mkdir(mondir)
env = gym.wrappers.Monitor(env, mondir, video_callable=None if args.video else VIDEO_NEVER)
agent_ctor = get_agent_cls(args.agent)
update_argument_parser(parser, agent_ctor.options)
args = parser.parse_args()
if args.timestep_limit == 0:
args.timestep_limit = env_spec.timestep_limit
cfg = args.__dict__
np.random.seed(args.seed)
agent = agent_ctor(env.observation_space, env.action_space, cfg)
if args.use_hdf:
hdf, diagnostics = prepare_h5_file(args)
gym.logger.setLevel(logging.WARN)
COUNTER = 0
def callback(stats):
global COUNTER
COUNTER += 1
updateTargetNetwork = 10000
explorationRate = 1
minibatch_size = 64
learnStart = 64
learningRate = 0.00025
discountFactor = 0.99
memorySize = 1000000
network_inputs = 17
network_outputs = 3
network_structure = [300, 300]
current_epoch = 0
deepQ = DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
deepQ.initNetworks(network_structure)
# env.monitor.start(outdir, force=True, seed=None)
env = gym.wrappers.Monitor(env, outdir, force=True)
else:
    # Load weights, monitor info, and parameter info.
    # TODO: add try/except around this else branch.
    with open(params_json) as infile:
        d = json.load(infile)
epochs = d.get('epochs') * 10
steps = d.get('steps')
updateTargetNetwork = d.get('updateTargetNetwork')
explorationRate = d.get('explorationRate')
minibatch_size = d.get('minibatch_size')
learnStart = d.get('learnStart')
learningRate = d.get('learningRate')
discountFactor = d.get('discountFactor')
memorySize = d.get('memorySize')
network_inputs = d.get('network_inputs')
network_outputs = d.get('network_outputs')
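# Hedged sketch (not from the original snippet): the loader above expects a
# JSON params file with the keys read via d.get(...); the filename and values
# below are illustrative only.
import json

params = {
    'epochs': 1000, 'steps': 10000, 'updateTargetNetwork': 10000,
    'explorationRate': 1, 'minibatch_size': 64, 'learnStart': 64,
    'learningRate': 0.00025, 'discountFactor': 0.99, 'memorySize': 1000000,
    'network_inputs': 17, 'network_outputs': 3,
}
with open('params.json', 'w') as f:
    json.dump(params, f)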
def make_env(test):
env = gym.make(args.env)
# Use different random seeds for train and test envs
env_seed = 2 ** 32 - 1 - args.seed if test else args.seed
env.seed(env_seed)
# Cast observations to float32 because our model uses float32
env = chainerrl.wrappers.CastObservationToFloat32(env)
if args.monitor:
env = gym.wrappers.Monitor(env, args.outdir)
if isinstance(env.action_space, spaces.Box):
misc.env_modifiers.make_action_filtered(env, clip_action_filter)
if not test:
# Scale rewards (and thus returns) to a reasonable range so that
# training is easier
env = chainerrl.wrappers.ScaleReward(env, args.reward_scale_factor)
if args.render and not test:
env = chainerrl.wrappers.Render(env)
return env
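# Hedged usage sketch (assumed, not part of the original snippet): the factory
# above yields differently seeded train and test environments.
train_env = make_env(test=False)
eval_env = make_env(test=True)
observation = train_env.reset()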
@contextlib.contextmanager
def monitored_environment(self):
    # Yield/close pattern: hand control to the caller's with-block, then
    # close the (possibly monitored) environment on exit.
    if self.use_monitor:
        self.log_dir = tempfile.mkdtemp()
        self.emulator.env = gym.wrappers.Monitor(self.emulator.env, self.log_dir)
    yield
    self.emulator.env.close()
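# Hedged usage sketch: `runner` and `run_episodes` below are hypothetical
# names, used only to show how the context manager above would be entered.
with runner.monitored_environment():
    run_episodes(runner.emulator.env)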
def testApproxQAgent():
env = gym.make("PuckWorld-v0")
#env = SimpleGridWorld()
directory = "/home/qiang/workspace/reinforce/python/monitor"
env = gym.wrappers.Monitor(env, directory, force=True)
agent = ApproxQAgent(env,
                     trans_capacity=50000,
                     hidden_dim=32)
env.reset()
print("Learning...")
agent.learning(gamma=0.99,
               learning_rate=1e-3,
               batch_size=64,
               max_episodes=5000,   # maximum number of training episodes
               min_epsilon=0.2,     # minimum epsilon
               epsilon_factor=0.3,  # ratio of the episode index where epsilon
                                    # reaches min_epsilon to max_episodes; the
                                    # smaller it is, the more episodes run
                                    # with min_epsilon
               epochs=2)            # training passes per sampled batch
def make_env(idx, test):
# Use different random seeds for train and test envs
process_seed = int(process_seeds[idx])
env_seed = 2 ** 32 - 1 - process_seed if test else process_seed
env = atari_wrappers.wrap_deepmind(
atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
episode_life=not test,
clip_rewards=not test,
frame_stack=False,
)
if test:
# Randomize actions like epsilon-greedy in evaluation as well
env = chainerrl.wrappers.RandomizeAction(env, args.eval_epsilon)
env.seed(env_seed)
if args.monitor:
env = gym.wrappers.Monitor(
env, args.outdir,
mode='evaluation' if test else 'training')
if args.render:
env = chainerrl.wrappers.Render(env)
return env
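# Hedged sketch (assumed, not part of the original snippet): a factory taking
# an index like the one above is typically fanned out into a batch of
# differently seeded environments; the env count of 4 is illustrative.
import functools

import chainerrl

vec_env = chainerrl.envs.MultiprocessVectorEnv(
    [functools.partial(make_env, idx, test=False) for idx in range(4)])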
def make_env(process_idx, test):
# Use different random seeds for train and test envs
process_seed = process_seeds[process_idx]
env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
env = atari_wrappers.wrap_deepmind(
atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
episode_life=not test,
clip_rewards=not test)
env.seed(int(env_seed))
if args.monitor:
env = gym.wrappers.Monitor(
env, args.outdir,
mode='evaluation' if test else 'training')
if args.render:
env = chainerrl.wrappers.Render(env)
return env
GAMMA = 0.99
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-r", "--runfile", required=True, help="Name of the runfile to use")
parser.add_argument("-m", "--monitor", help="Use monitor and save it's data into given dir")
args = parser.parse_args()
run = runfile.RunFile(args.runfile)
cuda_enabled = run.getboolean("defaults", "cuda", fallback=False)
env = gym.make(run.get("defaults", "env")).env
if args.monitor:
env = gym.wrappers.Monitor(env, args.monitor)
# model returns probability of actions
model = nn.Sequential(
nn.Linear(env.observation_space.shape[0], 50),
nn.ReLU(),
# nn.Linear(100, 50),
# nn.ReLU(),
nn.Linear(50, env.action_space.n),
nn.Softmax(dim=1)  # probabilities over actions; dim=1 is the action axis
)
if cuda_enabled:
model.cuda()
agent = ptan.agent.PolicyAgent(model, cuda=cuda_enabled)
exp_source = ptan.experience.ExperienceSource(env=env, agent=agent, steps_count=run.getint("defaults", "n_steps"))
exp_buffer = ptan.experience.ExperienceReplayBuffer(exp_source, run.getint("exp_buffer", "size"))
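# Hedged sketch (assumed, not part of the original snippet): ptan replay
# buffers are driven via populate()/sample(); the sizes below are illustrative.
for _ in range(1000):
    exp_buffer.populate(1)         # pull one new transition from exp_source
    if len(exp_buffer) < 32:
        continue                   # wait until a full batch is available
    batch = exp_buffer.sample(32)  # training batch for the policy update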
def get_env(seed):
env = gym.make('LunarLander-v2')
set_global_seeds(seed)
env.seed(seed)
expt_dir = '/tmp/hw3_vid_dir/'
env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True, video_callable=False)
return env
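# Hedged usage sketch (not from the original snippet): roll random actions in
# the monitored environment built above, then close it to flush monitor files.
env = get_env(seed=0)
observation = env.reset()
for _ in range(100):
    observation, reward, done, _ = env.step(env.action_space.sample())
    if done:
        observation = env.reset()
env.close()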