self._max_timestep_eval = max_timestep_eval
self._gamma = gamma
self._lambda_ = lambda_
self._c1 = c1
self._c2 = c2
self._eval_every_n = eval_every_n
self._save_every_n = save_every_n
self._done_frac_for_policy_save = done_frac_for_policy_save
self._n_evals = n_evals
self._len_history_for_policy = len_history_for_policy
self._eval_temperatures = eval_temperatures
self._separate_eval = separate_eval

# Reduce the action space to (n_controls, n_actions): a Discrete space is a
# single control; a MultiDiscrete space is one control per component, and
# every component must offer the same number of actions.
action_space = self.train_env.action_space
assert isinstance(
    action_space, (gym.spaces.Discrete, gym.spaces.MultiDiscrete))
if isinstance(action_space, gym.spaces.Discrete):
  n_actions = action_space.n
  n_controls = 1
else:
  (n_controls,) = action_space.nvec.shape
  assert n_controls > 0
  assert onp.min(action_space.nvec) == onp.max(action_space.nvec), (
      "Every control must have the same number of actions.")
  n_actions = action_space.nvec[0]
self._n_actions = n_actions
self._n_controls = n_controls

self._rng = trax.get_random_number_generator_and_set_seed(random_seed)
self._rng, key1 = jax_random.split(self._rng, num=2)

vocab_size = policy_and_value_vocab_size
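# The branch above reduces either action-space type to the pair
# (n_controls, n_actions). A stand-alone illustration of that reduction with
# made-up spaces; this helper is not part of the original code:
import gym
import numpy as np

def controls_and_actions(action_space):
    """Mirror of the logic above: (number of controls, actions per control)."""
    if isinstance(action_space, gym.spaces.Discrete):
        return 1, action_space.n
    (n_controls,) = action_space.nvec.shape
    assert np.min(action_space.nvec) == np.max(action_space.nvec), (
        "Every control must have the same number of actions.")
    return n_controls, action_space.nvec[0]

print(controls_and_actions(gym.spaces.Discrete(4)))               # (1, 4)
print(controls_and_actions(gym.spaces.MultiDiscrete([6, 6, 6])))  # (3, 6)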
def __init__(self):
    self.REVERSE_STATE = 1
    self.GOAL_STATE = 3
    self.NUM_STATES = self.GOAL_STATE + 1
    self.observation_space = spaces.Box(0, 1, shape=(self.NUM_STATES,))
    self.action_space = spaces.Discrete(2)
    self.current_state = 0
    self.goal_reached = False
    self.max_steps = 500
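# The Box(0, 1, shape=(NUM_STATES,)) observation space suggests the current
# state is exposed as a length-NUM_STATES vector, most likely one-hot. A
# minimal sketch of such an encoding; `_observe` is a hypothetical helper,
# not part of the snippet:
import numpy as np

def _observe(current_state, num_states=4):
    obs = np.zeros(num_states, dtype=np.float32)
    obs[current_state] = 1.0
    return obs

print(_observe(3))  # state 3 (the goal state) -> [0. 0. 0. 1.]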
def flat_dim(space):
    if isinstance(space, gym.spaces.Box):
        return np.prod(space.low.shape)
    elif isinstance(space, gym.spaces.Discrete):
        return space.n
    elif isinstance(space, gym.spaces.Tuple):
        return np.sum([flat_dim(x) for x in space.spaces])
    else:
        raise NotImplementedError
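# Quick check of flat_dim against a few concrete spaces (imports assumed,
# since the snippet does not show them):
import gym
import numpy as np

box = gym.spaces.Box(low=-1.0, high=1.0, shape=(3, 4), dtype=np.float32)
disc = gym.spaces.Discrete(5)
tup = gym.spaces.Tuple((box, disc))

print(flat_dim(box))   # 12 -> product of the Box shape, 3 * 4
print(flat_dim(disc))  # 5  -> number of discrete actions
print(flat_dim(tup))   # 17 -> sum over the sub-spaces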
self._g = np.array([10., 10.])
# stochastic dynamics
self._A = np.array([[1.1, 0.], [1.0, 1.0]])
self._B = np.array([[1.], [0.]])
self._c = -self._A @ self._g  # stable at goal
self._sigma = 1e-8 * np.eye(2)
self._gw = np.array([1e1, 1e1])
self._uw = np.array([1.])
self._xmax = np.array([np.inf, np.inf])
self._umax = np.inf
self.action_space = spaces.Box(low=-self._umax,
                               high=self._umax, shape=(1,))
self.observation_space = spaces.Box(low=-self._xmax,
                                    high=self._xmax)
self.state = None
self.np_random = None
self.seed()
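# A minimal sketch of one transition, assuming the usual linear-Gaussian form
# x' = A x + B u + c + w with w ~ N(0, sigma); the env's step() is not shown
# in the snippet, so this only illustrates how the attributes above would
# typically be used.
import numpy as np

A = np.array([[1.1, 0.], [1.0, 1.0]])
B = np.array([[1.], [0.]])
g = np.array([10., 10.])
c = -A @ g
sigma = 1e-8 * np.eye(2)

x = np.array([9.5, 10.2])
u = np.array([0.1])
w = np.random.multivariate_normal(np.zeros(2), sigma)
x_next = A @ x + B @ u + c + w
# Because c = -A @ g, the update equals A @ (x - g) + B @ u + w: the
# transition is driven by the state's deviation from the goal g.
print(x_next)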
def __init__(self):
    utils.EzPickle.__init__(self)
    mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
    ## Adversarial setup
    self._adv_f_bname = b'pole'  # byte-string name of the body the adversarial force is applied to
    bnames = self.model.body_names
    self._adv_bindex = bnames.index(self._adv_f_bname)  # index of that body in the model
    adv_max_force = 5.
    high_adv = np.ones(2) * adv_max_force
    low_adv = -high_adv
    self.adv_action_space = spaces.Box(low_adv, high_adv)
    self.pro_action_space = self.action_space
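# The adversary gets its own 2-D force Box while the protagonist keeps the
# usual MuJoCo torque Box. Rebuilt stand-alone below just to show shapes and
# bounds; the protagonist bounds are placeholders, since the real ones come
# from the MuJoCo XML model:
import numpy as np
from gym import spaces

adv_max_force = 5.
adv_action_space = spaces.Box(-np.ones(2) * adv_max_force,
                              np.ones(2) * adv_max_force)
pro_action_space = spaces.Box(low=-3.0, high=3.0, shape=(1,))  # placeholder bounds

print(adv_action_space.sample().shape)  # (2,) force on the 'pole' body, each in [-5, 5]
print(pro_action_space.sample().shape)  # (1,) protagonist torque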
# (This `else` closes a branch, not shown in the snippet, on the brain's
#  action-space type; the Box(-1, 1) below is the continuous-control case.)
else:
    high = np.array([1] * brain.vector_action_space_size[0])
    self._action_space = spaces.Box(-high, high, dtype=np.float32)

high = np.array([np.inf] * brain.vector_observation_space_size)
self.action_meanings = brain.vector_action_descriptions
if self.use_visual:
    if brain.camera_resolutions[0]["blackAndWhite"]:
        depth = 1
    else:
        depth = 3
    self._observation_space = spaces.Box(0, 1, dtype=np.float32,
                                         shape=(brain.camera_resolutions[0]["height"],
                                                brain.camera_resolutions[0]["width"],
                                                depth))
else:
    self._observation_space = spaces.Box(-high, high, dtype=np.float32)
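# The two observation-space shapes this branch can produce, rebuilt with
# made-up sizes (84x84 resolution and an 8-dimensional vector observation):
import numpy as np
from gym import spaces

visual_obs_space = spaces.Box(0, 1, dtype=np.float32, shape=(84, 84, 3))  # image in [0, 1]
high = np.array([np.inf] * 8)
vector_obs_space = spaces.Box(-high, high, dtype=np.float32)              # unbounded vector

print(visual_obs_space.shape)  # (84, 84, 3)
print(vector_obs_space.shape)  # (8,)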
def __init__(self):
    self.n_seed = None
    self.observation_space = spaces.Tuple([
        # player states
        spaces.Tuple([
            spaces.Tuple([  # p0, p1, p2
                spaces.Discrete(200),  # score
                spaces.Tuple([  # income
                    spaces.MultiDiscrete([13, 4])
                ] * 52),
            ] * 3),
            spaces.Discrete(200),  # p3 score
            spaces.Tuple([  # hand
                spaces.MultiDiscrete([13, 4])
            ] * 13),
            spaces.Tuple([  # income
                spaces.MultiDiscrete([13, 4])
            ] * 52),
        ]),
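# The repeated spaces.MultiDiscrete([13, 4]) entries read naturally as one
# playing card each (13 ranks x 4 suits); the snippet is cut off before the
# outer Tuple is closed. A small stand-alone illustration under that reading:
from gym import spaces

card_space = spaces.MultiDiscrete([13, 4])    # (rank in 0..12, suit in 0..3)
hand_space = spaces.Tuple([card_space] * 13)  # a 13-card hand, as in the snippet

card = card_space.sample()
print(card)                       # e.g. [11  2] -> rank 11, suit 2
print(card_space.contains(card))  # True
print(len(hand_space.spaces))     # 13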
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):
    self.action_space = spaces.Discrete(int(maxWealth * 100))  # betting in penny increments
    self.observation_space = spaces.Tuple((
        spaces.Box(0, maxWealth, [1]),  # (w, b)
        spaces.Discrete(maxRounds + 1)))
    self.reward_range = (0, maxWealth)
    self.edge = edge
    self.wealth = initialWealth
    self.initialWealth = initialWealth
    self.maxRounds = maxRounds
    self.maxWealth = maxWealth
    self._seed()
    self._reset()
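# With Discrete(maxWealth * 100) actions and bets in penny increments, an
# action index presumably decodes to a bet of action / 100 dollars, capped by
# the current wealth. A hedged sketch of that decoding (the env's actual step
# logic is not shown in the snippet):
def decode_bet(action, wealth):
    return min(action / 100.0, wealth)

print(decode_bet(1250, 25.0))   # 12.5  -> a $12.50 bet from $25.00 of wealth
print(decode_bet(30000, 25.0))  # 25.0  -> capped at the full bankroll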
def spaces(self, spaces):
    if isinstance(spaces, gym.spaces.Space):
        spaces = [spaces]
    if not isinstance(spaces, (list, tuple, np.ndarray)):
        raise TypeError("Expecting the given spaces to be a list/tuple/np.ndarray of "
                        "`gym.spaces.Space`, but got instead: {}".format(type(spaces)))
    for i, space in enumerate(spaces):
        if not isinstance(space, gym.spaces.Space):
            raise TypeError("Expecting the {}th item to be an instance of `gym.spaces.Space`, "
                            "but got instead: {}".format(i, type(space)))
    self._spaces = spaces
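# This reads like the body of a property setter that normalizes and validates
# its input. The same logic as a stand-alone function, showing the three paths
# (single space wrapped in a list, valid sequence accepted, non-space rejected):
import gym
import numpy as np

def validate_spaces(spaces_arg):
    if isinstance(spaces_arg, gym.spaces.Space):
        spaces_arg = [spaces_arg]
    if not isinstance(spaces_arg, (list, tuple, np.ndarray)):
        raise TypeError("Expecting a list/tuple/np.ndarray of `gym.spaces.Space`, "
                        "but got instead: {}".format(type(spaces_arg)))
    for i, space in enumerate(spaces_arg):
        if not isinstance(space, gym.spaces.Space):
            raise TypeError("Expecting the {}th item to be a `gym.spaces.Space`, "
                            "but got instead: {}".format(i, type(space)))
    return spaces_arg

print(validate_spaces(gym.spaces.Discrete(3)))  # wrapped into [Discrete(3)]
try:
    validate_spaces("not a space")
except TypeError as e:
    print(e)                                    # rejected with a TypeError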