def __init__(self, screen_ratio=4, coords_ratio=4, use_color=True, use_rc_frame=True, stack=3, frame_skip=4, action_repeat=4):
utils.EzPickle.__init__(self, 'montezuma_revenge', 'image')
self.env = gym.make('MontezumaRevengeNoFrameskip-v4').unwrapped
self.ale = self.env.ale
        self.ale.setFloat('repeat_action_probability'.encode('utf-8'), 0)  # disable sticky actions, so the env is deterministic
self.max_lives = self.ale.lives()
# observations
self.screen_ratio = screen_ratio
self.original_height = 224
self.original_width = 160
self.screen_height = self.original_height // screen_ratio
self.screen_width = self.original_width // screen_ratio
self.screen_shape = (self.screen_height, self.screen_width)
self.use_color = use_color
self.use_rc_frame = use_rc_frame
self.stack = stack
self.frame_skip = frame_skip
        # channels per stacked frame: 3 (RGB) if use_color else 1 (grayscale), plus one extra channel when use_rc_frame is set
        n_frames = stack * ((3 if use_color else 1) + use_rc_frame)
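# Worked example (hedged): with the default arguments above, the channel
# arithmetic comes out as follows.
#   height, width = 224 // 4, 160 // 4            -> (56, 40)
#   channels per frame = 3 (RGB) + 1 (rc frame)   -> 4
#   n_frames = 3 * 4                              -> 12
# i.e. a (56, 40) screen with 12 stacked channels per observation.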
import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils
DEFAULT_CAMERA_CONFIG = {
'trackbodyid': 2,
'distance': 3.0,
'lookat': np.array((0.0, 0.0, 1.15)),
'elevation': -20.0,
}
class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self,
xml_file='hopper.xml',
forward_reward_weight=1.0,
ctrl_cost_weight=1e-3,
healthy_reward=1.0,
terminate_when_unhealthy=True,
healthy_state_range=(-100.0, 100.0),
healthy_z_range=(0.7, float('inf')),
healthy_angle_range=(-0.2, 0.2),
reset_noise_scale=5e-3,
exclude_current_positions_from_observation=True):
utils.EzPickle.__init__(**locals())
self._forward_reward_weight = forward_reward_weight
self._ctrl_cost_weight = ctrl_cost_weight
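# Note on utils.EzPickle.__init__(**locals()) above: at that point locals()
# holds `self` plus every constructor argument, so the unbound call both
# supplies EzPickle's `self` parameter and records the arguments; unpickling
# then rebuilds the env by calling the constructor again. A minimal sketch of
# the same pattern (ToyEnv is illustrative, not from this codebase):
import pickle

class ToyEnv(utils.EzPickle):
    def __init__(self, scale=1.0):
        utils.EzPickle.__init__(self, scale)  # remember args for reconstruction
        self.scale = scale

env = pickle.loads(pickle.dumps(ToyEnv(scale=2.0)))
assert env.scale == 2.0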
action_string, first = check_button(action_string, action, 0x4000, 'L3', first)
action_string, first = check_button(action_string, action, 0x8000, 'R3', first)
if action_string == '':
action_string = 'NOOP'
return action_string
def to_ram(rle):
    # getRAM fills a caller-provided buffer in place, so preallocate
    # an array of the right size and dtype before the call.
    ram_size = rle.getRAMSize()
    ram = np.zeros(ram_size, dtype=np.uint8)
    rle.getRAM(ram)
    return ram
class RleEnv(gym.Env, utils.EzPickle):
metadata = {'render.modes': ['human', 'rgb_array']}
def __init__(self, game='classic_kong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
"""Frameskip should be either a tuple (indicating a random range to
choose from, with the top value exclude), or an int."""
utils.EzPickle.__init__(self, game, obs_type)
assert obs_type in ('ram', 'image')
self.game_path = self.get_rom_path(game)
self._obs_type = obs_type
self.frameskip = frameskip
self.rle = rle_python_interface.RLEInterface()
self.viewer = None
def _seed(self, seed=None):
self.np_random, seed = utils.seeding.np_random(seed)
return [seed]
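# The frameskip docstring above leaves the sampling implicit; a step() in this
# style typically resolves it as in the sketch below (hedged, not verbatim
# from this repo), using the np_random created in _seed():
def _resolve_frameskip(self):
    if isinstance(self.frameskip, int):
        return self.frameskip
    low, high = self.frameskip
    return self.np_random.randint(low, high)  # top value excluded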
def __init__(self):
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
## Adversarial setup
        self._adv_f_bname = b'pole'  # byte-string name of the body the adversarial force acts on
        bnames = self.model.body_names
        self._adv_bindex = bnames.index(self._adv_f_bname)  # index of that body in the model
adv_max_force = 5.
high_adv = np.ones(2)*adv_max_force
low_adv = -high_adv
self.adv_action_space = spaces.Box(low_adv, high_adv)
self.pro_action_space = self.action_space
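# How the 2-D adversarial action typically reaches the simulator (hedged,
# RARL-style sketch): it is written into MuJoCo's external-force array, whose
# rows are 6-D (force xyz + torque xyz).
def _adv_to_xfrc(self, adv_act):
    # apply (fx, fz) to the pole body; every other body gets zero force
    new_xfrc = self.model.data.xfrc_applied * 0.0
    new_xfrc[self._adv_bindex] = np.array([adv_act[0], 0., adv_act[1], 0., 0., 0.])
    self.model.data.xfrc_applied = new_xfrc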
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
utils.EzPickle.__init__(self)
def _step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore)/self.dt
ctrl_cost = .5 * np.square(a).sum()
contact_cost = 0.5 * 1e-3 * np.sum(
np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
survive_reward = 1.0
reward = forward_reward - ctrl_cost - contact_cost + survive_reward
state = self.state_vector()
        notdone = np.isfinite(state).all() \
            and state[2] >= 0.2 and state[2] <= 1.0
        done = not notdone
self.include_obs_history = 1
self.include_act_history = 0
# data structure for modeling delays in observation and action
self.observation_buffer = []
self.action_buffer = []
self.obs_delay = 0
self.act_delay = 0
self.tilt_z = 0
self.current_step = 0
self.max_step = 1000
mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
utils.EzPickle.__init__(self)
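# The delay buffers above work by serving stale entries: each step pushes the
# newest observation/action and reads the one obs_delay/act_delay steps back.
# A minimal sketch of that bookkeeping (the method name is illustrative):
def _delayed_obs(self, current_obs):
    self.observation_buffer.append(current_obs)
    idx = max(0, len(self.observation_buffer) - 1 - self.obs_delay)
    return self.observation_buffer[idx]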
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env
class AntTruncatedObsEnv(mujoco_env.MujocoEnv, utils.EzPickle):
"""
External forces (sim.data.cfrc_ext) are removed from the observation.
Otherwise identical to Ant-v2 from
https://github.com/openai/gym/blob/master/gym/envs/mujoco/ant.py
"""
def __init__(self):
mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
utils.EzPickle.__init__(self)
def step(self, a):
xposbefore = self.get_body_com("torso")[0]
self.do_simulation(a, self.frame_skip)
xposafter = self.get_body_com("torso")[0]
forward_reward = (xposafter - xposbefore)/self.dt
ctrl_cost = .5 * np.square(a).sum()
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
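# Per the class docstring, the only change from Ant-v2 is the observation:
# the clipped cfrc_ext block is dropped. A _get_obs consistent with that
# description (hedged sketch):
def _get_obs(self):
    return np.concatenate([
        self.sim.data.qpos.flat[2:],  # joint positions, minus global x/y
        self.sim.data.qvel.flat,      # joint velocities
    ])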
import numpy as np
import os
from gym import utils
from gym.envs.mujoco import mujoco_env
class MujocoQuadEnv(mujoco_env.MujocoEnv, utils.EzPickle):
def __init__(self, xml_name="quadrotor_ground.xml"):
        xml_path = os.path.join(os.path.dirname(__file__), "assets", xml_name)
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, xml_path, 2)
def step(self, a):
reward = 0
self.do_simulation(self.clip_action(a), self.frame_skip)
ob = self._get_obs()
notdone = np.isfinite(ob).all()
done = not notdone
return ob, reward, done, {}
    def clip_action(self, action):
        # assumed: bound the raw action to the env's action space (original body not shown)
        return np.clip(action, self.action_space.low, self.action_space.high)