# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/env python3
import gym
import numpy as np
from gym.envs.mujoco.mujoco_env import MujocoEnv
from learn2learn.gym.envs.meta_env import MetaEnv
class AntDirectionEnv(MetaEnv, MujocoEnv, gym.utils.EzPickle):
"""
[[Source]](https://github.com/learnables/learn2learn/blob/master/learn2learn/gym/envs/mujoco/ant_direction.py)
**Description**
This environment requires the Ant to learn to run in a random direction in the
XY plane. At each time step the ant receives a signal composed of a
control cost and a reward equal to its average velocity in the direction
of the plane. The tasks are 2d-arrays sampled uniformly along the unit circle.
The target direction is indicated by the vector from the origin to the sampled point.
The velocity is calculated as the distance (in the target direction) of the ant's torso
position before and after taking the specified action divided by a small value dt.
As noted in [1], a small positive bonus is added to the reward to stop the ant from
prematurely ending the episode.
**Credit**
#!/usr/bin/env python3
import numpy as np
from gym import spaces
from gym.utils import seeding
from learn2learn.gym.envs.meta_env import MetaEnv
class Particles2DEnv(MetaEnv):
"""
[[Source]](https://github.com/learnables/learn2learn/blob/master/learn2learn/gym/envs/particles/particles_2d.py)
**Description**
Each task is defined by the location of the goal. A point mass
receives a directional force and moves accordingly
(clipped in [-0.1,0.1]). The reward is equal to the negative
distance from the goal.
**Credit**
Adapted from Jonas Rothfuss' implementation.
"""
#!/usr/bin/env python3
import gym
import numpy as np
from gym.envs.mujoco.mujoco_env import MujocoEnv
from learn2learn.gym.envs.meta_env import MetaEnv
class AntForwardBackwardEnv(MetaEnv, MujocoEnv, gym.utils.EzPickle):
"""
[[Source]](https://github.com/learnables/learn2learn/blob/master/learn2learn/gym/envs/mujoco/ant_forward_backward.py)
**Description**
This environment requires the ant to learn to run forward or backward.
At each time step the ant receives a signal composed of a
control cost and a reward equal to its average velocity in the direction
of the plane. The tasks are Bernoulli samples on {-1, 1} with probability 0.5, where -1 indicates the ant should
move backward and +1 indicates the ant should move forward.
The velocity is calculated as the distance (in the direction of the plane) of the ant's torso
position before and after taking the specified action divided by a small value dt.
As noted in [1], a small positive bonus is added to the reward to stop the ant from
prematurely ending the episode.
**Credit**
#!/usr/bin/env python3
import gym
import numpy as np
from gym.envs.mujoco.mujoco_env import MujocoEnv
from learn2learn.gym.envs.meta_env import MetaEnv
def mass_center(model, sim):
    """Return the mass-weighted mean position of all bodies in the model.

    Computes sum_i(m_i * x_i) / sum_i(m_i) over ``model.body_mass`` and
    ``sim.data.xipos`` (per-body positions — presumably each body's
    center-of-mass frame origin in world coordinates; confirm against the
    MuJoCo docs).
    """
    body_masses = np.asarray(model.body_mass).ravel()
    body_positions = sim.data.xipos
    # np.average with 1-D weights along axis 0 is exactly
    # sum(mass * xpos, 0) / sum(mass).
    return np.average(body_positions, axis=0, weights=body_masses)
class HumanoidDirectionEnv(MetaEnv, MujocoEnv, gym.utils.EzPickle):
"""
[[Source]](https://github.com/learnables/learn2learn/blob/master/learn2learn/gym/envs/mujoco/humanoid_direction.py)
**Description**
This environment requires the humanoid to learn to run in a random direction in the
XY plane. At each time step the humanoid receives a signal composed of a
control cost and a reward equal to its average velocity in the target direction.
The tasks are 2d-arrays sampled uniformly along the unit circle.
The target direction is indicated by the vector from the origin to the sampled point.
The velocity is calculated as the distance (in the target direction) of the humanoid's torso
position before and after taking the specified action divided by a small value dt.
A small positive bonus is added to the reward to stop the humanoid from
prematurely ending the episode.
**Credit**
#!/usr/bin/env python3
import gym
import numpy as np
from gym.envs.mujoco.mujoco_env import MujocoEnv
from learn2learn.gym.envs.meta_env import MetaEnv
class HalfCheetahForwardBackwardEnv(MetaEnv, MujocoEnv, gym.utils.EzPickle):
"""
[[Source]](https://github.com/learnables/learn2learn/blob/master/learn2learn/gym/envs/mujoco/halfcheetah_forward_backward.py)
**Description**
This environment requires the half-cheetah to learn to run forward or backward.
At each time step the half-cheetah receives a signal composed of a
control cost and a reward equal to its average velocity in the direction
of the plane. The tasks are Bernoulli samples on {-1, 1} with probability 0.5, where -1 indicates the half-cheetah should
move backward and +1 indicates the half-cheetah should move forward.
The velocity is calculated as the distance (in the target direction) of the half-cheetah's torso
position before and after taking the specified action divided by a small value dt.
**Credit**
Adapted from Jonas Rothfuss' implementation.