# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, config, env, device, **kwargs):
    """Build a fully-connected feature extractor with per-layer LayerNorm.

    Args:
        config: dict-like config; reads 'nn.sizes' for the hidden-layer widths.
        env: environment whose flattened observation space sets the input size.
        device: torch device the module is moved to.
        **kwargs: forwarded to the parent constructor.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    sizes = config['nn.sizes']
    # Linear stack: flattened observation -> configured hidden widths.
    self.feature_layers = make_fc(flatdim(env.observation_space), sizes)
    # Orthogonal init tuned for ReLU activations, biases zeroed.
    for fc in self.feature_layers:
        ortho_init(fc, nonlinearity='relu', constant_bias=0.0)
    # One LayerNorm per hidden layer, matching each layer's width.
    self.layer_norms = nn.ModuleList([nn.LayerNorm(width) for width in sizes])
    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Set up the actor-critic networks, optimizer, and optional LR schedule.

    Args:
        config: dict-like config; reads 'nn.sizes', 'agent.std0', 'agent.std_style',
            'agent.std_range', 'agent.beta', 'agent.lr', 'agent.use_lr_scheduler',
            'train.timestep'.
        env: environment providing the action space (Discrete or Box).
        device: torch device for the value head and submodules.
        **kwargs: forwarded to the parent constructor and submodules.

    Raises:
        TypeError: if the action space is neither Discrete nor Box.
    """
    super().__init__(config, env, device, **kwargs)
    feature_dim = config['nn.sizes'][-1]
    self.feature_network = MLP(config, env, device, **kwargs)
    if isinstance(env.action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
    elif isinstance(env.action_space, Box):
        self.action_head = DiagGaussianHead(feature_dim,
                                            flatdim(env.action_space),
                                            device,
                                            config['agent.std0'],
                                            config['agent.std_style'],
                                            config['agent.std_range'],
                                            config['agent.beta'],
                                            **kwargs)
    else:
        # Fail fast: previously an unsupported space left self.action_head
        # undefined, deferring the failure to a confusing AttributeError later.
        raise TypeError(f'expected Discrete or Box action space, got {type(env.action_space)}')
    self.V_head = nn.Linear(feature_dim, 1).to(device)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
    self.total_timestep = 0
    self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
    if config['agent.use_lr_scheduler']:
        # Linear decay to a small floor over the full training budget.
        self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)
def __init__(self, config, env, device, **kwargs):
    """Construct the MLP feature extractor and move it to the target device.

    Reads 'nn.sizes' from config for the hidden-layer widths; input width is
    the flattened observation space.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    hidden_sizes = config['nn.sizes']
    self.feature_layers = make_fc(flatdim(env.observation_space), hidden_sizes)
    # ReLU-oriented orthogonal initialization on every linear layer.
    for linear in self.feature_layers:
        ortho_init(linear, nonlinearity='relu', constant_bias=0.0)
    # LayerNorm modules paired one-to-one with the hidden layers.
    norms = [nn.LayerNorm(h) for h in hidden_sizes]
    self.layer_norms = nn.ModuleList(norms)
    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Set up the actor-critic networks, optimizer, and optional LR schedule.

    Args:
        config: dict-like config; reads 'nn.sizes', 'agent.std0', 'agent.lr',
            'agent.use_lr_scheduler', 'train.timestep'.
        env: environment providing the action space (Discrete or Box).
        device: torch device for the value head and submodules.
        **kwargs: forwarded to the parent constructor and submodules.

    Raises:
        TypeError: if the action space is neither Discrete nor Box.
    """
    super().__init__(config, env, device, **kwargs)
    feature_dim = config['nn.sizes'][-1]
    self.feature_network = MLP(config, env, device, **kwargs)
    if isinstance(env.action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
    elif isinstance(env.action_space, Box):
        self.action_head = DiagGaussianHead(feature_dim, flatdim(env.action_space), device, config['agent.std0'], **kwargs)
    else:
        # Fail fast: previously an unsupported space left self.action_head
        # undefined, deferring the failure to a confusing AttributeError later.
        raise TypeError(f'expected Discrete or Box action space, got {type(env.action_space)}')
    self.V_head = nn.Linear(feature_dim, 1).to(device)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
    self.total_timestep = 0
    self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
    if config['agent.use_lr_scheduler']:
        # Linear decay to a small floor over the full training budget.
        self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)
def __init__(self, config, env, device, **kwargs):
    """Build a state-value network: tanh-initialized MLP features plus a scalar V head.

    Reads 'nn.sizes' from config; input width is the flattened observation space.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    sizes = config['nn.sizes']
    self.feature_layers = make_fc(flatdim(env.observation_space), sizes)
    # Orthogonal init tuned for tanh activations, biases zeroed.
    for fc in self.feature_layers:
        ortho_init(fc, nonlinearity='tanh', constant_bias=0.0)
    # Scalar value head on top of the last hidden layer.
    self.V_head = nn.Linear(sizes[-1], 1).to(device)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
    self.to(self.device)
def __init__(self, config, env, device, **kwargs):
    """Build two independent Q-networks over concatenated (observation, action) inputs.

    Each critic is a [256, 256] fully-connected stack followed by a scalar Q head.
    """
    super().__init__(**kwargs)
    self.config = config
    self.env = env
    self.device = device
    # Shared input width: flattened observation concatenated with flattened action.
    in_dim = flatdim(env.observation_space) + flatdim(env.action_space)
    hidden = [256, 256]
    # Q1
    self.first_feature_layers = make_fc(in_dim, hidden)
    self.first_Q_head = nn.Linear(hidden[-1], 1)
    # Q2
    self.second_feature_layers = make_fc(in_dim, hidden)
    self.second_Q_head = nn.Linear(hidden[-1], 1)
    self.to(self.device)
def __init__(self, env, capacity, device):
    """Pre-allocate flat float32 numpy storage for a fixed-capacity replay buffer.

    Args:
        env: environment whose flattened observation/action spaces size the arrays.
        capacity: maximum number of stored transitions.
        device: torch device kept for later tensor conversion by other methods.
    """
    self.env = env
    self.capacity = capacity
    self.device = device
    obs_dim = flatdim(env.observation_space)
    act_dim = flatdim(env.action_space)
    self.observations = np.zeros((capacity, obs_dim), dtype=np.float32)
    self.actions = np.zeros((capacity, act_dim), dtype=np.float32)
    self.rewards = np.zeros(capacity, dtype=np.float32)
    self.next_observations = np.zeros((capacity, obs_dim), dtype=np.float32)
    self.masks = np.zeros(capacity, dtype=np.float32)
    # Ring-buffer bookkeeping: current fill level and next write index.
    self.size = 0
    self.pointer = 0