Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Constructor for PPO actor network
Args:
D_obs: observation space dimension, scalar
D_act: action space dimension, scalar
hidden_sizes: list of fully connected layer dimensions
init_log_sig: initial value for log standard deviation parameter
'''
super(PPO_ActorNetwork, self).__init__()
# assumes D_obs here is the correct RNN hidden dim
xp_input = L.Placeholder((None, D_obs))
xp = L.Linear(hidden_sizes[0])(xp_input)
xp = L.ReLU()(xp)
xp = L.Linear(hidden_sizes[1])(xp)
xp = L.ReLU()(xp)
xp = L.Linear(D_act)(xp)
xp = L.Tanh()(xp)
self.model = L.Functional(inputs=xp_input, outputs=xp)
self.model.build((None, D_obs))
self.log_var = nn.Parameter(torch.zeros(1, D_act) + init_log_sig)
def __init__(self, D_in, D_act, hidden_sizes=None, use_layernorm=True):
    '''
    Constructor for ActorNetworkX.

    Builds the stack
        Linear -> ReLU [-> LayerNorm] -> Linear -> ReLU [-> LayerNorm]
        -> Linear -> Tanh
    and stores it in self.model.

    Args:
        D_in: size of the last input dimension; the model is declared and
            built with shape (None, D_in)
        D_act: output size of the final linear layer
        hidden_sizes: sequence whose first two entries are the widths of
            the two hidden linear layers (further entries are ignored).
            Defaults to [300, 200] when None.
        use_layernorm: if True, apply L.LayerNorm(1) after each hidden ReLU
    '''
    super(ActorNetworkX, self).__init__()
    # A None sentinel replaces the former list default so that no mutable
    # object is shared between calls; the effective default is unchanged.
    if hidden_sizes is None:
        hidden_sizes = [300, 200]

    xp_input = L.Placeholder((None, D_in))

    # First hidden layer.
    xp = L.Linear(hidden_sizes[0])(xp_input)
    xp = L.ReLU()(xp)
    if use_layernorm:
        # Normalize 1 dimension
        # NOTE(review): presumably the argument selects how many trailing
        # dimensions are normalized -- confirm against L.LayerNorm.
        xp = L.LayerNorm(1)(xp)

    # Second hidden layer, same pattern as the first.
    xp = L.Linear(hidden_sizes[1])(xp)
    xp = L.ReLU()(xp)
    if use_layernorm:
        xp = L.LayerNorm(1)(xp)

    # Output head: project to D_act, then apply Tanh.
    xp = L.Linear(D_act)(xp)
    xp = L.Tanh()(xp)

    self.model = L.Functional(inputs=xp_input, outputs=xp)
    self.model.build((None, D_in))