# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a to_torch/to_numpy conversion test. The first
# three asserts use data_list_4 / data_list_4_torch before they are defined
# below, so the defining lines must live above this chunk (out of view).
# The whole section is duplicated again further down — looks like a bad merge.
assert isinstance(data_list_4_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
assert all(starmap(np.allclose,
zip(to_numpy(to_torch(data_list_4)), data_list_4)))
# arrays with mismatched ranks cannot be stacked: to_torch is expected to
# return a list of tensors rather than one stacked tensor
data_list_5 = [np.zeros(2), np.zeros((3, 3))]
data_list_5_torch = to_torch(data_list_5)
assert isinstance(data_list_5_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
# empty first axis: fancy-indexing with [] yields shape (0, 2, 2); the
# round-trip through to_torch/to_numpy must keep that empty shape
data_array = np.random.rand(3, 2, 2)
data_empty_tensor = to_torch(data_array[[]])
assert isinstance(data_empty_tensor, torch.Tensor)
assert data_empty_tensor.shape == (0, 2, 2)
data_empty_array = to_numpy(data_empty_tensor)
assert isinstance(data_empty_array, np.ndarray)
assert data_empty_array.shape == (0, 2, 2)
assert np.allclose(to_numpy(to_torch(data_array)), data_array)
# same-shape arrays should stack into a single (2, 3, 3) tensor
data_list_2 = [np.random.rand(3, 3), np.random.rand(3, 3)]
data_list_2_torch = to_torch(data_list_2)
assert data_list_2_torch.shape == (2, 3, 3)
assert np.allclose(to_numpy(to_torch(data_list_2)), data_list_2)
# mismatched trailing dims -> list of tensors, compared element-wise
data_list_3 = [np.zeros((3, 2)), np.zeros((3, 3))]
data_list_3_torch = to_torch(data_list_3)
assert isinstance(data_list_3_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_3_torch)
assert all(starmap(np.allclose,
zip(to_numpy(to_torch(data_list_3)), data_list_3)))
# mismatched leading dims -> list of tensors as well
data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
data_list_4_torch = to_torch(data_list_4)
assert isinstance(data_list_4_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
assert all(starmap(np.allclose,
zip(to_numpy(to_torch(data_list_4)), data_list_4)))
# NOTE(review): from here the section above repeats verbatim (duplicate paste)
data_list_5 = [np.zeros(2), np.zeros((3, 3))]
data_list_5_torch = to_torch(data_list_5)
assert isinstance(data_list_5_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
data_array = np.random.rand(3, 2, 2)
data_empty_tensor = to_torch(data_array[[]])
assert isinstance(data_empty_tensor, torch.Tensor)
assert data_empty_tensor.shape == (0, 2, 2)
data_empty_array = to_numpy(data_empty_tensor)
assert isinstance(data_empty_array, np.ndarray)
assert data_empty_array.shape == (0, 2, 2)
assert np.allclose(to_numpy(to_torch(data_array)), data_array)
# NOTE(review): orphan continuation line — the `assert all(starmap(...)` that
# should precede it was lost in a merge; this line is a syntax error as-is.
zip(to_numpy(to_torch(data_list_3)), data_list_3)))
# third duplicated copy of the mixed-shape conversion checks (bad merge)
data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
data_list_4_torch = to_torch(data_list_4)
assert isinstance(data_list_4_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
assert all(starmap(np.allclose,
zip(to_numpy(to_torch(data_list_4)), data_list_4)))
data_list_5 = [np.zeros(2), np.zeros((3, 3))]
data_list_5_torch = to_torch(data_list_5)
assert isinstance(data_list_5_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
data_array = np.random.rand(3, 2, 2)
data_empty_tensor = to_torch(data_array[[]])
assert isinstance(data_empty_tensor, torch.Tensor)
assert data_empty_tensor.shape == (0, 2, 2)
data_empty_array = to_numpy(data_empty_tensor)
assert isinstance(data_empty_array, np.ndarray)
assert data_empty_array.shape == (0, 2, 2)
assert np.allclose(to_numpy(to_torch(data_array)), data_array)
# NOTE(review): stray closing paren left over from a deleted multi-line call
)
# NOTE(review): fragment testing dtype handling of to_torch. `batch` is
# defined out of view; presumably a Batch with fields a, b.c, b.d — confirm
# against the full test file.
a_torch_float = to_torch(batch.a, dtype=torch.float32)
assert a_torch_float.dtype == torch.float32
a_torch_double = to_torch(batch.a, dtype=torch.float64)
assert a_torch_double.dtype == torch.float64
# converting a whole batch must recurse into nested fields
batch_torch_float = to_torch(batch, dtype=torch.float32)
assert batch_torch_float.a.dtype == torch.float32
assert batch_torch_float.b.c.dtype == torch.float32
assert batch_torch_float.b.d.dtype == torch.float32
# a python list containing NaN should promote to float64
data_list = [float('nan'), 1]
data_list_torch = to_torch(data_list)
assert data_list_torch.dtype == torch.float64
# same-shape arrays stack into one (2, 3, 3) tensor
data_list_2 = [np.random.rand(3, 3), np.random.rand(3, 3)]
data_list_2_torch = to_torch(data_list_2)
assert data_list_2_torch.shape == (2, 3, 3)
assert np.allclose(to_numpy(to_torch(data_list_2)), data_list_2)
# mismatched shapes -> to_torch returns a list of tensors
data_list_3 = [np.zeros((3, 2)), np.zeros((3, 3))]
data_list_3_torch = to_torch(data_list_3)
assert isinstance(data_list_3_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_3_torch)
assert all(starmap(np.allclose,
zip(to_numpy(to_torch(data_list_3)), data_list_3)))
data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
data_list_4_torch = to_torch(data_list_4)
assert isinstance(data_list_4_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
assert all(starmap(np.allclose,
zip(to_numpy(to_torch(data_list_4)), data_list_4)))
# mismatched ranks -> list of tensors as well
data_list_5 = [np.zeros(2), np.zeros((3, 3))]
data_list_5_torch = to_torch(data_list_5)
assert isinstance(data_list_5_torch, list)
assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
def process_fn(self, batch: Batch, buffer: ReplayBuffer,
               indice: np.ndarray) -> Batch:
    """Pre-process a sampled batch before learning.

    Optionally standardizes ``batch.rew`` in place, then attaches
    (GAE-)lambda returns, bootstrapping with the critic's next-state
    values unless lambda is degenerate (0 or 1).
    """
    if self._rew_norm:
        # standardize rewards; skip when std is ~0 to avoid division by zero
        mean, std = batch.rew.mean(), batch.rew.std()
        if not np.isclose(std, 0):
            batch.rew = (batch.rew - mean) / std
    if self._lambda in [0, 1]:
        # degenerate lambda needs no critic bootstrap value
        return self.compute_episodic_return(
            batch, None, gamma=self._gamma, gae_lambda=self._lambda)
    # evaluate V(s') in minibatches without tracking gradients
    with torch.no_grad():
        chunks = [self.critic(minibatch.obs_next)
                  for minibatch in batch.split(self._batch, shuffle=False)]
    next_values = to_numpy(torch.cat(chunks, dim=0))
    return self.compute_episodic_return(
        batch, next_values, gamma=self._gamma, gae_lambda=self._lambda)
def process_fn(self, batch: Batch, buffer: ReplayBuffer,
               indice: np.ndarray) -> Batch:
    """Attach (GAE-)lambda returns to ``batch``.

    Bootstraps with the critic's next-state values unless lambda is
    degenerate (0 or 1), in which case no critic evaluation is needed.
    """
    if self._lambda in [0, 1]:
        return self.compute_episodic_return(
            batch, None, gamma=self._gamma, gae_lambda=self._lambda)
    # evaluate V(s') minibatch by minibatch, converting each chunk to numpy
    with torch.no_grad():
        pieces = [to_numpy(self.critic(minibatch.obs_next))
                  for minibatch in batch.split(self._batch, shuffle=False)]
    next_values = np.concatenate(pieces, axis=0)
    return self.compute_episodic_return(
        batch, next_values, gamma=self._gamma, gae_lambda=self._lambda)
# NOTE(review): interior fragment of a collector's rollout loop; the enclosing
# def and the `if` matching the `else:` below are out of view, and the
# original indentation appears to have been stripped. Preserved verbatim.
result = Batch(act=self._make_batch(action_space.sample()))
else:
with torch.no_grad():
result = self.policy(self.data, last_state)
# convert None to Batch(), since None is reserved for 0-init
state = result.get('state', Batch())
if state is None:
state = Batch()
self.data.state = state
if hasattr(result, 'policy'):
self.data.policy = to_numpy(result.policy)
# save hidden state to policy._state, in order to save into buffer
self.data.policy._state = self.data.state
self.data.act = to_numpy(result.act)
# optional exploration noise added to the chosen action(s)
if self._action_noise is not None:
self.data.act += self._action_noise(self.data.act.shape)
# step in env
obs_next, rew, done, info = self.env.step(
self.data.act if self._multi_env else self.data.act[0])
# move data to self.data
if not self._multi_env:
# single-env case: wrap scalars so downstream code sees batched data
obs_next = self._make_batch(obs_next)
rew = self._make_batch(rew)
done = self._make_batch(done)
info = self._make_batch(info)
self.data.obs_next = obs_next
self.data.rew = rew
self.data.done = done
def learn(self, batch: Batch, **kwargs) -> Dict[str, float]:
    """Run one gradient step of Q-learning on ``batch``.

    Periodically syncs the target network (every ``self._freq`` calls when
    a target network is enabled). When the batch exposes
    ``update_weight`` (prioritized replay), TD errors are fed back to the
    buffer and the loss is importance-weighted; otherwise plain MSE is used.
    Returns a dict with the scalar loss.
    """
    if self._target and self._cnt % self._freq == 0:
        # refresh target network parameters
        self.sync_weight()
    self.optim.zero_grad()
    logits = self(batch).logits
    # pick the Q-value of the action actually taken in each transition
    q_taken = logits[np.arange(len(logits)), batch.act]
    returns = to_torch_as(batch.returns, q_taken)
    if hasattr(batch, 'update_weight'):
        # prioritized replay: report TD errors, weight the squared loss
        td_error = returns - q_taken
        batch.update_weight(batch.indice, to_numpy(td_error))
        weight = to_torch_as(batch.impt_weight, q_taken)
        loss = (td_error.pow(2) * weight).mean()
    else:
        loss = F.mse_loss(q_taken, returns)
    loss.backward()
    self.optim.step()
    self._cnt += 1
    return {'loss': loss.item()}
# NOTE(review): second copy of the collector rollout fragment (duplicate
# paste); the enclosing def and the `if` matching the second `else:` are out
# of view, and indentation appears stripped. Preserved verbatim.
action_space = self.env.action_space
# vectorized envs expose a list of per-env action spaces
if isinstance(action_space, list):
result = Batch(act=[a.sample() for a in action_space])
else:
result = Batch(act=self._make_batch(action_space.sample()))
else:
with torch.no_grad():
result = self.policy(self.data, last_state)
# convert None to Batch(), since None is reserved for 0-init
state = result.get('state', Batch())
if state is None:
state = Batch()
self.data.state = state
if hasattr(result, 'policy'):
self.data.policy = to_numpy(result.policy)
# save hidden state to policy._state, in order to save into buffer
self.data.policy._state = self.data.state
self.data.act = to_numpy(result.act)
# optional exploration noise added to the chosen action(s)
if self._action_noise is not None:
self.data.act += self._action_noise(self.data.act.shape)
# step in env
obs_next, rew, done, info = self.env.step(
self.data.act if self._multi_env else self.data.act[0])
# move data to self.data
if not self._multi_env:
# single-env case: wrap scalars so downstream code sees batched data
obs_next = self._make_batch(obs_next)
rew = self._make_batch(rew)
done = self._make_batch(done)
def add(self, x: Union[float, list, np.ndarray, torch.Tensor]) -> float:
    """Add scalar data into :class:`MovAvg` and return the updated average.

    Accepts a ``torch.Tensor`` (flattened first), a numpy array, a list of
    python scalars, or a single python scalar. Any value contained in
    ``self.banned`` is silently skipped. When ``self.size > 0`` only the
    most recent ``size`` values are kept.
    """
    if isinstance(x, torch.Tensor):
        # flatten so the sequence branch below handles every element
        x = to_numpy(x.flatten())
    if isinstance(x, (list, np.ndarray)):
        # keep only values that are not banned (e.g. inf/nan sentinels)
        self.cache.extend(v for v in x if v not in self.banned)
    elif x not in self.banned:
        self.cache.append(x)
    # trim to the moving window; size <= 0 means an unbounded window
    if self.size > 0 and len(self.cache) > self.size:
        self.cache = self.cache[-self.size:]
    return self.get()