Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_one_hot():
    """Check one-hot encoding of a DNA string and the length-adjusting helpers.

    Covers: agreement between ``one_hot_dna`` and ``one_hot``, output shapes
    after ``pad`` / ``fixed_len``, the three ``trim`` call forms, the per-base
    encoding rows, and rejection of list input by ``one_hot``.
    """
    seq = "ACGTTTATNT"
    assert len(seq) == 10

    # Both encoders must produce the same (length, 4) matrix.
    assert one_hot_dna(seq).shape == (10, 4)
    assert one_hot(seq).shape == (10, 4)
    assert np.all(one_hot_dna(seq) == one_hot(seq))

    # Length-adjusting helpers feed straight into the encoder.
    assert one_hot(pad(seq, 20)).shape == (20, 4)
    assert one_hot(fixed_len(seq, 20)).shape == (20, 4)
    assert one_hot(fixed_len(seq, 5)).shape == (5, 4)
    assert trim(seq, 5) == 'TTTAT'
    assert trim(seq, 5, 'start') == 'ACGTT'
    assert trim(seq, 5, 'end') == 'TATNT'

    # Only the call lives inside the block: wrapping an `assert ... == ...`
    # here would let the AssertionError of a failed comparison satisfy
    # `pytest.raises(Exception)` and hide a `pad` that does not raise.
    # NOTE(review): the concrete exception type is not visible from this
    # test, hence the broad `Exception` is kept.
    with pytest.raises(Exception):
        pad(seq, 5, 'end')

    # Encode once and inspect individual rows.
    encoded = one_hot(seq)
    assert np.all(encoded[0] == np.array([1, 0, 0, 0]))   # seq[0] == 'A'
    assert np.all(encoded[1] == np.array([0, 1, 0, 0]))   # seq[1] == 'C'
    assert np.all(encoded[2] == np.array([0, 0, 1, 0]))   # seq[2] == 'G'
    assert np.all(encoded[3] == np.array([0, 0, 0, 1]))   # seq[3] == 'T'
    assert np.all(encoded[4] == np.array([0, 0, 0, 1]))   # seq[4] == 'T'
    assert np.all(encoded[-1] == np.array([0, 0, 0, 1]))  # seq[-1] == 'T'
    # seq[-2] == 'N' is expected to spread evenly over the four columns.
    assert np.all(encoded[-2] == np.array([0.25, 0.25, 0.25, 0.25]))

    # A list of characters is not accepted in place of a string.
    with pytest.raises(ValueError):
        one_hot(['A', 'C'])
def test_tokenize_one_hot():
    """Check ``one_hot`` and ``tokenize`` when alphabet and neutral symbol are explicit."""
    encoded = one_hot("ACG", DNA, "N")
    assert encoded.shape == (3, 4)

    # Each base maps to one integer token.
    plain_tokens = tokenize("ACG", DNA, "N")
    assert plain_tokens.shape == (3,)
    expected_plain = np.array([0, 1, 2])
    assert np.array_equal(plain_tokens, expected_plain)

    # N mapped to -1
    tokens_with_neutral = tokenize("TGTN", DNA, "N")
    expected_with_neutral = np.array([3, 2, 3, -1])
    assert np.array_equal(tokens_with_neutral, expected_with_neutral)
assert trim(seq, 5) == 'TTTAT'
assert trim(seq, 5, 'start') == 'ACGTT'
assert trim(seq, 5, 'end') == 'TATNT'
with pytest.raises(Exception):
assert pad(seq, 5, 'end') == 'TATNT'
assert np.all(one_hot(seq)[0] == np.array([1, 0, 0, 0]))
assert np.all(one_hot(seq)[1] == np.array([0, 1, 0, 0]))
assert np.all(one_hot(seq)[2] == np.array([0, 0, 1, 0]))
assert np.all(one_hot(seq)[3] == np.array([0, 0, 0, 1]))
assert np.all(one_hot(seq)[4] == np.array([0, 0, 0, 1]))
assert np.all(one_hot(seq)[-1] == np.array([0, 0, 0, 1]))
assert np.all(one_hot(seq)[-2] == np.array([0.25, 0.25, 0.25, 0.25]))
with pytest.raises(ValueError):
one_hot(['A', 'C'])
with pytest.raises(ValueError):
one_hot_dna(['A', 'C'])
assert one_hot(pad(seq, 20)).shape == (20, 4)
assert one_hot(fixed_len(seq, 20)).shape == (20, 4)
assert one_hot(fixed_len(seq, 5)).shape == (5, 4)
assert trim(seq, 5) == 'TTTAT'
assert trim(seq, 5, 'start') == 'ACGTT'
assert trim(seq, 5, 'end') == 'TATNT'
with pytest.raises(Exception):
assert pad(seq, 5, 'end') == 'TATNT'
assert np.all(one_hot(seq)[0] == np.array([1, 0, 0, 0]))
assert np.all(one_hot(seq)[1] == np.array([0, 1, 0, 0]))
assert np.all(one_hot(seq)[2] == np.array([0, 0, 1, 0]))
assert np.all(one_hot(seq)[3] == np.array([0, 0, 0, 1]))
assert np.all(one_hot(seq)[4] == np.array([0, 0, 0, 1]))
assert np.all(one_hot(seq)[-1] == np.array([0, 0, 0, 1]))
assert np.all(one_hot(seq)[-2] == np.array([0.25, 0.25, 0.25, 0.25]))
with pytest.raises(ValueError):
one_hot(['A', 'C'])
with pytest.raises(ValueError):
one_hot_dna(['A', 'C'])
def test_one_hot():
    """Check one-hot encoding of a DNA string and the length-adjusting helpers.

    Covers: agreement between ``one_hot_dna`` and ``one_hot``, output shapes
    after ``pad`` / ``fixed_len``, the three ``trim`` call forms, and the
    encoding rows of the first three bases.
    """
    seq = "ACGTTTATNT"
    assert len(seq) == 10

    # Both encoders must produce the same (length, 4) matrix.
    assert one_hot_dna(seq).shape == (10, 4)
    assert one_hot(seq).shape == (10, 4)
    assert np.all(one_hot_dna(seq) == one_hot(seq))

    # Length-adjusting helpers feed straight into the encoder.
    assert one_hot(pad(seq, 20)).shape == (20, 4)
    assert one_hot(fixed_len(seq, 20)).shape == (20, 4)
    assert one_hot(fixed_len(seq, 5)).shape == (5, 4)
    assert trim(seq, 5) == 'TTTAT'
    assert trim(seq, 5, 'start') == 'ACGTT'
    assert trim(seq, 5, 'end') == 'TATNT'

    # Only the call lives inside the block: wrapping an `assert ... == ...`
    # here would let the AssertionError of a failed comparison satisfy
    # `pytest.raises(Exception)` and hide a `pad` that does not raise.
    # NOTE(review): the concrete exception type is not visible from this
    # test, hence the broad `Exception` is kept.
    with pytest.raises(Exception):
        pad(seq, 5, 'end')

    # Encode once and inspect individual rows.
    encoded = one_hot(seq)
    assert np.all(encoded[0] == np.array([1, 0, 0, 0]))  # seq[0] == 'A'
    assert np.all(encoded[1] == np.array([0, 1, 0, 0]))  # seq[1] == 'C'
    assert np.all(encoded[2] == np.array([0, 0, 1, 0]))  # seq[2] == 'G'
def test_one_hot():
    """Check one-hot encoding of a DNA string and the length-adjusting helpers.

    Covers: agreement between ``one_hot_dna`` and ``one_hot``, output shapes
    after ``pad`` / ``fixed_len``, the three ``trim`` call forms, and the
    per-base encoding rows including the neutral symbol.
    """
    seq = "ACGTTTATNT"
    assert len(seq) == 10

    # Both encoders must produce the same (length, 4) matrix.
    assert one_hot_dna(seq).shape == (10, 4)
    assert one_hot(seq).shape == (10, 4)
    assert np.all(one_hot_dna(seq) == one_hot(seq))

    # Length-adjusting helpers feed straight into the encoder.
    assert one_hot(pad(seq, 20)).shape == (20, 4)
    assert one_hot(fixed_len(seq, 20)).shape == (20, 4)
    assert one_hot(fixed_len(seq, 5)).shape == (5, 4)
    assert trim(seq, 5) == 'TTTAT'
    assert trim(seq, 5, 'start') == 'ACGTT'
    assert trim(seq, 5, 'end') == 'TATNT'

    # Only the call lives inside the block: wrapping an `assert ... == ...`
    # here would let the AssertionError of a failed comparison satisfy
    # `pytest.raises(Exception)` and hide a `pad` that does not raise.
    # NOTE(review): the concrete exception type is not visible from this
    # test, hence the broad `Exception` is kept.
    with pytest.raises(Exception):
        pad(seq, 5, 'end')

    # Encode once and inspect individual rows.
    encoded = one_hot(seq)
    assert np.all(encoded[0] == np.array([1, 0, 0, 0]))   # seq[0] == 'A'
    assert np.all(encoded[1] == np.array([0, 1, 0, 0]))   # seq[1] == 'C'
    assert np.all(encoded[2] == np.array([0, 0, 1, 0]))   # seq[2] == 'G'
    assert np.all(encoded[3] == np.array([0, 0, 0, 1]))   # seq[3] == 'T'
    assert np.all(encoded[4] == np.array([0, 0, 0, 1]))   # seq[4] == 'T'
    assert np.all(encoded[-1] == np.array([0, 0, 0, 1]))  # seq[-1] == 'T'
    # seq[-2] == 'N' is expected to spread evenly over the four columns.
    assert np.all(encoded[-2] == np.array([0.25, 0.25, 0.25, 0.25]))
def __call__(self, seq):
    """One-hot encode ``seq`` using this object's configured settings.

    Calls ``F.one_hot_dna(seq, self.dtype)`` when the alphabet equals ``DNA``,
    the neutral alphabet equals ``['N']`` and the neutral value equals
    ``0.25``. Any other configuration goes through ``F.one_hot`` with every
    setting passed explicitly.
    """
    # NOTE(review): presumably one_hot_dna is a specialised path for this
    # exact default configuration - confirm against F's implementation.
    is_default_dna_setup = (
        self.alphabet == DNA
        and self.neutral_alphabet == ['N']
        and self.neutral_value == 0.25
    )
    if not is_default_dna_setup:
        return F.one_hot(
            seq,
            alphabet=self.alphabet,
            neutral_alphabet=self.neutral_alphabet,
            neutral_value=self.neutral_value,
            dtype=self.dtype,
        )
    return F.one_hot_dna(seq, self.dtype)