Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Build the alignment and collection fixtures used by the tests."""
    # Array-backed alignment built from a nested-list array via aln_from_array.
    self.a = ArrayAlignment(
        array([[0, 1, 2], [3, 4, 5]]),
        conversion_f=aln_from_array,
    )
    self.a2 = ArrayAlignment(["ABC", "DEF"], names=["x", "y"])
    # self.a is rebound below to an alignment over the AB alphabet; the
    # construction above is still performed first.
    case_kept = [AB.make_seq(raw, preserve_case=True) for raw in ["abaa", "abbb"]]
    self.a = ArrayAlignment(case_kept, alphabet=AB.alphabet)
    self.b = Alignment(["ABC", "DEF"])
    self.c = SequenceCollection(["ABC", "DEF"])
def setUp(self):
    """Prepare the alphabets and alignments shared by the recoding tests."""
    canonical = "ACDEFGHIKLMNPQRSTVWY"
    ambiguous = "BXZ"
    self.canonical_abbrevs = canonical
    self.ambiguous_abbrevs = ambiguous
    # Pairs the single letter "A" with every canonical and ambiguous code.
    self.all_to_a = [("A", canonical + ambiguous)]
    # Look up the named alphabets and store each under its own name.
    for alphabet_name in ("charge_2", "hydropathy_3", "orig"):
        setattr(self, alphabet_name, alphabets[alphabet_name])
    # Same three rows for both alignments; each constructor gets its own dict.
    rows = {"1": "CDDFBXZ", "2": "CDD-BXZ", "3": "AAAASS-"}
    self.aln = ArrayAlignment(data=dict(rows))
    self.aln2 = make_aligned_seqs(data=dict(rows))
# NOTE(review): the enclosing test method header and the definitions of
# expected_h3 / expected_aa are not visible in this excerpt.
# recoding by alphabet id and by the matching alphabet definition must agree
actual = recode_dense_alignment(self.aln, alphabet_id="hydropathy_3")
self.assertEqual(actual, expected_h3)
actual = recode_dense_alignment(self.aln, alphabet_def=self.hydropathy_3)
self.assertEqual(actual, expected_h3)
# different alphabet
actual = recode_dense_alignment(self.aln, alphabet_def=self.all_to_a)
self.assertEqual(actual, expected_aa)
# original characters which aren't remapped are left in their original state
actual = recode_dense_alignment(self.aln, alphabet_def=[("a", "b")])
self.assertEqual(actual, self.aln)
# non-alphabetic character mapped same as alphabetic characters
# (here every "-" is expected to come back as ".")
actual = recode_dense_alignment(self.aln, alphabet_def=[(".", "-")])
expected = ArrayAlignment(data={"1": "CDDFBXZ", "2": "CDD.BXZ", "3": "AAAASS."})
self.assertEqual(actual, expected)
def test_ArrayAlignment_without_moltype(self):
    """No moltype is passed; the string form should still list both RNA seqs."""
    rna_seqs = [
        ArraySequence(raw, alphabet=RNA.alphabets.degen_gapped, name=label)
        for label, raw in (("rna1", "UCAG"), ("rna2", "CCCR"))
    ]
    da = ArrayAlignment(rna_seqs)
    # Every expected line, including the last one, ends with a newline.
    expected = "".join(line + "\n" for line in [">rna1", "UCAG", ">rna2", "CCCR"])
    self.assertEqual(str(da), expected)
def test_regular_to_model(self):
    """ArrayAlignment accepts regular sequences, with or without a moltype."""
    expected = ">x\nAAA\n>y\nCCC\n"
    untyped = ArrayAlignment([self.r1, self.r2])
    self.assertEqual(str(untyped), expected)
    typed = ArrayAlignment([self.r1, self.r2], moltype=DNA)
    self.assertEqual(str(typed), expected)
    # The source sequence must still carry its name afterwards.
    self.assertEqual(self.r1.name, "x")
def test_reverse_complement_info(self):
    """The info attribute must survive rc() on alignments and collections."""
    gapped = {
        "seq1": "--ACGT--GT---",
        "seq2": "TTACGTA-GT---",
        "seq3": "--ACGTA-GCC--",
    }
    # Same check for the alignment first, then the collection, on gapped data.
    for container in (ArrayAlignment, SequenceCollection):
        original = container(data=gapped, moltype=DNA, info={"key": "value"})
        flipped = original.rc()
        self.assertEqual(flipped.info["key"], "value")
# NOTE(review): excerpt from a larger test; aln, aln3, data and data3 are
# defined outside the visible lines, and the original nesting of the
# if/else below was lost with the indentation.
out_aln = aln.add_seqs(aln3, before_name="name1")
# test if insert before first seq works
self.assertEqual(str(out_aln), str(self.Class(data3 + data)))
out_aln = aln.add_seqs(aln3, after_name="name4")
# test if insert after last seq works
self.assertEqual(str(out_aln), str(self.Class(data + data3)))
self.assertRaises(
ValueError, aln.add_seqs, aln3, before_name="name5"
) # wrong after/before name
self.assertRaises(
ValueError, aln.add_seqs, aln3, after_name="name5"
) # wrong after/before name
# Alignment types must reject aln3 + aln3; any other type is expected to
# accept it (presumably because only alignments require equal lengths —
# TODO confirm).
if isinstance(aln, Alignment) or isinstance(aln, ArrayAlignment):
self.assertRaises((DataError, ValueError), aln.add_seqs, aln3 + aln3)
else:
# expected: the original seqs plus each data3 seq concatenated with itself
exp = set([seq for name, seq in data])
exp.update([seq + seq for name, seq in data3])
got = set()
for seq in aln.add_seqs(aln3 + aln3).seqs:
got.update([str(seq).strip()])
self.assertEqual(got, exp)
# NOTE(review): fixture fragment — the enclosing def is not visible in this
# excerpt and the final ArrayAlignment( call below is cut off.
# named sequences
self.rna1 = RnaSequence("UCAGGG", name="rna1")
self.rna2 = RnaSequence("YCU-RG", name="rna2")
self.rna3 = RnaSequence("CAA-NR", name="rna3")
# the same three strings and names as ArraySequence objects
self.model1 = ArraySequence(
"UCAGGG", name="rna1", alphabet=RNA.alphabets.degen_gapped
)
self.model2 = ArraySequence(
"YCU-RG", name="rna2", alphabet=RNA.alphabets.degen_gapped
)
self.model3 = ArraySequence(
"CAA-NR", name="rna3", alphabet=RNA.alphabets.degen_gapped
)
# one Alignment and one ArrayAlignment over the named sequences
self.aln = Alignment([self.rna1, self.rna2, self.rna3], moltype=RNA)
self.da = ArrayAlignment(
[self.model1, self.model2, self.model3],
moltype=RNA,
alphabet=RNA.alphabets.degen_gapped,
)
# same sequences again, constructed without a name
self.nn_rna1 = RnaSequence("UCAGGG")
self.nn_rna2 = RnaSequence("YCU-RG")
self.nn_rna3 = RnaSequence("CAA-NR")
self.nn_model1 = ArraySequence("UCAGGG", alphabet=RNA.alphabets.degen_gapped)
self.nn_model2 = ArraySequence("YCU-RG", alphabet=RNA.alphabets.degen_gapped)
self.nn_model3 = ArraySequence("CAA-NR", alphabet=RNA.alphabets.degen_gapped)
self.nn_aln = Alignment([self.nn_rna1, self.nn_rna2, self.nn_rna3], moltype=RNA)
self.nn_da = ArrayAlignment(
def test_regular_aln_to_model_aln(self):
    """An Alignment converts to an ArrayAlignment, with or without a moltype."""
    expected = ">x\nAAA\n>y\nCCC\n"
    source = Alignment([self.r1, self.r2])
    untyped = ArrayAlignment(source)
    self.assertEqual(str(untyped), expected)
    typed = ArrayAlignment(source, moltype=DNA)
    self.assertEqual(str(typed), expected)
    # The source sequence must still carry its name afterwards.
    self.assertEqual(self.r1.name, "x")
# NOTE(review): the next four lines look like the tail of a truncated
# ArrayAlignment(...) call whose opening is not visible here — TODO confirm.
[self.model1, self.model2, self.model3],
moltype=RNA,
alphabet=RNA.alphabets.degen_gapped,
)
# sequences constructed without a name
self.nn_rna1 = RnaSequence("UCAGGG")
self.nn_rna2 = RnaSequence("YCU-RG")
self.nn_rna3 = RnaSequence("CAA-NR")
self.nn_model1 = ArraySequence("UCAGGG", alphabet=RNA.alphabets.degen_gapped)
self.nn_model2 = ArraySequence("YCU-RG", alphabet=RNA.alphabets.degen_gapped)
self.nn_model3 = ArraySequence("CAA-NR", alphabet=RNA.alphabets.degen_gapped)
# one Alignment and one ArrayAlignment over the unnamed sequences
self.nn_aln = Alignment([self.nn_rna1, self.nn_rna2, self.nn_rna3], moltype=RNA)
self.nn_da = ArrayAlignment(
[self.nn_model1, self.nn_model2, self.nn_model3],
moltype=RNA,
alphabet=RNA.alphabets.degen_gapped,
)