Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# 012345
# --GGTTTA
m1, seq1 = DNA.make_seq("ACGGT--A").parse_out_gaps()
m2, seq2 = DNA.make_seq("--GGTTTA").parse_out_gaps()
x, y = get_align_coords(m1, m2)
expect = [2, 4, None, 5, 5], [0, 2, None, 5, 5]
self.assertEqual((x, y), expect)
# we have no gaps, so coords will be None
m1, s1 = seq1.parse_out_gaps()
m2, s2 = seq2.parse_out_gaps()
self.assertEqual(get_align_coords(m1, m2), None)
# unless we indicate the seqs came from an Alignment
m1, seq1 = DNA.make_seq("ACGGTTTA").parse_out_gaps()
m2, seq2 = DNA.make_seq("GGGGTTTA").parse_out_gaps()
x, y = get_align_coords(m1, m2, aligned=True)
self.assertEqual((x, y), ([0, len(seq1)], [0, len(seq1)]))
# raises an exception if the Aligned seqs are different lengths
m1, seq1 = DNA.make_seq("ACGGTTTA").parse_out_gaps()
m2, seq2 = DNA.make_seq("GGGGTT").parse_out_gaps()
with self.assertRaises(AssertionError):
get_align_coords(m1, m2, aligned=True)
def test_seq_different_name_with_same_length(self):
    """Features copy onto a differently named sequence inside an alignment."""
    # The donor is a stand-alone sequence; the recipient is the aligned
    # sequence "y".  Copying is expected to work across the name mismatch
    # as long as the feature lies within the recipient's length.
    gapped_rows = [["x", "-AAAAAAAAA"], ["y", "TTTT--TTTT"]]
    alignment = make_aligned_seqs(data=gapped_rows, array_align=False)

    donor = DNA.make_seq("CCCCCCCCCCCCCCCCCCCC", "x")
    donor_exon = donor.add_feature("exon", "A", [(5, 8)])

    recipient = alignment.get_seq("y")
    recipient.copy_annotations(donor)

    # The trailing "/10" in the expected text matches the 10-column rows
    # the alignment was built from.
    found = list(alignment.get_annotations_from_seq("y", "exon"))
    expected_text = '[exon "A" at [7:10]/10]'
    self.assertEqual(str(found), expected_text)
def test_stop_indexes(self):
    """Stop codon indexes are reported for each requested start offset."""
    code = GeneticCode(self.SGC)
    dna = DNA.make_seq("ATGCTAACATAAA")
    # Maps the ``start`` offset handed to get_stop_indices onto the
    # indexes it should report for the sequence above.
    stops_by_start = {0: [9], 1: [4], 2: []}
    for start, wanted in stops_by_start.items():
        observed = code.get_stop_indices(dna, start=start)
        self.assertEqual(observed, wanted)
def test_best_frame(self):
"""correctly identify best frame with/without allowing rc"""
make_seq = DNA.make_seq
seq = make_seq("ATGCTAACATAAA", name="fake1")
f = best_frame(seq)
self.assertEqual(f, 1)
f = best_frame(seq, require_stop=True)
self.assertEqual(f, 1)
# a challenging seq, translatable in 1 and 3 frames, ending on stop in
# frame 1. Should return frame 1 irrespective of require_stop
seq = make_seq("ATGTTACGGACGATGCTGAAGTCGAAGATCCACCGCGCCACGGTGACCTGCTGA")
f = best_frame(seq)
self.assertEqual(f, 1)
# a rc seq
f = best_frame(seq)
seq = make_seq(
"AATATAAATGCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTTCATAAAGTCATA", name="fake2"
def test_roundtrip_variable(self):
    """A Variable annotation survives a to_json / deserialise round trip."""
    original = DNA.make_seq("AAGGGGAAAACCCCCAAAAAAAAAATTTTTTTTTTAAA", name="plus")
    spans_and_values = [[[2, 6], 2.4], [[10, 15], 5.1], [[25, 35], 1.3]]
    annotated = original.add_annotation(Variable, "SNP", "freq", spans_and_values)

    serialised = original.to_json()
    restored = deserialise_object(serialised)
    snp_matches = list(restored.get_annotations_matching("SNP"))
    recovered_xxy = snp_matches[0].xxy_list

    # The deserialised spans come back as lists, so the original spans are
    # normalised with list() before the two are compared.
    wanted_xxy = []
    for span, value in annotated.xxy_list:
        wanted_xxy.append([list(span), value])
    self.assertEqual(recovered_xxy, wanted_xxy)
def makeSampleSequence():
    """Build the annotated DNA sequence used as a shared fixture.

    Returns:
        The sequence produced by ``DNA.make_seq`` after three
        ``annotation.Feature`` annotations have been added to it: one
        labelled "exon" over (20, 35) and two labelled "repeat_unit" over
        (39, 49) and (49, 60).
    """
    raw = (
        "tgccnwsrygagcgtgttaaacaatggccaactctctaccttcctatgttaaacaagtgagatcgcaggcgcgccaaggc"
    )
    seq = DNA.make_seq(raw)

    # Positional arguments forwarded to add_annotation after the class, in
    # the order the annotations are added.
    # NOTE(review): presumably (type, name, spans) -- confirm against the
    # Feature signature.
    feature_args = (
        ("exon", "exon", [(20, 35)]),
        ("repeat_unit", "repeat_unit", [(39, 49)]),
        ("repeat_unit", "rep2", [(49, 60)]),
    )
    for first_label, second_label, spans in feature_args:
        # The return value was previously bound to an unused local; only
        # the side effect on ``seq`` is needed here.
        seq.add_annotation(annotation.Feature, first_label, second_label, spans)
    return seq
expect = [2, 4, None, 5, 5], [0, 2, None, 5, 5]
self.assertEqual((x, y), expect)
# we have no gaps, so coords will be None
m1, s1 = seq1.parse_out_gaps()
m2, s2 = seq2.parse_out_gaps()
self.assertEqual(get_align_coords(m1, m2), None)
# unless we indicate the seqs came from an Alignment
m1, seq1 = DNA.make_seq("ACGGTTTA").parse_out_gaps()
m2, seq2 = DNA.make_seq("GGGGTTTA").parse_out_gaps()
x, y = get_align_coords(m1, m2, aligned=True)
self.assertEqual((x, y), ([0, len(seq1)], [0, len(seq1)]))
# raises an exception if the Aligned seqs are different lengths
m1, seq1 = DNA.make_seq("ACGGTTTA").parse_out_gaps()
m2, seq2 = DNA.make_seq("GGGGTT").parse_out_gaps()
with self.assertRaises(AssertionError):
get_align_coords(m1, m2, aligned=True)
ref_aln_seq = aligned_from_cigar(
cigars[ref_seqname], seqs[ref_seqname], moltype=moltype
)
m, aln_loc = slice_cigar(cigars[ref_seqname], start, end, by_align=False)
data[ref_seqname] = ref_aln_seq[aln_loc[0] : aln_loc[1]]
for seqname in [
seqname for seqname in list(seqs.keys()) if seqname != ref_seqname
]:
m, seq_loc = slice_cigar(cigars[seqname], aln_loc[0], aln_loc[1])
if seq_loc:
seq = seqs[seqname]
if isinstance(seq, str):
seq = moltype.make_seq(seq)
data[seqname] = seq[seq_loc[0] : seq_loc[1]].gapped_by_map(m)
else:
data[seqname] = DNA.make_seq("-" * (aln_loc[1] - aln_loc[0]))
aln = make_aligned_seqs(data)
return aln