Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_anywhere_with_errors():
    """Trim reads with an AnywhereAdapter and check the remaining sequence.

    The adapter "CCGCATTTAG" (max_error_rate=0.1) appears lower-cased in
    each input read, either exactly or with the single error noted next to
    the case. Every read is expected to be trimmed down to 'AACCGGTT'.
    """
    adapter = AnywhereAdapter("CCGCATTTAG", max_error_rate=0.1)
    cases = [
        # Adapter occurs after the 'AACCGGTT' part of the read.
        ('AACCGGTTccgcatttagGATC', 'AACCGGTT'),
        ('AACCGGTTccgcgtttagGATC', 'AACCGGTT'),  # one mismatch
        ('AACCGGTTccgcatttag', 'AACCGGTT'),
        # Adapter occurs before the 'AACCGGTT' part of the read.
        ('ccgcatttagAACCGGTT', 'AACCGGTT'),
        ('ccgtatttagAACCGGTT', 'AACCGGTT'),  # one mismatch
        ('ccgatttagAACCGGTT', 'AACCGGTT'),  # one deletion
    ]
    for raw_sequence, wanted in cases:
        read = Sequence('foo', raw_sequence)
        # A new cutter is created for every read, so nothing carries over
        # from one case to the next.
        cutter = AdapterCutter([adapter], times=1)
        result = cutter(read, ModificationInfo(read))
        assert result.sequence == wanted
def test_shortener():
    """Apply Shortener with lengths 0, 1, 5 and 100 to an 11-base read.

    For each length, the result of calling the shortener on the read is
    compared with the expected Sequence (bases and qualities cut to the
    same length).
    """
    read = Sequence('read1', 'ACGTTTACGTA', '##456789###')
    expectations = (
        (0, Sequence('read1', '', '')),
        (1, Sequence('read1', 'A', '#')),
        (5, Sequence('read1', 'ACGTT', '##456')),
        # 100 is longer than the read: the result must equal the input read.
        (100, read),
    )
    for length, expected in expectations:
        shortener = Shortener(length)
        assert shortener(read, ModificationInfo(read)) == expected
def test_paired_adapter_cutter_actions(action, expected_trimmed1, expected_trimmed2):
    """Run PairedAdapterCutter with the given action on one read pair.

    Read 1 contains the first adapter ("GGTTAA") and read 2 contains the
    second adapter ("AACCGG"). The sequences returned by the cutter must
    equal ``expected_trimmed1`` and ``expected_trimmed2``.

    NOTE(review): the three arguments are presumably supplied by a pytest
    parametrize decorator that is not visible here -- confirm.
    """
    from cutadapt.adapters import BackAdapter

    adapter_r1 = BackAdapter("GGTTAA")
    adapter_r2 = BackAdapter("AACCGG")
    read_r1 = Sequence("name", "CCCCGGTTAACCCC")
    read_r2 = Sequence("name", "TTTTAACCGGTTTT")
    cutter = PairedAdapterCutter([adapter_r1], [adapter_r2], action=action)
    result_r1, result_r2 = cutter(
        read_r1,
        read_r2,
        ModificationInfo(read_r1),
        ModificationInfo(read_r2),
    )
    assert expected_trimmed1 == result_r1.sequence
    assert expected_trimmed2 == result_r2.sequence
def test_paired_adapter_cutter_actions(action, expected_trimmed1, expected_trimmed2):
    """Check the sequences PairedAdapterCutter returns for a given action.

    A BackAdapter is configured for each read of the pair ("GGTTAA" for the
    first, "AACCGG" for the second); both reads contain their adapter. The
    cutter is called once and the resulting sequences are compared with the
    expected values passed in.

    NOTE(review): a function with this same name and the same statements is
    defined earlier in this file; this later definition is the one that
    stays bound to the name. The arguments are presumably provided by a
    pytest parametrize decorator that is not visible here -- confirm.
    """
    from cutadapt.adapters import BackAdapter

    first_adapter, second_adapter = BackAdapter("GGTTAA"), BackAdapter("AACCGG")
    first_read = Sequence("name", "CCCCGGTTAACCCC")
    second_read = Sequence("name", "TTTTAACCGGTTTT")
    paired_cutter = PairedAdapterCutter(
        [first_adapter], [second_adapter], action=action
    )
    first_info = ModificationInfo(first_read)
    second_info = ModificationInfo(second_read)
    trimmed_pair = paired_cutter(first_read, second_read, first_info, second_info)
    first_trimmed, second_trimmed = trimmed_pair
    assert expected_trimmed1 == first_trimmed.sequence
    assert expected_trimmed2 == second_trimmed.sequence
"""
# NOTE(review): the `def` line and the start of the docstring that these
# statements belong to are not present in this excerpt; only the test body
# is visible. Restore the header before relying on this test.
#
# Adapter under test: a 13-base BackAdapter with max_error_rate=0.1.
adapter = BackAdapter("TCGATCGATCGAT", max_error_rate=0.1)
# First read: eleven 'A's followed by nine more bases. The cutter is expected
# to remove those nine bases and record one match of length 9.
read = Sequence('foo1', 'AAAAAAAAAAATCGTCGATC')
cutter = AdapterCutter([adapter], times=1)
trimmed_read = cutter(read, ModificationInfo(read))
assert trimmed_read.sequence == 'AAAAAAAAAAA'
assert cutter.adapter_statistics[adapter].back.lengths == {9: 1}
# We see 1 error at length 9 even though the number of allowed mismatches at
# length 9 is 0.
assert cutter.adapter_statistics[adapter].back.errors[9][1] == 1
# Second read: a different 3' end. A new cutter is used, the read is expected
# to come back unchanged and no match lengths are expected to be recorded.
read = Sequence('foo2', 'AAAAAAAAAAATCGAACGA')
cutter = AdapterCutter([adapter], times=1)
trimmed_read = cutter(read, ModificationInfo(read))
assert trimmed_read.sequence == read.sequence
assert cutter.adapter_statistics[adapter].back.lengths == {}
def test_anywhere_parameter_front():
    """Parse a 'front' adapter spec carrying the ';anywhere' parameter.

    The parsed adapter must be a FrontAdapter with ``_force_anywhere`` set.
    The adapter is then applied to a read whose last ten bases equal the
    first ten bases of the adapter; the trimmed sequence is expected to be
    empty.
    """
    parser = AdapterParser(
        max_error_rate=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    parsed_adapters = list(parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'front'))
    adapter = parsed_adapters[0]
    assert isinstance(adapter, FrontAdapter)
    assert adapter._force_anywhere

    # TODO move the rest to a separate test
    read = Sequence('foo1', 'AAAAAAAAAACTGAAGTGAA')
    from cutadapt.modifiers import AdapterCutter
    cutter = AdapterCutter([adapter])
    result = cutter(read, ModificationInfo(read))
    assert result.sequence == ''
def test_quality_trimmer():
    """Run QualityTrimmer with three argument sets on the same read.

    Going by the expected results: with (10, 10, 33) bases are removed from
    both ends, with (0, 10, 33) only from the end of the read, and with
    (10, 0, 33) only from its start.

    NOTE(review): the third argument (33) is presumably the quality
    encoding offset -- confirm against QualityTrimmer.
    """
    read = Sequence('read1', 'ACGTTTACGTA', '##456789###')
    cases = (
        ((10, 10, 33), ('GTTTAC', '456789')),
        ((0, 10, 33), ('ACGTTTAC', '##456789')),
        ((10, 0, 33), ('GTTTACGTA', '456789###')),
    )
    for trimmer_args, (bases, qualities) in cases:
        qt = QualityTrimmer(*trimmer_args)
        trimmed = qt(read, ModificationInfo(read))
        assert trimmed == Sequence('read1', bases, qualities)
def test_statistics():
    """Check that the recorded trimmed lengths never exceed the read length.

    A read is processed by an AdapterCutter (times=3) with one BackAdapter.
    Afterwards, length * count is summed over the front and back length
    tables of every adapter and compared with ``len(read)``.
    """
    read = Sequence('name', 'AAAACCCCAAAA')
    adapters = [BackAdapter("CCCC", max_error_rate=0.1)]
    cutter = AdapterCutter(adapters, times=3)
    cutter(read, ModificationInfo(read))

    # TODO make this a lot simpler
    trimmed_bp = 0
    for adapter in adapters:
        stats = cutter.adapter_statistics[adapter]
        for length_table in (stats.front.lengths, stats.back.lengths):
            trimmed_bp += sum(
                seqlen * count for seqlen, count in length_table.items()
            )
    # The total is attached as the assertion message to ease debugging.
    assert trimmed_bp <= len(read), trimmed_bp
def test_nend_trimmer():
    """Check NEndTrimmer output for reads with N bases at their ends.

    In the expected results the N runs at both ends are gone while Ns
    inside the read are kept; a read made only of Ns becomes empty.
    """
    trimmer = NEndTrimmer()
    cases = [
        ('NNNNAAACCTTGGNNN', 'AAACCTTGG'),
        ('NNNNAAACNNNCTTGGNNN', 'AAACNNNCTTGG'),
        ('NNNNNN', ''),
    ]
    for raw, expected_bases in cases:
        # Qualities are a run of '#' matching the sequence length.
        read = Sequence('read1', raw, qualities='#' * len(raw))
        expected = Sequence(
            'read1', expected_bases, qualities='#' * len(expected_bases)
        )
        assert trimmer(read, ModificationInfo(read)) == expected
def test_anywhere_parameter_back():
    """Parse a 'back' adapter spec carrying the ';anywhere' parameter.

    The parsed adapter must be a BackAdapter with ``_force_anywhere`` set.
    The adapter is then applied to a read that starts with the last
    fourteen bases of the adapter; the trimmed sequence is expected to be
    empty.
    """
    parser = AdapterParser(
        max_error_rate=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    parsed_adapters = list(parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'back'))
    adapter = parsed_adapters[0]
    assert isinstance(adapter, BackAdapter)
    assert adapter._force_anywhere

    # TODO move the rest to a separate test
    read = Sequence('foo1', 'TGAAGTACACGGTTAAAAAAAAAA')
    from cutadapt.modifiers import AdapterCutter
    cutter = AdapterCutter([adapter])
    result = cutter(read, ModificationInfo(read))
    assert result.sequence == ''