Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_paired_adapter_cutter_actions(action, expected_trimmed1, expected_trimmed2):
    """Run a PairedAdapterCutter with the given action on one read pair.

    Each read contains its adapter in the middle ("GGTTAA" in the first
    read, "AACCGG" in the second). The sequences of the two returned reads
    must equal ``expected_trimmed1`` and ``expected_trimmed2``.
    """
    from cutadapt.adapters import BackAdapter

    adapter_r1 = BackAdapter("GGTTAA")
    adapter_r2 = BackAdapter("AACCGG")
    read1 = Sequence("name", "CCCCGGTTAACCCC")
    read2 = Sequence("name", "TTTTAACCGGTTTT")

    cutter = PairedAdapterCutter([adapter_r1], [adapter_r2], action=action)
    result = cutter(read1, read2, ModificationInfo(read1), ModificationInfo(read2))
    trimmed1, trimmed2 = result

    assert expected_trimmed1 == trimmed1.sequence
    assert expected_trimmed2 == trimmed2.sequence
def test_back_adapter_partial_occurrence_in_front():
    """A back adapter must not match when only its tail starts the read.

    The read begins with "AATT", the last four bases of the adapter
    "CTGAATT"; with no errors allowed, ``match_to`` is expected to
    return ``None``.
    """
    back = BackAdapter("CTGAATT", max_error_rate=0, min_overlap=4)
    match = back.match_to("AATTGGGGGGG")
    assert match is None
def test_linked_adapter():
    """Check that a LinkedAdapter keeps its parts and trims both ends.

    The linked adapter must expose the front and back adapters with their
    own ``min_overlap`` values, and trimming a read that starts with
    "AAAA" and ends with "TTTT" must leave only the "CCCCC" in between
    while preserving the read name.
    """
    linked_adapter = LinkedAdapter(
        PrefixAdapter('AAAA', min_overlap=4),
        BackAdapter('TTTT', min_overlap=3),
        front_required=True,
        back_required=False,
        name='name',
    )
    assert linked_adapter.front_adapter.min_overlap == 4
    assert linked_adapter.back_adapter.min_overlap == 3

    read = Sequence(name='seq', sequence='AAAACCCCCTTTT')
    match = linked_adapter.match_to(read.sequence)
    trimmed = match.trimmed(read)
    assert trimmed.name == 'seq'
    assert trimmed.sequence == 'CCCCC'
def test_paired_adapter_cutter_actions(action, expected_trimmed1, expected_trimmed2):
    """Apply a PairedAdapterCutter using ``action`` and compare both results.

    The first read carries the adapter "GGTTAA" and the second read carries
    "AACCGG"; after processing, the resulting sequences are compared with
    ``expected_trimmed1`` and ``expected_trimmed2`` respectively.
    """
    from cutadapt.adapters import BackAdapter

    first_adapters = [BackAdapter("GGTTAA")]
    second_adapters = [BackAdapter("AACCGG")]
    pac = PairedAdapterCutter(first_adapters, second_adapters, action=action)

    s1 = Sequence("name", "CCCCGGTTAACCCC")
    s2 = Sequence("name", "TTTTAACCGGTTTT")
    info1 = ModificationInfo(s1)
    info2 = ModificationInfo(s2)

    out1, out2 = pac(s1, s2, info1, info2)
    assert expected_trimmed1 == out1.sequence
    assert expected_trimmed2 == out2.sequence
def test_statistics():
    """The recorded length statistics must not exceed the read length.

    After running an AdapterCutter (``times=3``) over a single read, the
    ``seqlen * count`` products from the front and back ``lengths``
    statistics of every adapter are summed; that total may not be larger
    than ``len(read)``.
    """
    read = Sequence('name', 'AAAACCCCAAAA')
    adapters = [BackAdapter("CCCC", max_error_rate=0.1)]
    cutter = AdapterCutter(adapters, times=3)
    cutter(read, ModificationInfo(read))

    # TODO make this a lot simpler
    per_adapter_stats = [cutter.adapter_statistics[adapter] for adapter in adapters]
    trimmed_bp = sum(
        seqlen * count
        for stats in per_adapter_stats
        for lengths in (stats.front.lengths, stats.back.lengths)
        for seqlen, count in lengths.items()
    )
    assert trimmed_bp <= len(read), trimmed_bp
def test_issue_52():
    """Check ``wildcards()`` on a hand-built RemoveAfterMatch.

    The adapter ends in five ``N`` wildcards (``adapter_wildcards=True``).
    The match coordinates are supplied directly instead of being computed
    by an alignment, and ``wildcards()`` is expected to return 'GGC'.
    """
    sequence = "CCCCAGAACTACAGTCCCGGC"
    adapter = BackAdapter(
        sequence='GAACTCCAGTCACNNNNN',
        max_error_rate=0.12,
        min_overlap=5,
        read_wildcards=False,
        adapter_wildcards=True,
    )
    # Alignment coordinates and counts passed straight to the match object.
    alignment = dict(astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2)
    am = RemoveAfterMatch(adapter=adapter, sequence=sequence, **alignment)
    assert am.wildcards() == 'GGC'
"""
The result above should actually be 'CGGC' since the correct
def test_anywhere_parameter_back():
    """Parse a back adapter carrying the ``;anywhere`` parameter and use it.

    The parsed adapter must be a BackAdapter with ``_force_anywhere`` set.
    Cutting a read with it is then expected to leave an empty sequence.
    """
    parser = AdapterParser(
        max_error_rate=0.2,
        min_overlap=4,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    parsed = list(parser.parse('CTGAAGTGAAGTACACGGTT;anywhere', 'back'))
    adapter = parsed[0]
    assert isinstance(adapter, BackAdapter)
    assert adapter._force_anywhere

    # TODO move the rest to a separate test
    read = Sequence('foo1', 'TGAAGTACACGGTTAAAAAAAAAA')
    from cutadapt.modifiers import AdapterCutter

    cutter = AdapterCutter([adapter])
    info = ModificationInfo(read)
    trimmed_read = cutter(read, info)
    assert trimmed_read.sequence == ''
def test_adapter_cutter():
    """``AdapterCutter.best_match`` must pick the second of two adapters.

    The two candidate adapters differ in a single base; for the given read
    the returned match is expected to reference ``a2``.
    """
    from cutadapt.adapters import BackAdapter

    a1 = BackAdapter("GTAGTCCCGC")
    a2 = BackAdapter("GTAGTCCCCC")
    candidates = [a1, a2]
    read = Sequence("name", "ATACCCCTGTAGTCCCC")

    match = AdapterCutter.best_match(candidates, read)
    assert match.adapter is a2
def test_info_record():
adapter = BackAdapter(
sequence='GAACTCCAGTCACNNNNN',
max_error_rate=0.12,
min_overlap=5,
read_wildcards=False,
adapter_wildcards=True,
name="Foo")
read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
am = RemoveAfterMatch(astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2,
adapter=adapter, sequence=read.sequence)
assert am.get_info_records(read) == [[
"",
2,
5,
21,
'CCCCA',
'GAACTACAGTCCCGGC',
def test_str():
    """Smoke test: ``str()`` works on an adapter and on a ``match_to`` result.

    Nothing is asserted about the produced text; the calls only have to
    complete without raising.
    """
    adapter = BackAdapter('ACGT', max_error_rate=0.1)
    str(adapter)
    match = adapter.match_to("TTACGT")
    str(match)