Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_interval_seq_builder_concat(interval_seq_builder):
    """
    Check `concat()` on the builder fixture before and after `restore()`.

    As supplied by the fixture, the builder is expected to raise
    `TypeError` from `concat()`; after `restore()` is called with a
    `Sequence` spanning 10-20 the concatenated string must match the
    expected value.
    """
    restored_piece = Sequence(seq='CCCCATCGNN', start=10, end=20)

    # The un-restored builder must refuse to concatenate.
    with pytest.raises(TypeError):
        interval_seq_builder.concat()

    interval_seq_builder.restore(restored_piece)
    joined = interval_seq_builder.concat()
    assert joined == 'CCCCTAGCNN'
def test__split_overlapping(variant_seq_extractor):
    """
    Check `_split_overlapping` on two (ref, alt) pairs and split points.

    Each case feeds a single pair plus an integer position and inspects
    the `.seq` of both members of the first two pairs produced.
    """
    # Case 1: 'AAA' (3-6) against 'T' (3-4), split argument 5.
    ref_alt = (Sequence(seq='AAA', start=3, end=6),
               Sequence(seq='T', start=3, end=4))
    pieces = list(variant_seq_extractor._split_overlapping([ref_alt], 5))
    head, tail = pieces[0], pieces[1]
    assert (head[0].seq, head[1].seq) == ('AA', 'T')
    assert (tail[0].seq, tail[1].seq) == ('A', '')

    # Case 2: 'TT' (3-5) against 'AAA' (3-6), split argument 4.
    ref_alt = (Sequence(seq='TT', start=3, end=5),
               Sequence(seq='AAA', start=3, end=6))
    pieces = list(variant_seq_extractor._split_overlapping([ref_alt], 4))
    head, tail = pieces[0], pieces[1]
    assert (head[0].seq, head[1].seq) == ('T', 'A')
    assert (tail[0].seq, tail[1].seq) == ('T', 'AA')
# extend
if extend_up:
if rc:
ends[-1] += extend_up
else:
starts[0] -= extend_up
if extend_down:
if rc:
starts[0] -= extend_down
else:
ends[-1] += extend_down
intervals = zip(starts, ends)
seq = self.get_spliced_seq(chrom, intervals, rc)
yield Sequence(name, seq.seq)
# load more lines if needed
lines += fin.readlines(1)
def __set_fasta_seq(self, fasta_seq):
    """
    Store `fasta_seq` on the instance as a `pyfaidx.Sequence`.

    `Seq.Seq` and `str` inputs are converted to text and wrapped in a new
    `pyfaidx.Sequence` named after `self.id`; an existing
    `pyfaidx.Sequence` is stored as given.

    Raises `ValueError` for any other input type.
    """
    if isinstance(fasta_seq, Seq.Seq):
        text = str(fasta_seq)
        self.__fasta_seq = pyfaidx.Sequence(name=self.id, seq=text)
        return

    if isinstance(fasta_seq, str):
        self.__fasta_seq = pyfaidx.Sequence(name=self.id, seq=str(fasta_seq))
        # Sanity check: wrapping must not change the sequence length.
        assert len(self.__fasta_seq) == len(str(fasta_seq))
        return

    if isinstance(fasta_seq, pyfaidx.Sequence):
        self.__fasta_seq = fasta_seq
        return

    raise ValueError("Unkown type: {}".format(type(fasta_seq)))
def _regions_to_seqs(self, track, extend_up=0, extend_down=0):
    """
    Yield one `Sequence` per region listed in `track`.

    `track` is either a list of region strings, or any other value, which
    is passed to `open()` and read as a text file with one region per
    line. Every region string is stripped of surrounding whitespace and
    handed, together with `extend_up` and `extend_down`, to
    `self._region_to_seq`; the stripped string becomes the name of the
    yielded `Sequence`.
    """
    if isinstance(track, list):
        for region in track:
            name = region.strip()
            seq = self._region_to_seq(name, extend_up, extend_down)
            yield Sequence(name, seq)
        return

    with open(track) as fin:
        # Iterate the file object directly: lines are read lazily and in
        # order, instead of appending to a list that is being iterated
        # (which pulled the whole remaining file into memory at once).
        for region in fin:
            name = region.strip()
            seq = self._region_to_seq(name, extend_up, extend_down)
            yield Sequence(name, seq)
def _variant_to_sequence(variants):
    """
    Turn variants into (reference, alternative) `Sequence` pairs.

    For each variant two `Sequence` objects are built, both named after
    the variant's `chrom` and starting at its `start`: one holding `ref`
    and one holding `alt`. Each ends at `start` plus the length of its
    own allele. The pairs are yielded lazily, in input order.
    """
    for variant in variants:
        reference = Sequence(
            name=variant.chrom,
            seq=variant.ref,
            start=variant.start,
            end=variant.start + len(variant.ref),
        )
        alternative = Sequence(
            name=variant.chrom,
            seq=variant.alt,
            start=variant.start,
            end=variant.start + len(variant.alt),
        )
        yield reference, alternative
def _fetch(self, interval, istart, iend):
    """
    Extract the reference sequence for `istart`-`iend` on `interval.chrom`.

    The span is requested from `self._ref_seq_extractor` and the result
    is returned wrapped in a `Sequence` carrying the chromosome name and
    the requested start and end.
    """
    region = Interval(interval.chrom, istart, iend)
    bases = self._ref_seq_extractor.extract(region)
    return Sequence(name=interval.chrom, seq=bases, start=istart, end=iend)