Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def variant_queryable():
vcf = MultiSampleVCF(vcf_file)
return VariantIntervalQueryable(vcf, [
(
[
Variant('chr1', 12, 'A', 'T'),
Variant('chr1', 18, 'A', 'C', filter='q10'),
],
Interval('chr1', 10, 20)
),
(
[
Variant('chr2', 120, 'AT', 'AAAT'),
],
Interval('chr2', 110, 200)
)
def test_variant():
    """Check Variant construction, defaults, read-only fields and cyvcf2 conversion."""
    v = Variant("chr1", 10, 'C', 'T')

    # Fields passed to the constructor, plus the derived `start`.
    assert v.start == 9
    assert v.chrom == 'chr1'
    assert v.pos == 10
    assert v.ref == 'C'
    assert v.alt == 'T'

    # Defaults of the optional fields.
    assert isinstance(v.info, dict)
    assert len(v.info) == 0
    assert v.qual == 0
    assert v.filter == 'PASS'

    # `info` is a plain mutable mapping.
    v.info['test'] = 10
    assert v.info['test'] == 10

    assert isinstance(str(v), str)

    # Copying must work; the copy itself is not inspected further here.
    snapshot = v.copy()

    # The four defining fields reject assignment.
    for field, new_value in (('chrom', 'asd'), ('pos', 10),
                             ('ref', 'asd'), ('alt', 'asd')):
        with pytest.raises(AttributeError):
            setattr(v, field, new_value)

    # The remaining (non-fixed) fields accept assignment.
    for field, new_value in (('id', 'asd'), ('qual', 10),
                             ('filter', 'asd'), ('source', 2)):
        setattr(v, field, new_value)

    # A position given as a string is exposed as an int.
    from_str_pos = Variant("chr1", '10', 'C', 'T')
    assert isinstance(from_str_pos.pos, int)

    # Conversion from a cyvcf2 record keeps the record as `source`.
    reader = cyvcf2.VCF('tests/data/test.vcf.gz')
    first_record = list(reader)[0]
    converted = Variant.from_cyvcf(first_record)
    assert isinstance(converted.source, cyvcf2.Variant)
from conftest import vcf_file, gtf_file, example_intervals_bed
import pyranges
from kipoiseq.dataclasses import Interval, Variant
from kipoiseq.extractors.vcf import MultiSampleVCF
from kipoiseq.extractors.vcf_matching import variants_to_pyranges, \
pyranges_to_intervals, intervals_to_pyranges, BaseVariantMatcher, \
SingleVariantMatcher, MultiVariantsMatcher
# Module-level sample data (presumably consumed by tests outside this
# excerpt -- the tests visible here build their own inputs).

# Two chr1 intervals, one on the '+' strand and one on the '-' strand.
intervals = [
Interval('chr1', 1, 10, strand='+'),
Interval('chr1', 23, 30, strand='-')
]
# Three chr1 variants given as (chrom, pos, ref, alt): a single-base
# T>C change at 4 and two variants whose ref and alt differ in length.
variants = [
Variant('chr1', 4, 'T', 'C'),
Variant('chr1', 5, 'A', 'GA'),
Variant('chr1', 25, 'AACG', 'GA')
]
# Three chr1 ranges as a PyRanges object; the third range carries '.'
# as its strand value instead of '+' or '-'.
pr = pyranges.PyRanges(
chromosomes='chr1',
starts=[1, 23, 5],
ends=[10, 30, 50],
strands=['+', '-', '.']
)
def test_variants_to_pyranges():
    """Every variant read from the VCF yields exactly one row in the PyRanges frame."""
    loaded = list(MultiSampleVCF(vcf_file))
    frame = variants_to_pyranges(loaded).df
    row_count = frame.shape[0]
    assert row_count == len(loaded)
def test_MultiSampleVCF__regions_from_variants(multi_sample_vcf):
    """Check the regions derived from a list of variants.

    The expected result holds one region spanning the chr1 variants at
    positions 4 and 25, and one single-base region for each of the two
    variants at position 55525 (chr1 and chr10).
    """
    queried = [
        Variant('chr1', 4, 'T', 'C'),
        Variant('chr1', 25, 'AACG', 'GA'),
        Variant('chr1', 55525, 'AACG', 'GA'),
        Variant('chr10', 55525, 'AACG', 'GA'),
    ]

    observed = set(multi_sample_vcf._regions_from_variants(queried))

    # Order is irrelevant, hence the set comparison.
    expected = {
        Interval('chr1', 3, 25),
        Interval('chr1', 55524, 55525),
        Interval('chr10', 55524, 55525),
    }
    assert observed == expected
def test_MultiSampleVCF_get_variant(multi_sample_vcf):
    """Fetch chr1:4:T>C by string and by Variant; an unknown allele raises KeyError."""

    def check_fields(found):
        # Both lookup styles must return the same chr1:4 T>C record.
        assert found.chrom == 'chr1'
        assert found.pos == 4
        assert found.ref == 'T'
        assert found.alt == 'C'

    # Lookup keyed by a variant string.
    check_fields(multi_sample_vcf.get_variant("chr1:4:T>C"))

    # Lookup keyed by a Variant object.
    check_fields(multi_sample_vcf.get_variant(Variant('chr1', 4, 'T', 'C')))

    # Same position with a different ref allele is expected to be absent.
    with pytest.raises(KeyError):
        multi_sample_vcf.get_variant("chr1:4:A>C")
import pyranges
from kipoiseq.dataclasses import Interval, Variant
from kipoiseq.extractors.vcf import MultiSampleVCF
from kipoiseq.extractors.vcf_matching import variants_to_pyranges, \
pyranges_to_intervals, intervals_to_pyranges, BaseVariantMatcher, \
SingleVariantMatcher, MultiVariantsMatcher
# Module-level sample data (presumably consumed by tests outside this
# excerpt -- the tests visible here build their own inputs).

# Two chr1 intervals, one on the '+' strand and one on the '-' strand.
intervals = [
Interval('chr1', 1, 10, strand='+'),
Interval('chr1', 23, 30, strand='-')
]
# Three chr1 variants given as (chrom, pos, ref, alt): a single-base
# T>C change at 4 and two variants whose ref and alt differ in length.
variants = [
Variant('chr1', 4, 'T', 'C'),
Variant('chr1', 5, 'A', 'GA'),
Variant('chr1', 25, 'AACG', 'GA')
]
# Three chr1 ranges as a PyRanges object; the third range carries '.'
# as its strand value instead of '+' or '-'.
pr = pyranges.PyRanges(
chromosomes='chr1',
starts=[1, 23, 5],
ends=[10, 30, 50],
strands=['+', '-', '.']
)
def test_variants_to_pyranges():
    """Every variant read from the VCF yields exactly one row in the PyRanges frame."""
    loaded = list(MultiSampleVCF(vcf_file))
    frame = variants_to_pyranges(loaded).df
    row_count = frame.shape[0]
    assert row_count == len(loaded)
if the number of variants is within the given limits.
"""
def __init__(self, max_num=float('inf'), min_num=0):
    """Store the inclusive bounds on the number of variants.

    Args:
      max_num: upper bound; defaults to infinity (no upper limit).
      min_num: lower bound; defaults to 0.
    """
    # TODO: sample specificity
    self.min_num, self.max_num = min_num, max_num
def __call__(self, variants, interval):
    """Return one boolean per variant, all equal.

    Every entry is True when ``len(variants)`` lies between
    ``self.min_num`` and ``self.max_num`` (both inclusive), otherwise
    every entry is False. ``interval`` is accepted but not used.
    """
    count = len(variants)
    within_limits = bool(self.max_num >= count >= self.min_num)
    return [within_limits] * count
# Type alias used to annotate `variant_intervals` below: a list of
# (iterable of Variant, Interval) pairs.
_VariantIntervalType = List[Tuple[Iterable[Variant], Interval]]
class VariantIntervalQueryable:
    """Holds a VCF handle together with (variants, interval) groups."""

    def __init__(self, vcf, variant_intervals: _VariantIntervalType,
                 progress=False):
        """
        Query object of variants.

        Args:
          vcf: VCF handle the variants belong to (the original note
            describes it as a cyvcf2.VCF object).
          variant_intervals: list of (variants, interval) tuples, where
            `variants` is an iterable of Variant.
          progress: stored unchanged; presumably toggles progress
            reporting -- confirm against the code that reads it.
        """
        self.progress = progress
        self.variant_intervals = variant_intervals
        self.vcf = vcf