Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_variants_to_pyranges():
    """Check the table produced by ``variants_to_pyranges``.

    Every variant read from ``vcf_file`` must yield exactly one row, and
    the first row must carry the expected coordinates and alleles.
    """
    all_variants = list(MultiSampleVCF(vcf_file))
    frame = variants_to_pyranges(all_variants).df

    # One row per input variant.
    assert len(frame) == len(all_variants)

    first_row = frame.iloc[0]
    assert (first_row.Chromosome, first_row.Start, first_row.End) == ('chr1', 3, 4)
    # The original variant object travels along in the ``variant`` column.
    assert (first_row.variant.ref, first_row.variant.alt) == ('T', 'C')
def __init__(self, fasta_file, vcf_file, gtf_file):
    """Store the three input paths as strings and build the helpers.

    Args:
        fasta_file: path to the FASTA file; passed on as ``str``.
        vcf_file: path to the VCF file; passed on as ``str``.
        gtf_file: path to the GTF file; passed on as ``str``.
    """
    # Normalise every path to ``str`` before handing it to a collaborator.
    self.gtf_file, self.fasta_file, self.vcf_file = (
        str(path) for path in (gtf_file, fasta_file, vcf_file)
    )

    self.genome_cds_fetcher = GenomeCDSFetcher(
        self.gtf_file,
        self.fasta_file,
    )
    self.multi_sample_VCF = MultiSampleVCF(self.vcf_file)
    self.variant_seq_extractor = VariantSeqExtractor(self.fasta_file)
def multi_sample_vcf():
    """Return a ``MultiSampleVCF`` opened on the module-level ``vcf_file``.

    NOTE(review): presumably registered as a pytest fixture (a test in
    this file takes a ``multi_sample_vcf`` argument) -- the decorator is
    not visible here; confirm.
    """
    reader = MultiSampleVCF(vcf_file)
    return reader
def test_MultiSampleVCF_VariantQueryable_to_vcf(tmpdir, multi_sample_vcf):
    """Write a filtered variant query to a VCF file and read it back.

    The query over ``intervals`` is filtered with
    ``NumberVariantQuery(max_num=1)``, saved to ``output.vcf`` under
    ``tmpdir``, and the re-read file must contain the single expected
    variant.
    """
    output_vcf_file = str(tmpdir / 'output.vcf')

    # Same pipeline as a fluent chain, spelled out one step at a time.
    queried = multi_sample_vcf.query_variants(intervals)
    filtered = queried.filter_range(NumberVariantQuery(max_num=1))
    filtered.to_vcf(output_vcf_file)

    written = list(MultiSampleVCF(output_vcf_file))
    assert len(written) == 1

    only_variant = written[0]
    assert (only_variant.ref, only_variant.alt) == ('AACG', 'GA')
def __init__(self, gtf_file, fasta_file, vcf_file):
    """Store the input paths as strings and wire up the helper objects.

    Args:
        gtf_file: path to the GTF file; passed on as ``str``.
        fasta_file: path to the FASTA file; passed on as ``str``.
        vcf_file: path to the VCF file; passed on as ``str``.
    """
    self.gtf_file, self.fasta_file, self.vcf_file = (
        str(path) for path in (gtf_file, fasta_file, vcf_file)
    )

    self.cds_fetcher = CDSFetcher(self.gtf_file)

    # Turn the CDS dataframe (index moved back into columns) into a
    # PyRanges object.
    cds_table = self.cds_fetcher.cds.reset_index()
    cds_ranges = pyranges.PyRanges(cds_table)

    # The matcher pairs variants from the VCF with transcript ids.
    self.single_variant_matcher = SingleVariantMatcher(
        self.vcf_file,
        pranges=cds_ranges,
    )

    self.fasta = FastaStringExtractor(self.fasta_file)
    self.multi_sample_VCF = MultiSampleVCF(self.vcf_file)
    self.variant_seq_extractor = VariantSeqExtractor(self.fasta_file)
def __init__(self, *args, **kwargs):
    """Initialise the parent with ``strict_gt=True`` and index the samples.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser, with ``strict_gt=True`` always added; passing
    ``strict_gt`` explicitly therefore raises ``TypeError``.

    After initialisation ``self.sample_mapping`` maps each entry of
    ``self.samples`` to its position in that sequence.
    """
    # Kept from the original even though the name is not used below.
    from cyvcf2 import VCF

    super(MultiSampleVCF, self).__init__(*args, strict_gt=True, **kwargs)

    self.sample_mapping = {
        sample: position for position, sample in enumerate(self.samples)
    }