How to use the kipoiseq.extractors.vcf.MultiSampleVCF class in kipoiseq

To help you get started, we’ve selected a few kipoiseq examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kipoi / kipoiseq / tests / extractors / test_vcf_matching.py View on Github external
def test_variants_to_pyranges():
    """Check the PyRanges conversion of the variants read from ``vcf_file``.

    The resulting dataframe must contain one row per variant, and its first
    row must carry the expected chromosome, coordinates and alleles.
    """
    # Keep the reader bound to a name while the variants are in use.
    reader = MultiSampleVCF(vcf_file)
    all_variants = list(reader)
    frame = variants_to_pyranges(all_variants).df
    n_rows = frame.shape[0]
    assert n_rows == len(all_variants)

    first_row = frame.iloc[0]
    assert first_row.Chromosome == 'chr1'
    assert first_row.Start == 3
    assert first_row.End == 4
    first_variant = first_row.variant
    assert first_variant.ref == 'T'
    assert first_variant.alt == 'C'
github kipoi / kipoiseq / tests / test_5_protein_dl.py View on Github external
def __init__(self, fasta_file, vcf_file, gtf_file):
        """Store the input paths as strings and build the helper objects.

        Args:
            fasta_file: reference FASTA path; converted with ``str``.
            vcf_file: VCF path; converted with ``str``.
            gtf_file: GTF annotation path; converted with ``str``.
        """
        # Normalise the three inputs to plain strings first.
        gtf_path = str(gtf_file)
        fasta_path = str(fasta_file)
        vcf_path = str(vcf_file)
        self.gtf_file = gtf_path
        self.fasta_file = fasta_path
        self.vcf_file = vcf_path

        # Helpers are created in the same order as before: CDS fetcher,
        # VCF reader, then the variant sequence extractor.
        self.genome_cds_fetcher = GenomeCDSFetcher(gtf_path, fasta_path)
        self.multi_sample_VCF = MultiSampleVCF(vcf_path)
        self.variant_seq_extractor = VariantSeqExtractor(fasta_path)
github kipoi / kipoiseq / tests / extractors / test_vcf.py View on Github external
def multi_sample_vcf():
    """Return a ``MultiSampleVCF`` opened on the module-level ``vcf_file``."""
    reader = MultiSampleVCF(vcf_file)
    return reader
github kipoi / kipoiseq / tests / extractors / test_vcf.py View on Github external
def test_MultiSampleVCF_VariantQueryable_to_vcf(tmpdir, multi_sample_vcf):
    """Write a filtered variant query to a VCF file and read it back.

    Variants are queried over ``intervals``, filtered with
    ``NumberVariantQuery(max_num=1)`` and written to ``output.vcf`` under
    ``tmpdir``. Re-reading that file must give exactly one variant,
    ``AACG`` -> ``GA``.
    """
    output_vcf_file = str(tmpdir / 'output.vcf')

    # Same pipeline as a single chained call, spelled out step by step.
    queried = multi_sample_vcf.query_variants(intervals)
    filtered = queried.filter_range(NumberVariantQuery(max_num=1))
    filtered.to_vcf(output_vcf_file)

    # Re-open the written file; keep the reader bound while variants are used.
    written = MultiSampleVCF(output_vcf_file)
    read_back = list(written)
    assert len(read_back) == 1
    only_variant = read_back[0]
    assert only_variant.ref == 'AACG'
    assert only_variant.alt == 'GA'
github kipoi / kipoiseq / kipoiseq / extractors / protein.py View on Github external
def __init__(self, gtf_file, fasta_file, vcf_file):
        """Store the input paths as strings and wire up the extractors.

        Args:
            gtf_file: GTF annotation path; converted with ``str``.
            fasta_file: reference FASTA path; converted with ``str``.
            vcf_file: VCF path; converted with ``str``.
        """
        gtf_path = str(gtf_file)
        fasta_path = str(fasta_file)
        vcf_path = str(vcf_file)
        self.gtf_file = gtf_path
        self.fasta_file = fasta_path
        self.vcf_file = vcf_path

        self.cds_fetcher = CDSFetcher(gtf_path)
        # Turn the CDS dataframe (index reset to columns) into a PyRanges.
        cds_frame = self.cds_fetcher.cds.reset_index()
        cds_ranges = pyranges.PyRanges(cds_frame)
        # The matcher is given the VCF path together with the CDS ranges.
        self.single_variant_matcher = SingleVariantMatcher(
            vcf_path, pranges=cds_ranges)

        self.fasta = FastaStringExtractor(fasta_path)
        self.multi_sample_VCF = MultiSampleVCF(vcf_path)
        self.variant_seq_extractor = VariantSeqExtractor(fasta_path)
github kipoi / kipoiseq / kipoiseq / extractors / vcf.py View on Github external
def __init__(self, *args, **kwargs):
        """Initialise the parent with ``strict_gt=True`` and index the samples.

        All positional and keyword arguments are forwarded to the parent
        initialiser. ``strict_gt=True`` is always added, so callers must not
        pass ``strict_gt`` themselves (a duplicate keyword raises TypeError).

        After initialisation ``self.sample_mapping`` maps each entry of
        ``self.samples`` to its position in that sequence.
        """
        # NOTE(review): VCF is not referenced in this method; the import is
        # presumably kept so that a missing cyvcf2 fails here - confirm
        # before removing.
        from cyvcf2 import VCF
        super(MultiSampleVCF, self).__init__(*args, strict_gt=True, **kwargs)
        self.sample_mapping = {
            sample: position for position, sample in enumerate(self.samples)
        }