How to use the kipoi.data.SampleIterator class in kipoi

To help you get started, we’ve selected a few kipoi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kipoi / models / MMSplice / modules / exon_5prime / dataloader.py View on Github external
"""Dataloader
"""

import pickle
import warnings
import gffutils
from pyfaidx import Fasta
from concise.preprocessing import encodeDNA
from kipoi.data import SampleIterator
from kipoi.metadata import GenomicRanges
from mmsplice.vcf_dataloader import ExonInterval


class ExonDataLoader5(SampleIterator):
    """
    Load genome annotation (gtf) file along with a vcf file, return wt sequence and mut sequence.
    Args:
    gtf: gtf file or pickled gtf IntervalTree.
    fasta_file: file path; Genome sequence
    vcf_file: file path; vcf file with variants to score
    """

    def __init__(self,
                 gtf_file,
                 fasta_file,
                 split_seq=True,
                 encode=True,
                 exon_cut_l=0,
                 exon_cut_r=0,
                 acceptor_intron_cut=6,
github kipoi / models / MMSplice / modules / acceptor / dataloader.py View on Github external
"""Dataloader
"""

import pickle
import warnings
import gffutils
from pyfaidx import Fasta
from concise.preprocessing import encodeDNA
from kipoi.data import SampleIterator
from kipoi.metadata import GenomicRanges
from mmsplice.vcf_dataloader import ExonInterval


class IntronDataLoader(SampleIterator):
    """
    Load genome annotation (gtf) file along with a vcf file, return wt sequence and mut sequence.
    Args:
    gtf: gtf file or pickled gtf IntervalTree.
    fasta_file: file path; Genome sequence
    vcf_file: file path; vcf file with variants to score
    """

    def __init__(self,
                 gtf_file,
                 fasta_file,
                 split_seq=True,
                 encode=True,
                 exon_cut_l=0,
                 exon_cut_r=0,
                 acceptor_intron_cut=6,
github kipoi / kipoiseq / kipoiseq / dataloaders / protein.py View on Github external
from kipoiseq.extractors import SingleVariantProteinVCFSeqExtractor, \
    TranscriptSeqExtractor
from kipoi.data import SampleIterator


__all__ = [
    'SingleVariantProteinDataLoader'
]


class SingleVariantProteinDataLoader(SampleIterator):

    def __init__(self, gtf_file, fasta_file, vcf_file):
        """Set up the sequence extractors and the per-index metadata table.

        Args:
            gtf_file: forwarded to both extractors (presumably a path to a
                GTF annotation file -- confirm against the extractors).
            fasta_file: forwarded to both extractors (presumably a path to
                a reference FASTA file -- confirm).
            vcf_file: forwarded only to the variant extractor (presumably a
                path to a VCF file -- confirm).
        """
        self.protein_vcf_extractor = SingleVariantProteinVCFSeqExtractor(
            gtf_file, fasta_file, vcf_file)
        self.transcript_extractor = TranscriptSeqExtractor(
            gtf_file, fasta_file)
        # CDS table held by the transcript extractor's fetcher; the calls
        # below suggest a pandas DataFrame -- TODO confirm.
        cds = self.transcript_extractor.cds_fetcher.cds
        # only needed metadata
        # Keep the first row for each index value, then drop the
        # 'Start' and 'End' columns.
        self.metadatas = ((cds.loc[~cds.index.duplicated(keep='first')]).drop(
            columns=['Start', 'End']))
        # generator for all sequences with variants
        # NOTE(review): `_extractor` is not defined in this excerpt.
        self.sequences = self._extractor()

    def __iter__(self):
        """Return this loader itself as the iterator."""
        return self
github kipoi / models / MMSplice / modules / donor / dataloader.py View on Github external
"""
import numpy as np
from kipoi.data import SampleIterator

import pickle
from pyfaidx import Fasta
import gffutils
from concise.preprocessing import encodeDNA
import warnings
from kipoi.metadata import GenomicRanges

from mmsplice import MMSplice
from mmsplice.vcf_dataloader import ExonInterval


class IntronDataLoader(SampleIterator):
    """
    Load genome annotation (gtf) file along with a vcf file, return wt sequence and mut sequence.
    Args:
    gtf: gtf file or pickled gtf IntervalTree.
    fasta_file: file path; Genome sequence
    vcf_file: file path; vcf file with variants to score
    """

    def __init__(self,
                 gtf_file,
                 fasta_file,
                 split_seq=True,
                 encode=True,
                 exon_cut_l=0,
                 exon_cut_r=0,
                 acceptor_intron_cut=6,
github kipoi / kipoiseq / kipoiseq / dataloaders / splicing.py View on Github external
for exon in gtf_db.children(gene, featuretype='exon'):
            isLast = False  # track whether is last exon
            if firstLastNoExtend:
                if (gene.strand == "+" and exon.end == gene.end) or (gene.strand == "-" and exon.start == gene.start):
                    overhang = (overhang[0], 0)
                    isLast = True
                elif (gene.strand == "+" and exon.start == gene.start) or (gene.strand == "-" and exon.end == gene.end):
                    overhang = (0, overhang[1])
            exon = ExonInterval.from_feature(exon, overhang)
            exon.isLast = isLast
            overhang = default_overhang
            yield exon


@kipoi_dataloader(override={"dependencies": deps, 'info.authors': package_authors})
class MMSpliceDl(SampleIterator):
    """
    info:
        doc: >
            Dataloader for splicing models. With inputs as gtf annotation file and fasta file,
            each output is an exon sequence with flanking intronic seuqences. Intronic sequnce
            lengths specified by the users. Returned sequences are of the type np.array([str])
    args:
        gtf_file:
            doc: file path; Genome annotation GTF file
            example:
                url: https://raw.githubusercontent.com/kipoi/models/master/MMSplice/tests/data/test.gtf
                md5: b20607afe91ec20d6ee79ed95ab0e85b
        fasta_file:
            doc: Reference Genome sequence in fasta format
            example:
                url: https://raw.githubusercontent.com/kipoi/models/master/MMSplice/tests/data/hg19.nochr.chr17.fa
github kipoi / models / MMSplice / modules / exon_3prime / dataloader.py View on Github external
"""Dataloader
"""

import pickle
import warnings
import gffutils
from pyfaidx import Fasta
from concise.preprocessing import encodeDNA
from kipoi.metadata import GenomicRanges
from kipoi.data import SampleIterator
from mmsplice.vcf_dataloader import ExonInterval


class ExonDataLoader(SampleIterator):
    """
    Load genome annotation (gtf) file along with a vcf file, return wt sequence and mut sequence.
    Args:
    gtf: gtf file or pickled gtf IntervalTree.
    fasta_file: file path; Genome sequence
    vcf_file: file path; vcf file with variants to score
    """

    def __init__(self,
                 gtf_file,
                 fasta_file,
                 split_seq=True,
                 encode=True,
                 exon_cut_l=0,
                 exon_cut_r=0,
                 acceptor_intron_cut=6,
github kipoi / models / MMSplice / modules / intron_3prime / dataloader.py View on Github external
"""Dataloader
"""

import pickle
import warnings
import gffutils
from pyfaidx import Fasta
from concise.preprocessing import encodeDNA
from kipoi.metadata import GenomicRanges
from kipoi.data import SampleIterator
from mmsplice.vcf_dataloader import ExonInterval


class IntronDataLoader(SampleIterator):
    """
    Load genome annotation (gtf) file along with a vcf file, return wt sequence and mut sequence.
    Args:
    gtf: gtf file or pickled gtf IntervalTree.
    fasta_file: file path; Genome sequence
    vcf_file: file path; vcf file with variants to score
    """

    def __init__(self,
                 gtf_file,
                 fasta_file,
                 split_seq=True,
                 encode=True,
                 exon_cut_l=0,
                 exon_cut_r=0,
                 acceptor_intron_cut=6,
github kipoi / models / MMSplice / modules / intron_5prime / dataloader.py View on Github external
"""Dataloader
"""

import pickle
import warnings
import gffutils
from pyfaidx import Fasta
from concise.preprocessing import encodeDNA
from kipoi.data import SampleIterator
from kipoi.metadata import GenomicRanges
from mmsplice.vcf_dataloader import ExonInterval


class IntronDataLoader(SampleIterator):
    """
    Load genome annotation (gtf) file along with a vcf file, return wt sequence and mut sequence.
    Args:
    gtf: gtf file or pickled gtf IntervalTree.
    fasta_file: file path; Genome sequence
    vcf_file: file path; vcf file with variants to score
    """

    def __init__(self,
                 gtf_file,
                 fasta_file,
                 split_seq=True,
                 encode=True,
                 exon_cut_l=0,
                 exon_cut_r=0,
                 acceptor_intron_cut=6,