How to use the pyensembl.genome_for_reference_name function in pyensembl

To help you get started, we’ve selected a few pyensembl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openvax / pyensembl / test / test_transcript_ids.py View on Github external
"""
Tests for methods which return collections of transcript IDs that aren't
converting from some type of name or ID.
"""
from __future__ import absolute_import

from pyensembl import genome_for_reference_name
from nose.tools import eq_

from .common import test_ensembl_releases

# Genome looked up by the reference name "GRCh38"; created once when this
# module is imported.
grch38 = genome_for_reference_name("GRCh38")

# subset of transcript IDs for HLA-A
# (not the complete list, so checks against it should test containment
# rather than equality)
HLA_A_TRANSCRIPT_IDS = [
    'ENST00000396634',
    'ENST00000376809',
    'ENST00000376806',
    'ENST00000376802',
    'ENST00000496081',
    'ENST00000495183',
    'ENST00000461903',
    'ENST00000479320',
]

def test_transcript_ids_ensembl_grch38_hla_a():
    # chr6:29,945,884  is a position for HLA-A
    # based on:
github openvax / pyensembl / test / test_timings.py View on Github external
from __future__ import print_function, absolute_import

from pyensembl import genome_for_reference_name

from tinytimer import benchmark

# Genome looked up by the reference name "GRCh38"; created once at import.
# NOTE(review): presumably the source of the lookup functions being timed --
# confirm against the rest of this benchmark module.
ensembl = genome_for_reference_name("GRCh38")
# Contig names "1" through "22" plus "X" and "Y" (MT is not included).
contigs = [str(i + 1) for i in range(22)] + ["X", "Y"]

def make_repeat_lookup_fn(lookup_fn, n_positions):
    """
    Build a zero-argument callable which invokes lookup_fn(contig, position)
    for every name in the module-level `contigs` list, at `n_positions`
    loci per contig: 1,000,000, 2,000,000, ... (one every 10 ** 6 bases).
    """
    step = 10 ** 6

    def repeat_lookup_fn():
        for contig in contigs:
            # the first locus sits one full step in, not at zero
            for index in range(n_positions):
                lookup_fn(contig, step + index * step)

    return repeat_lookup_fn

def run_benchmark(lookup_fn, n_positions_per_contig=20, time_limit=60.0):
    """
    Take a lookup functions (such as EnsemblRelease.genes_at_locus) and
github openvax / isovar / test / test_reference_sequence_key.py View on Github external
def test_reference_sequence_key_from_weird_deletion():
    # The deleted bases read into the intron; isovar is expected to skip
    # such cases, which shows up here as a missing (None) sequence key.
    genome = genome_for_reference_name("grcm38")
    deletion = Variant("11", 106262686, "GTGAAGG", "", genome)
    overlapped_transcript = genome.transcript_by_id("ENSMUST00000021049")
    key_arguments = dict(
        variant=deletion,
        transcript=overlapped_transcript,
        context_size=10)
    result = ReferenceSequenceKey.from_variant_and_transcript(**key_arguments)
    # on failure, show both the unexpected key and the transcript
    assert result is None, '%s\n%s' % (result, overlapped_transcript)
github openvax / varcode / test / test_effect_classes.py View on Github external
PrematureStop,
    FrameShift,
    ExonLoss,
    ExonicSpliceSite,
    FrameShiftTruncation,
    # TODO: SpliceDonor, SpliceReceptor
)
from pyensembl import ensembl_grch37, cached_release, genome_for_reference_name

from .common import expect_effect

# Pinned to Ensembl release 81: more recent releases were tried, but many of
# the checks here (presumably what the original note meant by "them") are
# very specific to Ensembl data between releases 77-81.
ensembl_grch38 = cached_release(81)

# Genome looked up by the reference name "grcm38"; judging by the variable
# name this is the mouse genome.
mouse_genome = genome_for_reference_name("grcm38")

def test_incomplete():
    # Transcript EGFR-009 (ENST00000450046 in Ensembl 78) has an
    # incomplete 3' end. It lies on chromosome 7 starting at 55,109,723
    # and its first exon begins: ATCATTCCTTTGGGCCTAGGA

    # Substitute A>T at the first nucleotide of the 5' UTR
    utr_substitution = Variant(
        "7", 55109723, "A", "T", ensembl=ensembl_grch38)

    # The transcript should be reported as incomplete, with the coding
    # and protein sequences left untouched.
    expectations = dict(
        transcript_id="ENST00000450046",
        effect_class=IncompleteTranscript,
        modifies_coding_sequence=False,
        modifies_protein_sequence=False)
    expect_effect(utr_substitution, **expectations)
github openvax / pyensembl / test / common.py View on Github external
from __future__ import absolute_import

import functools

from pyensembl import (
    genome_for_reference_name,
    cached_release,
    MAX_ENSEMBL_RELEASE,
)
from nose.tools import nottest

# Genomes looked up by reference name; created once when this module is
# imported.
grch37 = genome_for_reference_name("GRCh37")
grch38 = genome_for_reference_name("GRCh38")

# Default set used by test_ensembl_releases when it is called without any
# explicit versions.
major_releases = [grch37, grch38]

# Contig names "1" through "22" plus "X", "Y" and "M".
contigs = [str(c) for c in range(1, 23)] + ["X", "Y", "M"]

@nottest
def test_ensembl_releases(*versions):
    """
    Run a unit test which takes an EnsemblRelease as an argument
    for multiple releases (most recent for each reference genome)
    """

    if len(versions) == 0:
        ensembl_releases = major_releases
    else:
github openvax / pyensembl / test / common.py View on Github external
from __future__ import absolute_import

import functools

from pyensembl import (
    genome_for_reference_name,
    cached_release,
    MAX_ENSEMBL_RELEASE,
)
from nose.tools import nottest

# Genomes looked up by reference name; created once when this module is
# imported.
grch37 = genome_for_reference_name("GRCh37")
grch38 = genome_for_reference_name("GRCh38")

# Default set used by test_ensembl_releases when it is called without any
# explicit versions.
major_releases = [grch37, grch38]

# Contig names "1" through "22" plus "X", "Y" and "M".
contigs = [str(c) for c in range(1, 23)] + ["X", "Y", "M"]

@nottest
def test_ensembl_releases(*versions):
    """
    Run a unit test which takes an EnsemblRelease as an argument
    for multiple releases (most recent for each reference genome)
    """

    if len(versions) == 0:
        ensembl_releases = major_releases
    else:
        if any(version > MAX_ENSEMBL_RELEASE for version in versions):
github openvax / pyensembl / test / test_transcript_sequences.py View on Github external
"""Make sure we're getting correct transcritp sequence from Ensembl and that
it's a sequence type which correctly implements `complement`
and `reverse_complement`
"""

from __future__ import absolute_import
from nose.tools import eq_
from pyensembl import genome_for_reference_name

# Genome looked up by the reference name "GRCh38"; created once at import.
grch38 = genome_for_reference_name("GRCh38")

def test_transcript_sequence_ensembl_grch38():
    # ENST00000448914 belongs to an extremely short TRD gene, so the whole
    # expected sequence can be spelled out inline
    transcript_id = "ENST00000448914"
    expected = "ACTGGGGGATACG"

    # look the sequence up directly by transcript ID
    eq_(grch38.transcript_sequence(transcript_id), expected)

    # the Transcript object should expose the same sequence
    transcript = grch38.transcript_by_id(transcript_id)
    eq_(transcript.sequence, expected)
github openvax / pyensembl / test / test_gene_names.py View on Github external
"""
Test all methods which return collections of gene names that aren't converting
from some other type of name or ID.
"""
from __future__ import absolute_import, print_function
from pyensembl import genome_for_reference_name

from .common import test_ensembl_releases

# Genome looked up by the reference name "GRCh38"; created once at import.
grch38 = genome_for_reference_name("GRCh38")

# A few well-known gene names.
# NOTE(review): presumably each is expected to appear in the result of
# ensembl.gene_names() in test_all_gene_names -- confirm against the full test.
KNOWN_GENE_NAMES = [
    "TP53",
    "ERBB2",
    "SMAD4",
    "CTAG1A",
    "HLA-A",
]

@test_ensembl_releases()
def test_all_gene_names(ensembl):
    """
    test_all_gene_names : Make sure some known gene names such as
    SMAD4, TP53, ERBB2, &c
    """
    gene_names = ensembl.gene_names()
github openvax / varcode / varcode / reference.py View on Github external
Ensembl version.

    If given a string, return latest EnsemblRelease which has a reference
    of the same name.

    If given a PyEnsembl Genome, simply return it.
    """
    if isinstance(genome_object_string_or_int, Genome):
        return genome_object_string_or_int
    if is_integer(genome_object_string_or_int):
        return cached_release(genome_object_string_or_int)
    elif is_string(genome_object_string_or_int):
        # first infer the canonical reference name, e.g. mapping hg19 -> GRCh37
        # and then get the associated PyEnsembl Genome object
        reference_name = infer_reference_name(genome_object_string_or_int)
        return genome_for_reference_name(reference_name)
    else:
        raise TypeError(
            ("Expected genome to be an int, string, or pyensembl.Genome "
                "instance, got %s : %s") % (
                str(genome_object_string_or_int),
                type(genome_object_string_or_int)))
github openvax / varcode / varcode / cli / variant_args.py View on Github external
def variant_collection_from_args(args, required=True):
    variant_collections = []

    if args.genome:
        genome = genome_for_reference_name(args.genome)
    else:
        # no genome specified, assume it can be inferred from the file(s)
        # we're loading
        genome = None

    for vcf_path in args.vcf:
        variant_collections.append(load_vcf(vcf_path, genome=genome))
    for maf_path in args.maf:
        variant_collections.append(load_maf(maf_path))

    if args.variant:
        if not genome:
            raise ValueError(
                "--reference-name must be specified when using --variant")

        variants = [