Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_maf():
    """Compare the variants in ``tcga_ov_variants`` against a fixed list.

    ``tcga_ov_variants`` and ``ensembl`` are defined outside this snippet.
    For every variant the test checks equality with the expected variant at
    the same position, and that the ``Hugo_Symbol`` metadata entry names
    one of the genes reported by the variant.
    """
    # (contig, position, ref, alt) for each expected variant, in order.
    expected_fields = [
        (1, 1650797, "A", "G"),
        (1, 23836447, "C", "A"),
        (1, 231401797, "A", "C"),
        (11, 124617502, "C", "G"),
    ]
    expected_tcga_ov_variants = [
        Variant(contig, position, ref, alt, ensembl)
        for contig, position, ref, alt in expected_fields
    ]
    eq_(len(tcga_ov_variants), len(expected_tcga_ov_variants))
    for expected, loaded in zip(expected_tcga_ov_variants, tcga_ov_variants):
        eq_(expected, loaded)
        hugo_symbol = tcga_ov_variants.metadata[loaded]['Hugo_Symbol']
        symbol_found = any(
            gene.name == hugo_symbol for gene in loaded.genes)
        assert symbol_found, \
            "Expected gene name %s but got %s" % (hugo_symbol, loaded.genes)
def test_noncoding_polymorphic_pseudogene():
    """Expect a ``NoncodingTranscript`` effect on transcript ENST00000430863.

    The effect must report that neither the coding sequence nor the protein
    sequence is modified.
    """
    # variant in MROH5-001, which is a polymorphic pseudogene
    pseudogene_variant = Variant("8", 142458077, "C", "T", ensembl_grch37)
    expectations = dict(
        transcript_id="ENST00000430863",
        effect_class=NoncodingTranscript,
        modifies_coding_sequence=False,
        modifies_protein_sequence=False,
    )
    expect_effect(pseudogene_variant, **expectations)
def validate_transcript_mutation(
        ensembl_transcript_id,
        chrom,
        dna_position,
        dna_ref,
        dna_alt,
        aa_pos,
        aa_alt):
    """Look up the predicted effect of a variant on one transcript.

    Builds a GRCh37 ``Variant`` from the DNA coordinates, indexes its
    ``TranscriptMutationEffect`` instances by transcript ID and asserts that
    ``ensembl_transcript_id`` is among them.

    NOTE(review): ``aa_pos`` and ``aa_alt`` are not used in the visible part
    of this function; the snippet appears to end before the amino-acid
    comparison -- confirm against the full source.
    """
    variant = Variant(chrom, dna_position, dna_ref, dna_alt, ensembl_grch37)

    # Index only the transcript-level mutation effects by transcript ID.
    effects_by_transcript = {}
    for candidate in variant.effects():
        if isinstance(candidate, TranscriptMutationEffect):
            effects_by_transcript[candidate.transcript.id] = candidate

    assert ensembl_transcript_id in effects_by_transcript, \
        "%s not found in %s" % (ensembl_transcript_id, effects_by_transcript)

    effect = effects_by_transcript[ensembl_transcript_id]
    if isinstance(effect, ExonicSpliceSite):
        # exonic splice site mutations carry with them an alternate effect
        # which is what we check against dbNSFP (since that database seemed
        # to ignore exonic splicing mutations)
        effect = effect.alternate_effect
def test_release_75_contig_1():
    """Check the GRCh37 GTF dataframe for contig 1, given as str and as int.

    Both spellings must yield a non-empty dataframe whose ``seqname``
    column is ``"1"`` on every row.
    """
    for contig_name in ("1", 1):
        gtf_rows = ensembl_grch37.gtf.dataframe(contig=contig_name)
        assert gtf_rows is not None
        assert len(gtf_rows) > 0
        assert (gtf_rows.seqname == "1").all()
def test_maf():
    """Compare the variants in ``tcga_ov_variants`` against a fixed list.

    ``tcga_ov_variants`` and ``ensembl`` are defined outside this snippet.
    For every variant the test checks equality with the expected variant at
    the same position, and that the ``Hugo_Symbol`` metadata entry names
    one of the genes reported by the variant.
    """
    # (contig, position, ref, alt) for each expected variant, in order.
    expected_fields = [
        (1, 1650797, "A", "G"),
        (1, 23836447, "C", "A"),
        (1, 231401797, "A", "C"),
        (11, 124617502, "C", "G"),
    ]
    expected_tcga_ov_variants = [
        Variant(contig, position, ref, alt, ensembl)
        for contig, position, ref, alt in expected_fields
    ]
    eq_(len(tcga_ov_variants), len(expected_tcga_ov_variants))
    for expected, loaded in zip(expected_tcga_ov_variants, tcga_ov_variants):
        eq_(expected, loaded)
        hugo_symbol = tcga_ov_variants.metadata[loaded]['Hugo_Symbol']
        symbol_found = any(
            gene.name == hugo_symbol for gene in loaded.genes)
        assert symbol_found, \
            "Expected gene name %s but got %s" % (hugo_symbol, loaded.genes)
def test_silent_stop_codons():
silent_stop_codon_variants = {
"ENST00000290524": Variant(
contig=1,
start=151314663,
ref="C",
alt="T",
ensembl=ensembl_grch37),
"ENST00000368725": Variant(
contig=1,
start=153409535,
ref="C",
alt="T",
ensembl=ensembl_grch37),
"ENST00000353479": Variant(
contig=10,
start=105791994,
ref="C",
alt="T",
ensembl=ensembl_grch37),
}
for transcript_id, variant in silent_stop_codon_variants.items():
yield (
expect_effect,
from __future__ import absolute_import
import functools
from pyensembl import (
ensembl_grch37,
ensembl_grch38,
cached_release
)
from nose.tools import nottest
# Releases used when a test does not name specific Ensembl versions.
major_releases = [ensembl_grch37, ensembl_grch38]

# Contig names: the integers 1 through 22 followed by "X", "Y" and "M".
contigs = list(range(1, 23))
contigs.extend(["X", "Y", "M"])
@nottest
def test_ensembl_releases(*versions):
    """
    Run a unit test which takes an EnsemblRelease as an argument
    for multiple releases (most recent for each reference genome).

    With no ``versions`` the releases default to ``major_releases``;
    otherwise each version is resolved through ``cached_release``.

    NOTE(review): only the release selection is visible in this snippet;
    the code that applies the releases to a test function is not shown.
    """
    if versions:
        ensembl_releases = [cached_release(version) for version in versions]
    else:
        ensembl_releases = major_releases
def test_release_75_contig_MT():
    """Check the GRCh37 GTF dataframe for contig names "M" and "MT".

    Both spellings must yield a non-empty dataframe whose ``seqname``
    column is ``"MT"`` on every row.
    """
    for contig_name in ("M", "MT"):
        gtf_rows = ensembl_grch37.gtf.dataframe(contig=contig_name)
        assert gtf_rows is not None
        assert len(gtf_rows) > 0
        assert (gtf_rows.seqname == "MT").all()
def _get_effect(chrom, pos, dna_ref, dna_alt, transcript_id):
    """Return the top-priority effect of a variant on ``transcript_id``.

    Builds a ``Variant`` from the given coordinates and alleles, asserts
    that ``transcript_id`` appears among its per-transcript top-priority
    effects, and returns that effect. For an ``ExonicSpliceSite`` the
    effect's ``alternate_effect`` is returned instead.
    """
    variant = Variant(chrom, pos, dna_ref, dna_alt, ensembl=ensembl)
    per_transcript = variant.effects().top_priority_effect_per_transcript_id()
    assert transcript_id in per_transcript, \
        "Expected transcript ID %s for variant %s not found in %s" % (
            transcript_id, variant, per_transcript)
    top_effect = per_transcript[transcript_id]
    if not isinstance(top_effect, ExonicSpliceSite):
        return top_effect
    # COSMIC seems to ignore exonic splice sites
    return top_effect.alternate_effect