Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_pandas_and_pyvcf_implementations_equivalent():
    """Check that load_vcf_fast and load_vcf agree on every sample input.

    Each entry of ``paths`` is a dict of keyword arguments that is passed
    unchanged to both loaders. For every entry the two results must compare
    equal, have the same length, ``elements`` and ``metadata``, and each must
    hold more than one element. Two remote inputs are appended when
    RUN_TESTS_REQUIRING_INTERNET is truthy.
    """
    paths = [
        {'path': data_path("somatic_hg19_14muts.vcf")},
        {'path': data_path("somatic_hg19_14muts.space_in_sample_name.vcf")},
        # same file as the first entry, with an extra leading "/" on the path
        {'path': "/" + data_path("somatic_hg19_14muts.vcf")},
        {'path': data_path("somatic_hg19_14muts.vcf.gz")},
        {'path': data_path("multiallelic.vcf")},
        {'path': data_path("mutect-example.vcf")},
        {'path': data_path("strelka-example.vcf")},
        # the only entry that also passes a 'genome' argument
        {'path': data_path("mutect-example-headerless.vcf"),
         'genome': cached_release(75)},
    ]
    if RUN_TESTS_REQUIRING_INTERNET:
        paths.append({'path': VCF_EXTERNAL_URL})
        paths.append({'path': VCF_EXTERNAL_URL + ".gz"})

    def do_test(kwargs):
        """Load one input with both implementations and compare the results."""
        vcf_pandas = load_vcf_fast(**kwargs)
        vcf_pyvcf = load_vcf(**kwargs)
        eq_(vcf_pandas, vcf_pyvcf)
        eq_(len(vcf_pandas), len(vcf_pyvcf))
        eq_(vcf_pandas.elements, vcf_pyvcf.elements)
        eq_(vcf_pandas.metadata, vcf_pyvcf.metadata)
        # guard against both loaders agreeing on a (nearly) empty result
        assert len(vcf_pandas) > 1
        assert len(vcf_pyvcf) > 1

    # Run the comparison for every input; without a loop body the helper
    # above would never execute.
    # NOTE(review): if this suite relies on generator-style tests
    # (`yield do_test, kwargs`), swap the call for a yield — confirm against
    # the test runner in use.
    for kwargs in paths:
        do_test(kwargs)
def test_version_too_old_47():
    """Construct an EnsemblRelease for release number 47.

    NOTE(review): the test name suggests this release should be rejected, but
    no expected-exception check is visible here — confirm whether a decorator
    such as ``@raises`` belongs above this definition.
    """
    release_number = 47
    EnsemblRelease(release_number)
from pyensembl import EnsemblRelease
# EnsemblRelease(75) instance whose dataframe is queried by test_gene_ids.
ensembl75 = EnsemblRelease(75)
def test_gene_ids():
    """Every gene_id in the release-75 dataframe for contig "1" has length 15."""
    # restricting the query to a single contig keeps the test fast
    chr1_rows = ensembl75.dataframe(contig="1")
    assert 'gene_id' in chr1_rows
    # Ensembl gene ids look like ENSG00000223972, i.e. 15 characters
    id_lengths = chr1_rows['gene_id'].str.len()
    has_expected_length = id_lengths == 15
    # on failure the offending rows are attached as the assertion message
    assert has_expected_length.all(), chr1_rows[id_lengths != 15]
def test_version_is_not_numeric():
    """Construct an EnsemblRelease from the non-numeric string "wuzzle".

    NOTE(review): presumably this is expected to raise, but no
    expected-exception check is visible here — confirm whether a decorator
    such as ``@raises`` belongs above this definition.
    """
    non_numeric_release = "wuzzle"
    EnsemblRelease(non_numeric_release)
def test_version_is_none():
    """Construct an EnsemblRelease with None as the release.

    NOTE(review): presumably this is expected to raise, but no
    expected-exception check is visible here — confirm whether a decorator
    such as ``@raises`` belongs above this definition.
    """
    missing_release = None
    EnsemblRelease(missing_release)
# Orphaned fragment: these checks exercise normalize_chromosome, but the `def`
# line that should enclose them is not visible in this part of the file.
# NOTE(review): presumably the tail of a normalize_chromosome test — confirm
# and restore the enclosing definition and indentation.
# Lower-case "chrmt" is expected to come back as "chrMT".
assert normalize_chromosome("chrmt") == "chrMT"
# dict, list and None arguments are expected to raise TypeError
with assert_raises(TypeError):
normalize_chromosome({"a": "b"})
with assert_raises(TypeError):
normalize_chromosome([])
with assert_raises(TypeError):
normalize_chromosome(None)
# the empty string and the integer 0 are expected to raise ValueError
with assert_raises(ValueError):
normalize_chromosome("")
with assert_raises(ValueError):
normalize_chromosome(0)
def test_normalize_chromosome():
    """Check normalize_chromosome against a table of inputs and expected names.

    The table expects the X / M / MT contig letters to come back upper-cased,
    a leading "chr" prefix to be kept as given, and the integer 1 to come back
    as the string "1". A dict or a list argument must raise TypeError.
    """
    expected_by_input = [
        ("X", "X"),
        ("chrX", "chrX"),
        ("x", "X"),
        ("chrx", "chrX"),
        (1, "1"),
        ("1", "1"),
        ("chr1", "chr1"),
        ("chrM", "chrM"),
        ("chrMT", "chrMT"),
        ("M", "M"),
        ("MT", "MT"),
        ("m", "M"),
        ("chrm", "chrM"),
        ("mt", "MT"),
        ("chrmt", "chrMT"),
    ]
    for raw_name, normalized_name in expected_by_input:
        assert normalize_chromosome(raw_name) == normalized_name

    # container arguments are not valid chromosome names
    for unsupported_value in ({"a": "b"}, []):
        with assert_raises(TypeError):
            normalize_chromosome(unsupported_value)
def test_TP53_translation_from_cdna():
    """Translating the TP53-001 coding sequence must give its protein sequence."""
    # first transcript returned for the name "TP53-001"
    transcript = ensembl_grch38.transcripts_by_name("TP53-001")[0]
    translated = translate_cdna(
        transcript.coding_sequence,
        first_codon_is_start=True)
    eq_(translated, transcript.protein_sequence)
def test_sequence_key_for_variant_on_transcript_deletion_reverse_strand():
    """Check the ReferenceSequenceKey built for a deletion of the TP53-001 start codon.

    The key is built with ``context_size=10`` and is expected to equal a
    "-" strand key with "ATG" at the variant locus, "GGTCACTGCC" before it
    and "GAGGAGCCGC" after it.
    """
    # delete start codon of TP53-001, which in reverse complement means
    # deleting the sequence "CAT"
    tp53_deletion = Variant(
        "17", 7676592, "CAT", "", ensembl_grch38)
    tp53_001 = ensembl_grch38.transcripts_by_name("TP53-001")[0]
    # Sanity check on the transcript sequence around the start codon, with
    # 10 context nucleotides on each side: 'GGTCACTGCC_ATG_GAGGAGCCGC'
    eq_(tp53_001.sequence[190 - 10:190 + 13], "GGTCACTGCCATGGAGGAGCCGC")
    # get the 10 nucleotides before the variant and the 10 nucleotides after
    sequence_key = ReferenceSequenceKey.from_variant_and_transcript(
        variant=tp53_deletion,
        transcript=tp53_001,
        context_size=10)
    expected_sequence_key = ReferenceSequenceKey(
        strand="-",
        sequence_before_variant_locus="GGTCACTGCC",
        sequence_at_variant_locus="ATG",
        sequence_after_variant_locus="GAGGAGCCGC")
    # compare the computed key with the expected one; without this the test
    # would pass regardless of what from_variant_and_transcript returned
    eq_(sequence_key, expected_sequence_key)
def test_reference_coding_sequence_key_insertion_inside_start_codon():
    """An insertion inside the TP53-001 start codon must yield no coding key.

    ReferenceCodingSequenceKey.from_variant_and_transcript is expected to
    return None for this variant.
    """
    # Insert nucleotide "C" in the middle of the start codon of TP53-001;
    # in the reverse complement this is written 'T' > 'TG'.
    start_codon_insertion = Variant(
        "17", 7676592, "T", "TG", ensembl_grch38)
    transcript = ensembl_grch38.transcripts_by_name("TP53-001")[0]
    # keep only 1 nucleotide of context
    coding_key = ReferenceCodingSequenceKey.from_variant_and_transcript(
        variant=start_codon_insertion,
        transcript=transcript,
        context_size=1)
    assert coding_key is None, \
        "Expected result to be None when variant affects start codon"