Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_version_too_old_47():
    """Check that constructing ``EnsemblRelease(47)`` is rejected.

    The test fails with an ``AssertionError`` if the constructor accepts
    release 47 without raising.

    NOTE(review): the expectation that 47 is "too old" is taken from the
    test name; no expected-exception decorator is visible here -- confirm.
    """
    try:
        EnsemblRelease(47)
    except Exception:
        # Any exception raised by the constructor counts as a rejection.
        return
    raise AssertionError(
        "EnsemblRelease(47) was accepted but should be rejected as too old")
from pyensembl import EnsemblRelease
# Release-75 instance created once at import time; test_gene_ids reads it.
ensembl75 = EnsemblRelease(75)
def test_gene_ids():
    """Check that every ``gene_id`` on contig "1" is 15 characters long.

    On failure the assertion message is the sub-frame of offending rows.
    """
    # Restricting the query to a single chromosome keeps the test fast.
    chr1_frame = ensembl75.dataframe(contig="1")
    assert 'gene_id' in chr1_frame
    # Ensembl gene ids look like ENSG00000223972, i.e. 15 characters.
    id_lengths = chr1_frame['gene_id'].str.len()
    assert (id_lengths == 15).all(), chr1_frame[id_lengths != 15]
def test_version_is_not_numeric():
    """Check that constructing ``EnsemblRelease("wuzzle")`` is rejected.

    The test fails with an ``AssertionError`` if the constructor accepts
    the non-numeric release without raising.

    NOTE(review): the expectation is taken from the test name; no
    expected-exception decorator is visible here -- confirm.
    """
    try:
        EnsemblRelease("wuzzle")
    except Exception:
        # Any exception raised by the constructor counts as a rejection.
        return
    raise AssertionError(
        "EnsemblRelease('wuzzle') was accepted but a non-numeric release "
        "should be rejected")
def test_version_is_none():
    """Check that constructing ``EnsemblRelease(None)`` is rejected.

    The test fails with an ``AssertionError`` if the constructor accepts
    ``None`` as a release without raising.

    NOTE(review): the expectation is taken from the test name; no
    expected-exception decorator is visible here -- confirm.
    """
    try:
        EnsemblRelease(None)
    except Exception:
        # Any exception raised by the constructor counts as a rejection.
        return
    raise AssertionError(
        "EnsemblRelease(None) was accepted but should be rejected")
# if user does not specify a sqlite database then use the one provided
# by the package
# NOTE(review): the fallback described above is not visible in this excerpt;
# the statements below read args.database unconditionally -- confirm against
# the full file.
# Species and release are taken from the second and third dot-separated
# fields of the last path component (split is presumably os.path.split --
# verify the import).
db_file = split(args.database)[1]
species = db_file.split('.')[1]
release = db_file.split('.')[2]
assert species in AVAILABLE_ENSEMBL_SPECIES, 'unsupported species!'
agfusion_db = agfusion.AGFusionDB(args.database, debug=args.debug)
# The build label is "<species>_<release>".
agfusion_db.build = species + '_' + str(release)
# get the pyensembl data
pyensembl_data = pyensembl.EnsemblRelease(release, species)
# Accessing .db acts as a probe: a ValueError is logged as missing
# pyensembl data (with the install command to run) and the program exits.
try:
pyensembl_data.db
except ValueError:
agfusion_db.logger.error(
"Missing pyensembl data. Run pyensembl install --release " +
"{} --species {}".format(release, species)
)
exit()
# parse the re-coloring and re-naming
colors = {}
rename = {}
if args.rename is not None:
# Command-line interface: one positional VCF path and two optional output
# paths. Only ``input`` is read by the script body below.
parser = argparse.ArgumentParser()
parser.add_argument(
    "input",
    help="VCF input file")
parser.add_argument(
    "--output-vcf",
    help="VCF output file (keeping only header and coding variants)")
parser.add_argument(
    "--output-csv",
    help="CSV output with chr/pos/ref/alt from VCF and annotation")

if __name__ == "__main__":
    # For every VCF record, print the locus and alleles, then each gene name
    # overlapping the locus, then the transcript ids of each gene.
    #
    # Output is produced with single-argument print(...) calls so the script
    # runs unchanged on Python 2 and Python 3 and emits the same text as the
    # former Python 2 print statements.

    # TODO: determine ensembl release from VCF metadata
    ensembl = pyensembl.EnsemblRelease(75)
    args = parser.parse_args()
    with open(args.input, 'r') as f:
        vcf_reader = vcf.Reader(f)
        for record in vcf_reader:
            chrom, pos = record.CHROM, record.POS
            ref, alt = record.REF, record.ALT
            gene_names = ensembl.gene_names_at_locus(chrom, pos)
            print("%s @ %s  ::  %s -> %s" % (chrom, pos, ref, alt))
            for gene in gene_names:
                print("  %s" % (gene,))
                # TODO: implement transcript_names_of_gene_name in pyensembl
                # and transcript_name_of_transcript_id
                transcript_ids = ensembl.transcript_ids_of_gene_name(gene)
                for transcript_id in transcript_ids:
                    print("  %s" % (transcript_id,))
def __init__(self, ensembl_release):
    """Build a ``pyensembl.EnsemblRelease`` and keep it on ``self.ensembl``.

    ``ensembl_release`` is passed straight through to the
    ``pyensembl.EnsemblRelease`` constructor.
    """
    release_data = pyensembl.EnsemblRelease(ensembl_release)
    self.ensembl = release_data