def load_pdb(data_folder):
    # Load the pre-built gene -> PDB mapping serialized as a .pyobj dump.
    pdb_dumpfile = os.path.join(data_folder, 'gene2pdb.pyobj')
    data = loadobj(pdb_dumpfile)
    return data
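The .pyobj dump read by load_pdb() above is just a serialized Python object written ahead of time. A minimal sketch of producing such a file, assuming loadobj is a pickle-based helper (the dump_gene2pdb name and the sample mapping are hypothetical):

import os
import pickle

def dump_gene2pdb(data_folder, gene2pdb):
    # Hypothetical counterpart of load_pdb(): write the mapping so a
    # pickle-based loadobj() can read it back later.
    pdb_dumpfile = os.path.join(data_folder, 'gene2pdb.pyobj')
    with open(pdb_dumpfile, 'wb') as out_f:
        pickle.dump(gene2pdb, out_f)

# Example usage (made-up data):
# dump_gene2pdb('/tmp/data', {'1017': ['1AQ1', '1B38']})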
def load(self):
    if self.map is None:
        self.retired2current.load()
        self.map = {}
        # Read the dumped list via GridFS, passing the filename together with a db handle.
        ensembl2entrez_li = loadobj(("ensembl_gene__2entrezgene_list.pyobj", self.db_provider()), mode='gridfs')
        # Filter out deprecated Entrez gene ids, keeping only those that can
        # still be translated to a current id.
        for ensembl_id, entrez_id in ensembl2entrez_li:
            entrez_id = int(entrez_id)
            if entrez_id in self.retired2current:
                self.map[ensembl_id] = self.retired2current.translate(entrez_id)
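The loop above keeps an Ensembl id only when its Entrez id can still be resolved through retired2current. A self-contained sketch of that filtering step, using a plain dict in place of the retired2current helper (all ids below are illustrative):

# Stand-in for the retired2current helper: retired Entrez id -> current id.
retired2current = {100001: 1017, 100002: 1018}

# Pairs as they might come back from the dumped ensembl_gene__2entrezgene_list.
ensembl2entrez_li = [("ENSG00000123374", "100001"), ("ENSG00000250506", "999999")]

mapping = {}
for ensembl_id, entrez_id in ensembl2entrez_li:
    entrez_id = int(entrez_id)
    if entrez_id in retired2current:   # drop ids that cannot be resolved
        mapping[ensembl_id] = retired2current[entrez_id]

print(mapping)   # {'ENSG00000123374': 1017}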
def load_chr_data(self):
    self.logger.info("\tLoading chromosome data from '%s'..." % self.genome)
    try:
        self._chr_data = loadobj(self.genome)
    except Exception as e:
        self.logger.info(e)
        raise
    self.logger.info("Done.")
def load_pir(data_folder):
    # Load the pre-built gene -> PIR mapping serialized as a .pyobj dump.
    pir_dumpfile = os.path.join(data_folder, 'gene2pir.pyobj')
    data = loadobj(pir_dumpfile)
    return data
def load_chr_data(self):
    print("\tLoading chromosome data...", end='')
    self._chr_data = loadobj(HG19_DATAFILE)
    print("Done.")
    Note that all ids are int type.
    '''
    if species_li:
        taxid_set = set([TAXONOMY[species]["tax_id"] for species in species_li])
    else:
        taxid_set = None

    orig_cwd = os.getcwd()
    os.chdir(data_folder)

    # check cache file
    _cache_file = 'geneid_d.pyobj'
    if load_cache and os.path.exists(_cache_file) and \
            file_newer(_cache_file, 'gene_info.gz') and \
            file_newer(_cache_file, 'gene_history.gz'):
        _taxid_set, out_d = loadobj(_cache_file)
        assert _taxid_set == taxid_set
        os.chdir(orig_cwd)
        return out_d

    DATAFILE = os.path.join(data_folder, 'gene_info.gz')
    if species_li:
        species_filter = lambda ld: int(ld[0]) in taxid_set and (only_for and ld[1] in only_for)
    elif only_for:
        species_filter = lambda ld: only_for and ld[1] in only_for
    else:
        species_filter = None
    geneid_li = set(tab2list(DATAFILE, 1, includefn=species_filter))

    DATAFILE = os.path.join(data_folder, 'gene_history.gz')
    if species_li:
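The cache check above only trusts geneid_d.pyobj when it is newer than both gene_info.gz and gene_history.gz. A minimal sketch of what a file_newer helper of that kind typically does, comparing modification times (this re-implementation is an assumption, not the project's actual helper):

import os

def file_newer(source, target):
    # True if 'source' was modified more recently than 'target', i.e. the
    # cached dump postdates the raw input file it was built from.
    return os.stat(source).st_mtime > os.stat(target).st_mtime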
def main(self, diff_filepath, merge_collection, field):
    # Apply a previously dumped diff to the merged collection.
    diff = loadobj(diff_filepath)
    source_collection = diff['source']
    add_ids = diff['add']
    delete_ids = diff['delete']
    update_ids = [_doc['_id'] for _doc in diff['update']]
    self.add_update(source_collection, merge_collection, add_ids)
    self.add_update(source_collection, merge_collection, update_ids)
    self.delete(merge_collection, field, delete_ids)
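main() expects the object stored at diff_filepath to be a dict with 'source', 'add', 'delete' and 'update' keys, as read above. A minimal sketch of writing such a diff file, assuming a pickle-based dump format (the collection name, ids and file name are made up):

import pickle

diff = {
    'source': 'entrez_gene_20240101',          # source collection the changes come from
    'add': ['1017', '1018'],                   # ids of documents to insert into the merged collection
    'delete': ['111111'],                      # ids of documents to remove
    'update': [{'_id': '1020'}],               # changed documents; main() only uses their '_id'
}

with open('entrez_gene.diff.pyobj', 'wb') as out_f:
    pickle.dump(diff, out_f)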