def main(no_confirm=True):
    # no_confirm=True lets this script run automatically without intervention
    src_dump = get_src_dump()
    lastmodified = check_lastmodified()
    doc = src_dump.find_one({'_id': 'uniprot'})
    if doc and 'lastmodified' in doc and lastmodified <= doc['lastmodified']:
        path, filename = os.path.split(DATAFILE_PATH)
        data_file = os.path.join(doc['data_folder'], filename)
        if os.path.exists(data_file):
            logging.info("No newer file found. Abort now.")
            sys.exit(0)

    if not ARCHIVE_DATA:
        rmdashfr(DATA_FOLDER)
    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)
    else:
        if not (no_confirm or len(os.listdir(DATA_FOLDER)) == 0 or
                ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) == 'Y'):
            sys.exit()
    logfile = os.path.join(DATA_FOLDER, 'entrez_dump.log')
    setup_logfile(logfile)

    # mark the download starts
    src_dump = get_src_dump()
    doc = {'_id': 'entrez',
           'timestamp': TIMESTAMP,
           'data_folder': DATA_FOLDER,
           'logfile': logfile,
           'status': 'downloading'}
    src_dump.save(doc)
    t0 = time.time()
    download(DATA_FOLDER, no_confirm=no_confirm)
    t_download = timesofar(t0)
    t1 = time.time()
    # mark parsing starts
    src_dump.update({'_id': 'entrez'}, {'$set': {'status': 'parsing'}})
    parse_gbff(DATA_FOLDER)
    t_parsing = timesofar(t1)
    t_total = timesofar(t0)
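
# The check_lastmodified() helper used above is not shown in these snippets.  A
# minimal, standalone sketch of one way it could work -- issue an HTTP HEAD
# request and return the remote file's Last-Modified date -- is below.  The URL
# and the date format are illustrative assumptions, not taken from the original code.
import email.utils
import urllib.request

def check_lastmodified(url='https://example.org/data/source_file.gz'):
    '''Return the remote file's Last-Modified date as 'YYYYMMDD', or None.'''
    req = urllib.request.Request(url, method='HEAD')
    with urllib.request.urlopen(req) as res:
        header = res.headers.get('Last-Modified')
    if not header:
        return None
    return email.utils.parsedate_to_datetime(header).strftime('%Y%m%d')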
def check_refseq_release():
    refseq_release = get_refseq_release()
    src_dump = get_src_dump()
    doc = src_dump.find_one({'_id': 'refseq'})
    if doc and 'release' in doc and refseq_release <= doc['release']:
        data_file = os.path.join(doc['data_folder'], 'complete.109.rna.gbff.gz')
        if os.path.exists(data_file):
            logging.info("No newer release found. Abort now.")
            sys.exit(0)
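
# get_refseq_release() is likewise not shown.  A standalone sketch, assuming it
# simply reads NCBI's RELEASE_NUMBER file and returns the release as an int
# (the URL and the return type are assumptions, not taken from the original code):
import urllib.request

def get_refseq_release(url='https://ftp.ncbi.nlm.nih.gov/refseq/release/RELEASE_NUMBER'):
    with urllib.request.urlopen(url) as res:
        return int(res.read().decode().strip())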
def main(no_confirm=True):
    src_dump = get_src_dump()
    lastmodified = check_lastmodified()
    doc = src_dump.find_one({'_id': 'exac'})
    if doc and 'lastmodified' in doc and lastmodified <= doc['lastmodified']:
        path, filename = os.path.split(DATAFILES_PATH[0])
        data_file = os.path.join(doc['data_folder'], filename)
        if os.path.exists(data_file):
            logging.info("No newer file found. Abort now.")
            sys.exit(0)

    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)
    else:
        if not (no_confirm or len(os.listdir(DATA_FOLDER)) == 0 or
                ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) == 'Y'):
            sys.exit(0)

    logfile = os.path.join(DATA_FOLDER, 'exac_dump.log')
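
# setup_logfile() (called in the neighboring snippets) is not shown either.  A
# minimal standalone sketch of what such a helper typically does -- attach a
# FileHandler to the root logger so logging.info() output also lands in the dump
# log.  The formatting details here are assumptions:
import logging

def setup_logfile(logfile):
    handler = logging.FileHandler(logfile)
    handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'))
    root = logging.getLogger()
    root.addHandler(handler)
    root.setLevel(logging.INFO)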
def main(no_confirm=True):
    src_dump = get_src_dump()
    download_list = get_file_list_for_download()
    if len(download_list) == 0:
        logging.info("No newer file found. Abort now.")
        sys.exit(0)
    doc = src_dump.find_one({'_id': 'ucsc'})

    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)

    logfile = os.path.join(DATA_FOLDER, 'ucsc_dump.log')
    setup_logfile(logfile)

    # mark the download starts
    doc = {'_id': 'ucsc',
           'timestamp': timestamp,
           'data_folder': DATA_FOLDER,
           'logfile': logfile,
           'status': 'downloading'}
    src_dump.save(doc)
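
# src_dump in these scripts is a MongoDB collection used as a small job log: one
# document per source, updated as the dump moves through its stages.  A
# standalone sketch of the same bookkeeping with the modern pymongo API (the
# connection, database, and collection names are assumptions):
import time
from pymongo import MongoClient

src_dump = MongoClient()['hub']['src_dump']

def mark(source, **fields):
    # upsert so the first call creates the per-source document
    src_dump.update_one({'_id': source}, {'$set': fields}, upsert=True)

mark('ucsc', status='downloading', timestamp=time.strftime('%Y%m%d'))
# ... download files ...
mark('ucsc', status='success')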
def main(no_confirm=True):
    src_dump = get_src_dump()
    lastmodified = check_header()
    doc = src_dump.find_one({'_id': 'pharmgkb'})
    if doc and 'lastmodified' in doc and lastmodified <= doc['lastmodified']:
        data_file = os.path.join(doc['data_folder'], 'genes.zip')
        if os.path.exists(data_file):
            logging.info("No newer file found. Abort now.")
            sys.exit(0)

    if not ARCHIVE_DATA:
        rmdashfr(DATA_FOLDER)
    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)
    else:
        if not (no_confirm or len(os.listdir(DATA_FOLDER)) == 0 or
                ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) == 'Y'):
            sys.exit(0)
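
# The DATA_FOLDER handling repeated across these snippets (optionally wipe it
# when not archiving, create it if missing, or ask before reusing a non-empty
# folder) can be factored into one helper.  A standalone sketch; the ask() and
# rmdashfr() equivalents are inlined here and are illustrative, not the original helpers:
import os
import shutil

def prepare_data_folder(folder, archive=True, no_confirm=True):
    if not archive and os.path.exists(folder):
        shutil.rmtree(folder)                      # same effect as rmdashfr(folder)
    if not os.path.exists(folder):
        os.makedirs(folder)
    elif not (no_confirm or not os.listdir(folder) or
              input('%s is not empty. Continue? (Y/n) ' % folder) == 'Y'):
        raise SystemExit(0)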
def main_cron(no_confirm=True):
    '''Set no_confirm to True for running this script automatically
    without intervention.'''
    src_dump = get_src_dump()
    mart_version = chk_latest_mart_version()
    logging.info("Checking latest mart_version:\t%s" % mart_version)
    doc = src_dump.find_one({'_id': 'ensembl'})
    if doc and 'release' in doc and mart_version <= doc['release']:
        data_file = os.path.join(doc['data_folder'], 'gene_ensembl__gene__main.txt')
        if os.path.exists(data_file):
            logging.info("No newer release found. Abort now.")
            sys.exit(0)

    DATA_FOLDER = os.path.join(ENSEMBL_FOLDER, str(mart_version))
    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)
    else:
        if not (no_confirm or len(os.listdir(DATA_FOLDER)) == 0 or
                ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) == 'Y'):
            sys.exit(0)
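
# One caveat with 'mart_version <= doc["release"]' (and the similar release
# checks above): the comparison only behaves as expected when both sides have
# the same, numeric type.  If releases are ever stored as strings, compare them
# numerically -- a small illustrative check, not part of the original code:
def is_newer_release(remote, local):
    return int(remote) > int(local)

assert is_newer_release('100', '99')   # as strings, '100' < '99' lexicographically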
def redo_parse_gbff(path):
    '''Call this function manually to re-run the parsing step and update src_dump.
    Use it when main() fails at the parsing step and parsing needs to be
    re-started after the fix.
    '''
    src_dump = get_src_dump()
    t0 = time.time()
    # nothing is re-downloaded here; only the parsing step is re-run
    t_download = timesofar(t0)
    t1 = time.time()

    # mark parsing starts
    src_dump.update({'_id': 'entrez'}, {'$set': {'status': 'parsing'}})
    parse_gbff(path)
    t_parsing = timesofar(t1)
    t_total = timesofar(t0)

    # mark the run finished successfully
    _updates = {
        'status': 'success',
        'time': {
            'download': t_download,
            'parsing': t_parsing,
            'total': t_total,
        },
    }
    src_dump.update({'_id': 'entrez'}, {'$set': _updates})
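
# timesofar() used throughout returns a human-readable elapsed-time string.  A
# minimal standalone equivalent (the real helper may format the output differently):
import time

def timesofar(t0):
    elapsed = time.time() - t0
    minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(int(minutes), 60)
    if hours:
        return '%dh:%dm:%.1fs' % (hours, minutes, seconds)
    if minutes:
        return '%dm:%.1fs' % (minutes, seconds)
    return '%.1fs' % seconds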