How to use the biothings.utils.common.ask function in biothings

To help you get started, we’ve selected a few examples of biothings.utils.common.ask, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biothings / mygene.info / src / dataload / data_dump / dl_uniprot.py View on Github external
def download(no_confirm=False):
    """Download the file at ``DATAFILE_PATH`` from ``FTP_SERVER`` using wget.

    The file is saved inside ``DATA_FOLDER`` under the base name of
    ``DATAFILE_PATH``. The working directory is switched to ``DATA_FOLDER``
    for the duration of the call and is always restored afterwards.

    Args:
        no_confirm: when true, an existing copy of the file is removed
            without prompting. Otherwise the user is asked first, and the
            download is skipped unless the answer is ``'Y'``.

    Returns:
        None. The outcome is reported through ``logging`` only.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import list.
    import subprocess

    orig_path = os.getcwd()
    try:
        os.chdir(DATA_FOLDER)
        filename = os.path.basename(DATAFILE_PATH)
        if os.path.exists(filename):
            if no_confirm or ask('Remove existing file "%s"?' % filename) == 'Y':
                os.remove(filename)
            else:
                logging.info("Skipped!")
                return
        logging.info('Downloading "%s"...', filename)
        url = 'ftp://{}/{}'.format(FTP_SERVER, DATAFILE_PATH)
        # An argument list (no shell) means the URL and file name are never
        # re-parsed by a shell, so spaces or metacharacters cannot break or
        # alter the command.
        try:
            return_code = subprocess.call(['wget', url, '-O', filename])
        except OSError as err:
            # wget missing or not executable: report it instead of crashing,
            # matching the original best-effort behaviour.
            logging.error("Failed to run wget (%s).", err)
        else:
            if return_code == 0:
                logging.info("Success.")
            else:
                logging.error("Failed with return code (%s).", return_code)
        logging.info("=" * 50)
    finally:
        os.chdir(orig_path)
github biothings / mygene.info / src / dataload / data_dump / dl_refseq.py View on Github external
refseq_release = get_refseq_release()
    logging.info(refseq_release)

    src_dump = get_src_dump()
    doc = src_dump.find_one({'_id': 'refseq'})
    if doc and 'release' in doc and refseq_release <= doc['release']:
        data_file = os.path.join(doc['data_folder'], 'complete.109.rna.gbff.gz')
        if os.path.exists(data_file):
            logging.info("No newer release found. Abort now.")
            sys.exit(0)

    DATA_FOLDER = os.path.join(REFSEQ_FOLDER, str(refseq_release))
    if not os.path.exists(DATA_FOLDER):
        os.makedirs(DATA_FOLDER)
    else:
        if not (no_confirm or len(os.listdir(DATA_FOLDER)) == 0 or ask('DATA_FOLDER (%s) is not empty. Continue?' % DATA_FOLDER) == 'Y'):
            sys.exit(0)

    logfile = os.path.join(DATA_FOLDER, 'refseq_dump.log')
    setup_logfile(logfile)

    #mark the download starts
    doc = {'_id': 'refseq',
           'release': refseq_release,
           'timestamp': time.strftime('%Y%m%d'),
           'data_folder': DATA_FOLDER,
           'logfile': logfile,
           'status': 'downloading'}
    src_dump.save(doc)
    t0 = time.time()

    try:
github biothings / mygene.info / src / hub / databuild / indexer.py View on Github external
def build_index(config, use_parallel=True, noconfirm=False):
    """Index the target collection of a build config into Elasticsearch.

    The build config is loaded into a mongodb-backed ``DataBuilder``, its
    target collection is picked, and the collection is indexed under the
    index name ``'genedoc_' + <build config name>``.

    Args:
        config: build config identifier handed to ``load_build_config``.
        use_parallel: stored on the indexer as ``use_parallel``.
        noconfirm: when true, skip the interactive "Continue?" prompt; the
            flag is also forwarded to ``delete_index_type``.

    Returns:
        None. Progress and errors are printed.
    """
    builder = DataBuilder(backend='mongodb')
    builder.load_build_config(config)
    target_collection = builder.pick_target_collection()
    target_es_index = 'genedoc_' + builder._build_config['name']

    # Nothing to index when no target collection could be picked.
    if not target_collection:
        print("Error: target collection is not ready yet or failed to build.")
        return

    indexer = ESIndexer(mapping=builder.get_mapping())
    indexer.ES_INDEX_NAME = target_es_index
    indexer.step = 10000
    indexer.use_parallel = use_parallel
    es_server = indexer.conn.servers[0].geturl()
    target_parts = (es_server, indexer.ES_INDEX_NAME, indexer.ES_INDEX_TYPE)
    print("ES target: {}/{}/{}".format(*target_parts))

    # Only prompt when confirmation has not been waived.
    if not noconfirm and ask("Continue?") != 'Y':
        print("Aborted.")
        return

    indexer.create_index()
    indexer.delete_index_type(indexer.ES_INDEX_TYPE, noconfirm=noconfirm)
    indexer.build_index(target_collection, verbose=False)
    indexer.optimize()
github biothings / mygene.info / src / utils / es.py View on Github external
def delete_index_type(self, index_type, noconfirm=False):
    '''Delete the mapping of ``index_type`` from the index ``self.ES_INDEX_NAME``.

    Prints an error and returns None when no mapping is found for the type.
    Unless ``noconfirm`` is true, the user is asked to confirm first and
    nothing is deleted unless the answer is 'Y'.

    Returns the result of ``delete_mapping`` when the deletion is carried
    out, otherwise None.
    '''
    index_name = self.ES_INDEX_NAME

    # Bail out early if the type has no mapping in this index.
    mapping = self.conn.indices.get_mapping(index_name, index_type)
    if not mapping:
        message = 'Error: index type "%s" does not exist in index "%s".'
        print(message % (index_type, index_name))
        return None

    target = '/%s/%s' % (index_name, index_type)
    if not noconfirm:
        answer = ask('Confirm to delete all data under "%s":' % target)
        if answer != 'Y':
            return None

    return self.conn.indices.delete_mapping(index=index_name,
                                            doc_type=index_type)
github biothings / mygene.info / src / dataindex / es_sync.py View on Github external
if len(sys.argv) > 1:
        config = sys.argv[1]
    else:
        config = 'mygene_allspecies'
    if not config.startswith('genedoc_'):
        config = 'genedoc_' + config
    assert config in ['genedoc_mygene', 'genedoc_mygene_allspecies']
    noconfirm = '-b' in sys.argv

    _changes_fn = _get_current_changes_fn(config)
    if _changes_fn:
        print("Changes file: " + _changes_fn)
    else:
        print("No changes file found. Aborted.")
        return -1
    if noconfirm or ask("Continue to load?") == 'Y':
        changes = loadobj(_changes_fn)
    else:
        print("Aborted.")
        return -2

    _es_index = config + TARGET_ES_INDEX_SUFFIX
    # ES host will be set depending on whether a tunnel is used or not
    with open_tunnel() as tunnel:
        if tunnel.ok:
            _es_host = 'localhost:' + str(es_local_tunnel_port)
        else:
            _es_host = ES_HOST

        esi = ESIndexer2(_es_index, es_host=_es_host)

        meta = esi.get_mapping_meta(changes)
github biothings / mygene.info / src / dataload / data_dump / dl_pharmgkb.py View on Github external
def download(no_confirm=False):
    """Download ``GENES_URL`` to ``genes.zip`` inside ``DATA_FOLDER`` using wget.

    The working directory is switched to ``DATA_FOLDER`` for the duration of
    the call and is always restored afterwards.

    Args:
        no_confirm: when true, an existing ``genes.zip`` is removed without
            prompting. Otherwise the user is asked first, and the download
            is skipped unless the answer is ``'Y'``.

    Returns:
        None. The outcome is reported through ``logging`` only.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import list.
    import subprocess

    orig_path = os.getcwd()
    try:
        os.chdir(DATA_FOLDER)
        filename = 'genes.zip'
        url = GENES_URL
        if os.path.exists(filename):
            if no_confirm or ask('Remove existing file "%s"?' % filename) == 'Y':
                os.remove(filename)
            else:
                logging.info("Skipped!")
                return
        logging.info('Downloading "%s"...', filename)
        # An argument list (no shell) passes the URL verbatim, so characters
        # such as '&' or '?' need no manual quoting.
        try:
            return_code = subprocess.call(['wget', url, '-O', filename])
        except OSError as err:
            # wget missing or not executable: report it instead of crashing,
            # matching the original best-effort behaviour.
            logging.error("Failed to run wget (%s).", err)
        else:
            if return_code == 0:
                logging.info("Success.")
            else:
                logging.error("Failed with return code (%s).", return_code)
        logging.info("=" * 50)
    finally:
        os.chdir(orig_path)
github biothings / mygene.info / src / databuild / sync.py View on Github external
logging.info("\n".join(['\t' + x for x in new_src_li]))

    if no_confirm or ask('Continue?') == 'Y':
        logfile = 'databuild_sync_{}_{}.log'.format(config, time.strftime('%Y%m%d'))
        logfile = os.path.join(LOG_FOLDER, logfile)
        setup_logfile(logfile)

        for src in new_src_li:
            t0 = time.time()
            logging.info("Current source collection: %s" % src)
            ts = _get_timestamp(src, as_str=True)
            logging.info("Calculating changes... ")
            changes = sc.get_changes(src, use_parallel=use_parallel)
            logging.info("Done")
            get_changes_stats(changes)
            if no_confirm or ask("Continue to save changes...") == 'Y':
                if config == 'genedoc_mygene':
                    dumpfile = 'changes_{}.pyobj'.format(ts)
                else:
                    dumpfile = 'changes_{}_allspecies.pyobj'.format(ts)
                dump(changes, dumpfile)
                dumpfile_key = 'genedoc_changes/' + dumpfile
                logging.info('Saving to S3: "{}"... '.format(dumpfile_key))
                send_s3_file(dumpfile, dumpfile_key)
                logging.info('Done.')
                #os.remove(dumpfile)

            if no_confirm or ask("Continue to apply changes...") == 'Y':
                sc.apply_changes(changes)
                sc.verify_changes(changes)
            logging.info('=' * 20)
            logging.info("Finished. %s" % timesofar(t0))
github biothings / mygene.info / src / hub / databuild / indexer.py View on Github external
def clean_target_collection():
    """Interactively drop a target collection of the 'mygene' build config.

    A collection is picked through ``pick_target_collection(autoselect=False)``;
    a KeyboardInterrupt during that step aborts. The collection is dropped
    only after two separate confirmations are both answered 'Y'.
    """
    builder = DataBuilder(backend='mongodb')
    builder.load_build_config('mygene')
    try:
        collection = builder.pick_target_collection(autoselect=False)
    except KeyboardInterrupt:
        print("Aborted.")
        return

    # Two independent confirmations guard the destructive drop.
    if ask('Delete collection "{}"'.format(collection.name)) != 'Y':
        return
    if ask("Double check! Are you sure?") != 'Y':
        return

    collection.drop()
    print('Done, collection "{}" was dropped.'.format(collection.name))
github biothings / mygene.info / src / databuild / sync.py View on Github external
def rename_from_temp_collection(config,from_index,no_confirm=False):
    """Rename collection ``from_index`` to take the place of the syncer's target collection.

    Steps, each gated by ``ask('Continue?')`` unless ``no_confirm`` is true:
      1. back up timestamps via ``backup_timestamp_main([config])``;
      2. rename the current target collection to ``<name>_bck_<YYYYmmddHHMMSS>``;
      3. rename ``from_index`` to the original target collection name.

    If step 1 was declined, a previously written backup file matching
    ``<config>_current_tsbk_*.txt.bz`` is looked up and the user is asked
    whether to apply it.

    Args:
        config: passed to ``GeneDocSyncer`` and ``backup_timestamp_main``;
            also used as the prefix of the backup-file glob pattern.
        from_index: name of the collection to rename into place.
        no_confirm: when true, skip the 'Continue?' prompts.

    NOTE(review): nothing in the visible body actually applies ``bckfile``;
    the function presumably continues beyond this excerpt -- confirm.
    """
    # check if index exist before changing anything
    sc = GeneDocSyncer(config)
    if not from_index in sc._db.collection_names():
        # NOTE(review): only logs; execution continues with a collection that
        # was just reported missing -- confirm whether a return is intended.
        logging.error("Collection '%s' does not exist" % from_index)
    from_col = sc._db.get_collection(from_index)
    orig_name = sc._target_col.name
    logging.info("Backing up timestamp from '%s'" % orig_name)
    if no_confirm or ask('Continue?') == 'Y':
        # backup_timestamp_main is given a one-item list; pop() takes the
        # last element of what it returns.
        bckfile = backup_timestamp_main([config]).pop()
    else:
        # Backup declined: fall back to an existing file further below.
        bckfile = None
    # rename existing current for backup purpose
    bck_name = orig_name + "_bck_%s" % time.strftime('%Y%m%d%H%M%S')
    logging.info("Renaming %s to %s" % (orig_name,bck_name))
    if no_confirm or ask('Continue?') == 'Y':
        sc._target_col.rename(bck_name)
    logging.info("Renaming %s to %s" % (from_col.name,orig_name))
    if no_confirm or ask('Continue?') == 'Y':
        from_col.rename(orig_name)
    if bckfile is None:
        try:
            pat = "%s_current_tsbk_*.txt.bz" % config
            logging.info("Looking for '%s'" % pat)
            # Takes the first match in sorted (lexicographic) order; raises
            # IndexError when there is no match, handled below.
            # NOTE(review): if the newest backup is wanted, [-1] may be
            # intended -- confirm against the file naming scheme.
            bckfile = sorted(glob.glob(pat))[0]
            # This prompt is always shown; it is not gated by no_confirm.
            if ask("Do you want me to apply timestamp from file '%s' to collection '%s' ?" % (bckfile,sc._target_col.name)) == 'Y':
                pass
            else:
                return
        except IndexError:
            logging.error("Can't find any timstamp file to apply, giving up...")
            return
github biothings / mygene.info / src / utils / es.py View on Github external
pat = prefix + '_(\d{8})_\w{8}'
        _li = []
        for index in index_li:
            mat = re.match(pat, index)
            if mat:
                _li.append((mat.group(1), index))
        _li.sort()   # older collection appears first
        # keep last # of newer indices
        index_to_remove = [x[1] for x in _li[:-keep_last]]
        if len(index_to_remove) > 0:
            print("{} \"{}*\" indices will be removed.".format(
                  len(index_to_remove), prefix))
            if verbose:
                for index in index_to_remove:
                    print('\t', index)
            if noconfirm or ask("Continue?") == 'Y':
                for index in index_to_remove:
                    if dryrun:
                        print("dryrun=True, nothing is actually deleted")
                    else:
                        conn.indices.delete(index)
                print("Done.[%s indices removed]" % len(index_to_remove))
            else:
                print("Aborted.")
        else:
            print("Nothing needs to be removed.")