How to use the nmslib.init function in nmslib

To help you get started, we’ve selected a few nmslib.init examples based on popular ways the function is used in public projects.
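Most of the snippets below use nmslib’s Python bindings. As a quick orientation, here is a minimal sketch of the keyword-argument form of nmslib.init used by current releases, building and querying a small HNSW index over dense vectors; the data, parameter values, and k are illustrative placeholders, not taken from the projects that follow.

import numpy as np
import nmslib

# Illustrative data: replace with your own float32 matrix.
data = np.random.randn(1000, 64).astype(np.float32)

# Create an HNSW index over the cosine-similarity space.
index = nmslib.init(method='hnsw', space='cosinesimil',
                    data_type=nmslib.DataType.DENSE_VECTOR,
                    dtype=nmslib.DistType.FLOAT)
index.addDataPointBatch(data)
index.createIndex({'M': 16, 'efConstruction': 200}, print_progress=False)

# Trade accuracy for speed at query time, then fetch 10 neighbours.
index.setQueryTimeParams({'efSearch': 100})
ids, dists = index.knnQuery(data[0], k=10)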

github nmslib / nmslib / python_bindings / unit_tests.py (view on GitHub)
def setUp(self):
        space_type = 'leven'
        space_param = []
        method_name = 'small_world_rand'
        index_name  = method_name + '.index'
        if os.path.isfile(index_name):
            os.remove(index_name)
        self.index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.OBJECT_AS_STRING,
                             nmslib.DistType.INT)
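This test exercises the legacy python_bindings API, where nmslib.init takes positional arguments (space type, space parameters, method name, data type, distance type) and the resulting index handle is passed to module-level functions such as nmslib.addDataPoint. With the current keyword-argument API, a roughly equivalent Levenshtein-space index might be set up as in the sketch below; the strings and query are illustrative, not from the repository.

import nmslib

# String data indexed under the integer-valued Levenshtein ('leven') space.
index = nmslib.init(method='small_world_rand', space='leven',
                    data_type=nmslib.DataType.OBJECT_AS_STRING,
                    dtype=nmslib.DistType.INT)
for i, s in enumerate(['xyz', 'beagcfa', 'cea', 'cb']):
    index.addDataPoint(i, s)
index.createIndex(print_progress=False)
ids, dists = index.knnQuery('abc', k=2)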
github nmslib / nmslib / python_bindings / integration_tests / test_nmslib.py (view on GitHub)
def test_string_loaded():
    DATA_STRS = ["xyz", "beagcfa", "cea", "cb",
                 "d", "c", "bdaf", "ddcd",
                 "egbfa", "a", "fba", "bcccfe",
                 "ab", "bfgbfdc", "bcbbgf", "bfbb"
                 ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    index_name  = method_name + '.index'

    index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.OBJECT_AS_STRING,
                             nmslib.DistType.INT)

    for id, data in enumerate(DATA_STRS):
        nmslib.addDataPoint(index, id, data)

    print('Let\'s print a few data entries')
    print('We have added %d data points' % nmslib.getDataPointQty(index))

    for i in range(0,min(MAX_PRINT_QTY,nmslib.getDataPointQty(index))):
        print(nmslib.getDataPoint(index,i))

    print('Let\'s invoke the index-build process')
github nmslib / nmslib / python_bindings / integration_tests / test_nmslib.py (view on GitHub)
        index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.DENSE_VECTOR,
                             nmslib.DistType.FLOAT)
        with TimeIt('fast_batch add data point'):
            offset = 0
            for data in read_data_fast_batch(f, 10000):
                nmslib.addDataPointBatch(index, np.arange(len(data), dtype=np.int32) + offset, data)
                offset += data.shape[0]
        print('offset', offset)
        nmslib.freeIndex(index)

    if seq:
        index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.DENSE_VECTOR,
                             nmslib.DistType.FLOAT)
        with TimeIt('seq add data point'):
            for id, data in enumerate(read_data(f)):
                nmslib.addDataPoint(index, id, data)
        nmslib.freeIndex(index)
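This fragment compares batch insertion (addDataPointBatch with an explicit id array) against a per-point loop (addDataPoint), freeing the index after each timing run. With the current object API, the same two insertion paths look roughly like the sketch below; the data array is a placeholder.

import numpy as np
import nmslib

data = np.random.randn(50000, 32).astype(np.float32)  # placeholder data

index = nmslib.init(method='hnsw', space='cosinesimil')

# Batch path: insert all vectors at once, with explicit integer ids.
index.addDataPointBatch(data, np.arange(len(data), dtype=np.int32))

# Sequential path (much slower) would instead be:
# for i, row in enumerate(data):
#     index.addDataPoint(i, row)

index.createIndex(print_progress=False)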
github nmslib / nmslib / python_bindings / integration_tests / test_nmslib.py (view on GitHub)
def test_vector_fresh(fast=True):
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    index_name  = method_name + '.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.DENSE_VECTOR,
                             nmslib.DistType.FLOAT)

    start = time.time()
    if fast:
        data = read_data_fast('sample_dataset.txt')
        print('data.shape', data.shape)
        positions = nmslib.addDataPointBatch(index, np.arange(len(data), dtype=np.int32), data)
    else:
        for id, data in enumerate(read_data('sample_dataset.txt')):
            pos = nmslib.addDataPoint(index, id, data)
            if id != pos:
                print('id %s != pos %s' % (id, pos))
github logstar / scedar / scedar / eda / sdm.py (view on GitHub)
        if query_params is None:
            query_params = {
                "efSearch": 100
            }

        # create index
        ind_pm_key = sorted([(k, v) for k, v in index_params.items()
                              if k in ["efConstruction", "M",
                                       "delaunay_type", "post"]])
        ind_pm_key.append(("metric", metric))
        ind_pm_key.append(("use_pca", use_pca))
        str_ind_pm_key = str(ind_pm_key)
        if str_ind_pm_key in self._hnsw_index_lut:
            hnsw = self._hnsw_index_lut[str_ind_pm_key]
        else:
            hnsw = nmslib.init(method="hnsw", space=metric,
                               data_type=data_type)
            hnsw.addDataPointBatch(data_x)
            hnsw.createIndex(index_params, print_progress=verbose)
            self._hnsw_index_lut[str_ind_pm_key] = hnsw
        # query KNN
        hnsw.setQueryTimeParams(query_params)
        # k nearest neighbors
        # hnsw query may include self.
        compute_k = k + 1
        knns = hnsw.knnQueryBatch(
            data_x, k=compute_k, num_threads=self._nprocs)
        # print(knns)
        # construct knn conn mat.
        knn_targets_sep_l = []
        knn_weights_sep_l = []
        # need benchmark
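Because the vectors being queried here are the same ones that were indexed, each knnQueryBatch result normally contains the query point itself as its nearest neighbour, which is why the code asks for k + 1 neighbours and later separates out the self match. A compact sketch of that pattern with the current API; the data, k, and thread count are placeholders.

import numpy as np
import nmslib

data_x = np.random.randn(500, 20).astype(np.float32)  # placeholder data
k = 10

index = nmslib.init(method='hnsw', space='cosinesimil')
index.addDataPointBatch(data_x)
index.createIndex({'M': 16, 'efConstruction': 100}, print_progress=False)
index.setQueryTimeParams({'efSearch': 100})

# Ask for one extra neighbour per query, then drop the (usually
# zero-distance) self match at the front of each result.
results = index.knnQueryBatch(data_x, k=k + 1, num_threads=4)
for ids, dists in results:
    neighbour_ids, neighbour_dists = ids[1:], dists[1:]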
github lenck / vlb / python / bench / RetrievalBenchmark.py (view on GitHub)
                        desc = desc.astype(np.float32)
                        desc /= (desc.sum(axis=1, keepdims=True) + 1e-7)
                        desc = np.sqrt(desc)
                        desc_p = pow(desc, 2)
                        desc /= np.sqrt(desc_p.sum(axis=1,
                                                   keepdims=True) + 1e-7)
                    for i in range(desc.shape[0]):
                        desc_list.append(desc[i, :])
                image_index += 1

            desc_list = np.array(desc_list)

            expand = 3
            nn = 30

            index = nmslib.init(space='l2', method='hnsw')
            index.addDataPointBatch(data=desc_list)
            index.createIndex(
                print_progress=True,
                index_params={
                    "maxM": 32,
                    "maxM0": 64,
                    "indexThreadQty": 24})
            index.setQueryTimeParams(params={"ef": nn * expand})

            query_result = []

            pbar = tqdm(dataset.query_list)
            filter_flag = False
            for query_idx, image_filepath in enumerate(pbar):
                if len(image_filepath) == 5:
                    image_filepath, left, top, bottom, right = image_filepath
github yl-1993 / learn-to-cluster / utils / knn.py (view on GitHub)
def __init__(self, feats, k, index_path='', verbose=True):
        import nmslib
        self.verbose = verbose
        with Timer('[hnsw] build index', verbose):
            """ higher ef leads to better accuracy, but slower search
                higher M leads to higher accuracy/run_time at fixed ef, but consumes more memory
            """
            # space_params = {
            #     'ef': 100,
            #     'M': 16,
            # }
            # index = nmslib.init(method='hnsw', space='cosinesimil', space_params=space_params)
            index = nmslib.init(method='hnsw', space='cosinesimil')
            if index_path != '' and os.path.isfile(index_path):
                index.loadIndex(index_path)
            else:
                index.addDataPointBatch(feats)
                index.createIndex({'post': 2, 'indexThreadQty': 1}, print_progress=verbose)
                if index_path:
                    print('[hnsw] save index to {}'.format(index_path))
                    mkdir_if_no_exists(index_path)
                    index.saveIndex(index_path)
        with Timer('[hnsw] query topk {}'.format(k), verbose):
            knn_ofn = index_path + '.npz'
            if os.path.exists(knn_ofn):
                print('[hnsw] read knns from {}'.format(knn_ofn))
                self.knns = [(knn[0, :].astype(np.int32), knn[1, :].astype(np.float32)) \
                                for knn in np.load(knn_ofn)['data']]
            else:
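The class above builds the HNSW index once, persists it with saveIndex, and reloads it via loadIndex on later runs so the expensive build step is skipped. A minimal, self-contained sketch of that save/reload cycle for a dense cosine-similarity index; the feature matrix and file path are placeholders.

import os
import numpy as np
import nmslib

feats = np.random.randn(1000, 128).astype(np.float32)  # placeholder features
index_path = 'hnsw_cosine.index'                        # placeholder path

index = nmslib.init(method='hnsw', space='cosinesimil')
if os.path.isfile(index_path):
    # Reuse a previously built and saved index.
    index.loadIndex(index_path)
else:
    index.addDataPointBatch(feats)
    index.createIndex({'post': 2}, print_progress=True)
    index.saveIndex(index_path)

ids, dists = index.knnQuery(feats[0], k=5)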