Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is a fragment. The enclosing definition and the origin of
# `res`, `q0`, `dataset`, `queryset`, `data_matrix` and `batch` are not visible
# here, and the original indentation has been lost.
# Print the first five entries of `res`; then, for every id in res[0], print the
# id and the cosine distance between q0 and the dataset row with that id
# (`distance` is presumably scipy.spatial.distance - TODO confirm).
print(res[:5])
for i in res[0]:
print(int(i), distance.cosine(q0, dataset[int(i),:]))
# The plain sparse cosine space is left commented out; the '_fast' variant is used.
#space_type = 'cosinesimil_sparse'
space_type = 'cosinesimil_sparse_fast'
space_param = []
method_name = 'small_world_rand'
index_name = method_name + '_sparse.index'
# Delete any file that already exists under the index's file name.
if os.path.isfile(index_name):
os.remove(index_name)
index = nmslib.init(space_type,
space_param,
method_name,
nmslib.DataType.SPARSE_VECTOR,
nmslib.DistType.FLOAT)
# Two loading paths: a single batch call fed with data_matrix, or one
# addDataPoint call per row. TimeIt wraps each stage (presumably a timing
# context manager defined elsewhere - TODO confirm).
if batch:
with TimeIt('batch add'):
positions = nmslib.addDataPointBatch(index, np.arange(len(dataset), dtype=np.int32), data_matrix)
print('positions', positions)
else:
# d and q hold, for each dataset / queryset row, the [position, value] pairs
# of the entries that are strictly greater than zero.
# NOTE(review): q is filled below but not read anywhere in this fragment.
d = []
q = []
with TimeIt('preparing'):
for data in dataset:
d.append([[i, v] for i, v in enumerate(data) if v > 0])
for data in queryset:
q.append([[i, v] for i, v in enumerate(data) if v > 0])
with TimeIt('adding points'):
# NOTE: the loop variable `id` shadows the builtin of the same name.
for id, data in enumerate(d):
nmslib.addDataPoint(index, id, data)
def setUp(self):
    """Create the index used by the tests and store it on ``self.index``.

    The index is initialised for the 'normleven' space with the
    'small_world_rand' method, OBJECT_AS_STRING data and FLOAT distances.
    Any file already present under the index's file name is deleted first.
    """
    method = 'small_world_rand'

    # Clear out a file left under the same name before creating the index.
    index_file = method + '.index'
    if os.path.isfile(index_file):
        os.remove(index_file)

    self.index = nmslib.init(
        'normleven',
        [],
        method,
        nmslib.DataType.OBJECT_AS_STRING,
        nmslib.DistType.FLOAT)
def test_sparse_vector_fresh():
    """Fill a sparse-vector index from 'sample_sparse_dataset.txt' and build it.

    Steps, in order: delete any file already present under the index's file
    name, initialise a 'cosinesimil_sparse_fast' / 'small_world_rand' index,
    add every entry yielded by ``read_sparse_data`` under its enumeration
    number, print how many points were added and up to ``MAX_PRINT_QTY`` of
    them, then call ``createIndex`` with the build parameters.
    """
    method = 'small_world_rand'

    index_file = method + '_sparse.index'
    if os.path.isfile(index_file):
        os.remove(index_file)

    index = nmslib.init(
        'cosinesimil_sparse_fast',
        [],
        method,
        nmslib.DataType.SPARSE_VECTOR,
        nmslib.DistType.FLOAT)

    for point_id, point in enumerate(read_sparse_data('sample_sparse_dataset.txt')):
        nmslib.addDataPoint(index, point_id, point)

    print('We have added %d data points' % nmslib.getDataPointQty(index))

    # Echo at most MAX_PRINT_QTY of the stored points.
    shown = min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))
    for position in range(shown):
        print(nmslib.getDataPoint(index, position))

    print('Let\'s invoke the index-build process')

    build_params = ['NN=17', 'efConstruction=50', 'indexThreadQty=4']
    # Kept from the original; nothing in this function body reads it.
    query_time_param = ['efSearch=50']

    nmslib.createIndex(index, build_params)
# Fill a dense-vector 'cosinesimil' / 'small_world_rand' index from
# 'sample_dataset.txt', report how long adding took, then save and free the index.
#
# fast: when true, the data is read with read_data_fast and added through one
#       addDataPointBatch call; otherwise points are added one at a time and
#       each returned position is compared with the id that was passed in.
#
# NOTE(review): this listing appears abridged and has lost its indentation.
# `idx` and `k` used in the knnQuery print below are never assigned in the
# visible code, and no index-build call is visible before the query and save.
# Confirm against the complete source before relying on it.
def test_vector_fresh(fast=True):
space_type = 'cosinesimil'
space_param = []
method_name = 'small_world_rand'
index_name = method_name + '.index'
# Delete any file that already exists under the index's file name.
if os.path.isfile(index_name):
os.remove(index_name)
index = nmslib.init(
space_type,
space_param,
method_name,
nmslib.DataType.DENSE_VECTOR,
nmslib.DistType.FLOAT)
# Time only the data-loading stage.
start = time.time()
if fast:
data = read_data_fast('sample_dataset.txt')
print('data.shape', data.shape)
# Ids passed to the batch call are 0..len(data)-1 as int32.
# NOTE(review): `positions` is not read again in the visible code.
positions = nmslib.addDataPointBatch(index, np.arange(len(data), dtype=np.int32), data)
else:
# NOTE: the loop variable `id` shadows the builtin of the same name.
for id, data in enumerate(read_data('sample_dataset.txt')):
pos = nmslib.addDataPoint(index, id, data)
# A returned position that differs from the id ends the process with status 1.
if id != pos:
print('id %s != pos %s' % (id, pos))
sys.exit(1)
end = time.time()
print('added data in %s secs' % (end - start))
print('Let\'s print a few data entries')
# NOTE(review): `idx` and `k` are undefined at this point in the visible code.
# `data` is whatever the branch above last bound it to.
print(idx, nmslib.knnQuery(index, k, data))
nmslib.saveIndex(index, index_name)
print("The index %s is saved" % index_name)
nmslib.freeIndex(index)
if __name__ == '__main__':
    # Print the DataType and DistType constants exposed by nmslib, each one
    # looked up right before it is printed.
    for member in ('DENSE_VECTOR', 'SPARSE_VECTOR', 'OBJECT_AS_STRING'):
        print(member, getattr(nmslib.DataType, member))
    for member in ('INT', 'FLOAT'):
        print('DistType.' + member, getattr(nmslib.DistType, member))

    # Dense-vector scenarios.
    test_vector_load()
    test_vector_fresh()
    test_vector_fresh(False)
    test_vector_loaded()

    # Sparse-vector scenarios; the data generator runs before the test.
    gen_sparse_data()
    test_sparse_vector_fresh()

    # String scenarios.
    test_string_fresh()
    test_string_fresh(False)
    test_string_loaded()

    test_object_as_string_fresh()
def fit(self, X):
    """Create ``self._index`` from the rows of ``X`` and build it.

    The index is initialised with ``X.shape[0]`` as its first argument, using
    the metric, method name and method parameters stored on the instance.
    Each row of ``X`` is then stored, as a plain list, under its enumeration
    number before ``buildIndex`` is called.
    """
    self._index = nmslib.initIndex(
        X.shape[0],
        self._nmslib_metric,
        [],
        self._method_name,
        self._method_param,
        nmslib.DataType.VECTOR,
        nmslib.DistType.FLOAT)

    for position, row in enumerate(X):
        nmslib.setData(self._index, position, row.tolist())

    nmslib.buildIndex(self._index)
def fit(self, X):
    """Create ``self._index`` from the rows of ``X``, then load or build it.

    Every row of ``X`` is added, as a plain list, under its enumeration
    number. If a file exists at ``self._index_name`` the index is loaded from
    it; otherwise the index is created with ``self._index_param`` and, when
    ``self._save_index`` is truthy, saved to that path. Query-time parameters
    from ``self._query_param`` are applied last in both cases.
    """
    import nmslib

    self._index = nmslib.init(
        self._nmslib_metric,
        [],
        self._method_name,
        nmslib.DataType.DENSE_VECTOR,
        nmslib.DistType.FLOAT)

    for position, row in enumerate(X):
        nmslib.addDataPoint(self._index, position, row.tolist())

    if not os.path.exists(self._index_name):
        # Nothing on disk: build the index, optionally saving it afterwards.
        logging.debug("Create Index")
        nmslib.createIndex(self._index, self._index_param)
        if self._save_index:
            nmslib.saveIndex(self._index, self._index_name)
    else:
        logging.debug("Loading index from file")
        nmslib.loadIndex(self._index, self._index_name)

    nmslib.setQueryTimeParams(self._index, self._query_param)