Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Index method used below and the index file name derived from it.
# Remove any existing file with that name before starting.
method_name = 'small_world_rand'
index_name = method_name + '.index'
if os.path.isfile(index_name):
    os.remove(index_name)

# Input data file: it is generated only when it is not already on disk.
f = '/tmp/foo.txt'
if not os.path.isfile(f):
    print('creating %s' % f)
    # 100000 x 1000 matrix of uniform random values, tab-separated text.
    np.savetxt(f, np.random.rand(100000, 1000), delimiter="\t")
    print('done')

# Single-shot loading path: read the whole file, then add every row to a
# fresh dense-vector / float-distance index in one batch call.
if fast:
    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.DENSE_VECTOR,
        nmslib.DistType.FLOAT)
    with TimeIt('fast add data point'):
        data = read_data_fast(f)
        # Point ids are the row positions 0..len(data)-1 as int32.
        nmslib.addDataPointBatch(index, np.arange(len(data), dtype=np.int32), data)
    # Kept inside the `if`: in the visible code `index` is only bound on this
    # path. NOTE(review): the original indentation was lost; confirm that the
    # free is meant to sit outside the timed section.
    nmslib.freeIndex(index)
# Batched loading path: create a fresh index with the same arguments as the
# single-shot path, then feed it the file chunk by chunk inside the
# TimeIt('fast_batch add data point') context.
if fast_batch:
index = nmslib.init(
space_type,
space_param,
method_name,
nmslib.DataType.DENSE_VECTOR,
nmslib.DistType.FLOAT)
with TimeIt('fast_batch add data point'):
# Starts at zero; presumably advanced per chunk to keep point ids unique --
# TODO confirm, the code that updates it is not visible here.
offset = 0
# NOTE(review): the loop body is missing from this excerpt. 10000 is
# presumably the batch size -- verify against read_data_fast_batch.
for data in read_data_fast_batch(f, 10000):
# Persist the feature matrix, then report how much variance the "pca" step
# of the pipeline retained (sum of its per-component ratios).
np.save(args.out_npy, features)
print(
    "[ Explained variance ratio: {ratio:.4} ]".format(
        ratio=pipeline.named_steps["pca"].explained_variance_ratio_.sum()
    )
)

print("[== Saving pipeline ==]")
# Open the target through a context manager so the handle is closed (and the
# pickle fully flushed) even if dumping raises, instead of leaking it.
with open(args.out_pipeline, "wb") as pipeline_file:
    pickle.dump(pipeline, pipeline_file)

# Build an "hnsw" index over dense vectors using the metric chosen on the
# command line, fill it with the features, and write it to disk.
index = nmslib.init(
    method="hnsw",
    space=args.knn_metric,
    data_type=nmslib.DataType.DENSE_VECTOR,
)
print("[== Adding features to indexer ==]")
index.addDataPointBatch(features)
print("[== Creating index ==]")
index.createIndex({"post": 1}, print_progress=True)
print("")
print("[== Saving index ==]")
index.saveIndex(args.out_knn)

# Optional test set: run it through the same pipeline and save the result.
if args.in_npy_test is not None:
    # mmap_mode="r" maps the array read-only instead of loading it eagerly.
    test_features = np.load(args.in_npy_test, mmap_mode="r")
    test_features = pipeline.transform(test_features)
    np.save(args.out_npy_test, test_features)