# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main(args):
    """Main entry: build (or reload) one Annoy index per requested tree count.

    For every value in ``args.ntrees`` an index file is looked up under
    ``args.exp_dir``; if absent, the index is built from ``args.dataset``'s
    base vectors and saved there.
    """
    data = Dataset(args.dataset)
    vec_dim = data.base.shape[1]  # length of each item vector to be indexed
    for ntrees in args.ntrees:
        index = AnnoyIndex(vec_dim)
        idxpath = os.path.join(args.exp_dir, 'sift_annoy_ntrees%d.idx' % ntrees)
        if os.path.exists(idxpath):
            # Index already built on a previous run — just memory-map it.
            logging.info("Loading indexes ...")
            index.load(idxpath)
            logging.info("\tDone!")
        else:
            logging.info("Adding items ...")
            for item_id in xrange(data.nbae):
                index.add_item(item_id, data.base[item_id])
                if item_id % 100000 == 0:
                    # Progress heartbeat for large datasets.
                    logging.info("\t%d/%d" % (item_id, data.nbae))
            logging.info("\tDone!")
            logging.info("Building indexes ...")
            index.build(ntrees)
            logging.info("\tDone!")
            index.save(idxpath)
def test_build_sparse_annoy_index(annoy_index_file):
    """Round-trip a random sparse 10x5 binary matrix through
    ``build_annoy_index`` and check the on-disk index matches the
    in-memory one (same dimensionality, item count, and neighbours)."""
    dense = np.random.choice([0, 1], size=(10, 5))
    index = build_annoy_index(csr_matrix(dense), annoy_index_file)
    assert os.path.exists(annoy_index_file)

    reloaded = AnnoyIndex(5, metric='angular')
    reloaded.load(annoy_index_file)

    assert index.f == reloaded.f == 5
    assert index.get_n_items() == reloaded.get_n_items() == 10
    assert index.get_nns_by_item(0, 5) == reloaded.get_nns_by_item(0, 5)

    # Release the memory-mapped files.
    index.unload()
    reloaded.unload()
trained = "/home/ubuntu/data"
fnv = '%s/vectors.fullwiki.1000.s50.num.npy' % trained
ffb = '%s/freebase_types_and_fullwiki.1000.s50.words' % trained
fnw = '/home/ubuntu/code/wizlang/data/freebase.words'
if os.path.exists(fnw + '.pickle'):
aw2i, ai2w = cPickle.load(open(fnw + '.pickle'))
else:
aw2i, ai2w = veclib.get_words(fnw)
cPickle.dump([aw2i, ai2w], open(fnw + '.pickle','w'))
print 'loaded word index'
if USE_ANNOY:
import annoy
annoy_index = annoy.AnnoyIndex(1000)
annoy_index.load("/home/ubuntu/code/wizlang/data/freebase.tree")
print 'loaded Annoy Index'
avl = annoy_index
else:
avl = veclib.get_vector_lib(fnv)
#avl = veclib.normalize(avl)
avl = veclib.split(veclib.normalize, avl)
frac = None
if frac:
end = int(avl.shape[0] * frac)
avl = avl[:end]
for i in range(end, avl.shape):
del aw2i[ai2w[i].pop()]
def fit(self, Ciu, show_progress=True):
    """Train the underlying ALS model, then build Annoy indexes over the
    learned item factors for approximate similar-items and recommend
    queries (when the corresponding flags are enabled on self)."""
    # Delay loading the annoy library in case it's not installed here.
    import annoy

    # Train the base model first; the item factors come from this fit.
    super(AnnoyAlternatingLeastSquares, self).fit(Ciu, show_progress)

    if self.approximate_similar_items:
        # Index the raw item factors with angular distance for
        # similar-items lookups.
        log.debug("Building annoy similar items index")
        self.similar_items_index = annoy.AnnoyIndex(
            self.item_factors.shape[1], 'angular')
        for item_id, factors in enumerate(self.item_factors):
            self.similar_items_index.add_item(item_id, factors)
        self.similar_items_index.build(self.n_trees)

    if self.approximate_recommend:
        # The recommend path needs inner-product search, so augment the
        # factors (extra dimension) to reduce it to angular distance.
        log.debug("Building annoy recommendation index")
        self.max_norm, extra = augment_inner_product_matrix(self.item_factors)
        self.recommend_index = annoy.AnnoyIndex(extra.shape[1], 'angular')
        for item_id, factors in enumerate(extra):
            self.recommend_index.add_item(item_id, factors)
        self.recommend_index.build(self.n_trees)
# NOTE(review): this fragment appears truncated/garbled by the file splice —
# `if _count` below is syntactically incomplete (no condition body survived),
# and the trailing while-loop body is cut off at the end of the visible chunk.
net_out = self.mx_model.get_outputs()  # forward-pass outputs of the MXNet model
embedding = net_out[0].asnumpy()  # first output head as a numpy array
# L2-normalize each embedding row before nearest-neighbour search.
nembedding = sklearn.preprocessing.normalize(embedding)
if _count
#
# Load stored Weights
tensorflow_session = tf.Session()
tensorflow_saver = tf.train.Saver()
print tcolor.OKGREEN,'Restore model from : ', PARAM_MODEL, tcolor.ENDC
tensorflow_saver.restore( tensorflow_session, PARAM_MODEL )
#
# Load ANN Index
# Rebuild the Annoy index from the pickled VLAD word matrix instead of
# loading a saved .ann file (see TODO below).
with open( PARAM_DB_PREFIX+'/vlad_word.pickle', 'r' ) as handle:
print 'Read : ', PARAM_DB_PREFIX+'vlad_word.pickle'
words_db = pickle.load( handle )
# One Annoy item per row of words_db, using Euclidean distance.
t_ann = AnnoyIndex( words_db.shape[1], metric='euclidean' )
for i in range( words_db.shape[0] ):
t_ann.add_item( i, words_db[i,:] )
print 'Rebuild ANN Index' #TODO: Figure out why t_ann.load() does not work
t_ann.build(10)
#
# Init Renderer
app = TrainRenderer(queue_warning=False)
while True:
im = None
while im==None:
path (unicode / Path): The path to load from.
exclude (list): Names of serialization fields to exclude.
RETURNS (Sense2Vec): The loaded object.
"""
# NOTE(review): the enclosing `def from_disk(self, path, exclude=...)` header
# is outside this chunk — the lines above are the tail of its docstring.
path = Path(path)
# Component file locations inside the serialized directory.
strings_path = path / "strings.json"
index_path = path / "index.ann"
freqs_path = path / "freqs.json"
self.vectors = Vectors().from_disk(path)  # dense vectors table
self.cfg.update(srsly.read_json(path / "cfg"))  # merge stored config
if freqs_path.exists():
    self.freqs = dict(srsly.read_json(freqs_path))
if "strings" not in exclude and strings_path.exists():
    self.strings = StringStore().from_disk(strings_path)
if "index" not in exclude and index_path.exists():
    # Annoy index dimensionality must match the loaded vectors' width.
    self.index = AnnoyIndex(self.vectors.shape[1], self.cfg["annoy_metric"])
    self.index.load(str(index_path))
return self
import h5py
import random
from facemaps.data.database import FacialEmbeddings
from annoy import AnnoyIndex
from config import CONFIG
def get_shortcode_from_facialembeddings_id(fe_id):
    """Return the Instagram shortcode of the post that owns embedding ``fe_id``."""
    embedding = FacialEmbeddings.get(id=fe_id)
    return embedding.op.shortcode
# Tree settings
annoy_settings = CONFIG['annoy_tree_settings']
tree = AnnoyIndex(128, metric=annoy_settings['metric'])
tree.load(CONFIG['annoy_tree'])

# Pick a random embedding id as the query face.
# NOTE(review): randint's upper bound is inclusive, so this can yield
# len(select()) itself — confirm ids are 1-based, otherwise off by one.
seed_idx = random.randint(0, len(FacialEmbeddings.select()))
print(
    'Original search: https://www.instagram.com/p/{}/'.format(get_shortcode_from_facialembeddings_id(seed_idx))
)
print('---' * 10)
print('Similar faces:')

# Fetch 32 nearest neighbours; [1:] skips the query item itself, and the
# list keeps only the first occurrence of each shortcode, in rank order.
neighbour_ids = tree.get_nns_by_item(seed_idx, 32)
shortcodes_unique = []
for neighbour_id in neighbour_ids[1:]:
    shortcode = get_shortcode_from_facialembeddings_id(neighbour_id)
    if shortcode not in shortcodes_unique:
        shortcodes_unique.append(shortcode)
"""
fname_dict = fname + '.d'
if not (os.path.exists(fname) and os.path.exists(fname_dict)):
raise IOError(
"Can't find index files '%s' and '%s' - Unable to restore AnnoyIndexer state." % (fname, fname_dict)
)
else:
try:
from annoy import AnnoyIndex
except ImportError:
raise _NOANNOY
with utils.open(fname_dict, 'rb') as f:
d = _pickle.loads(f.read())
self.num_trees = d['num_trees']
self.index = AnnoyIndex(d['f'])
self.index.load(fname)
self.labels = d['labels']
def generateAnnoy(real, artificial, annoyFilename, dimensions):
    """Build an AnnoyIndex over the vectors in ``artificial[2]``, save it to
    ``annoyFilename``, and return it.

    ``real`` is accepted for interface compatibility but is not used here;
    ``TREESIZE`` (module-level) controls how many trees are built.
    """
    idx2vec = np.array(artificial[2])
    index = AnnoyIndex(dimensions)
    for item_id, vector in enumerate(idx2vec):
        index.add_item(item_id, vector)
    print('Done Adding items to AnnoyIndex')
    index.build(TREESIZE)
    print('Done Building AnnoyIndex')
    index.save(annoyFilename)
    return index