from collections import defaultdict

from implicit.als import AlternatingLeastSquares

# has_cuda: True when a CUDA-enabled build of implicit is available
# (defined elsewhere in the benchmark script this snippet comes from)


def benchmark_times(plays, iterations=3):
    # per-method timings, keyed by method name and then by factor count
    times = defaultdict(lambda: defaultdict(list))

    def store_time(model, name):
        def inner(iteration, elapsed):
            print(name, model.factors, iteration, elapsed)
            times[name][model.factors].append(elapsed)
        return inner

    output = defaultdict(list)
    for factors in range(32, 257, 32):
        for steps in [2, 3, 4]:
            model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=True,
                                            regularization=0, iterations=iterations)
            model.fit_callback = store_time(model, 'cg%i' % steps)
            model.cg_steps = steps
            model.fit(plays)

        model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=False,
                                        regularization=0, iterations=iterations)
        model.fit_callback = store_time(model, 'cholesky')
        model.fit(plays)

        if has_cuda:
            model = AlternatingLeastSquares(factors=factors, use_native=True, use_gpu=True,
                                            regularization=0, iterations=iterations)
            model.fit_callback = store_time(model, 'gpu')
            model.fit(plays)

        # take the min time over the repeated runs for the output
        output['factors'].append(factors)
        for name, stats in times.items():
            output[name].append(min(stats[factors]))
    return output
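# Hedged usage sketch (not part of the original snippets): exercising
# benchmark_times on a small random confidence matrix just to show the call
# shape. The real benchmark runs against the last.fm play-count data.
import scipy.sparse

plays = scipy.sparse.random(1000, 2000, density=0.01, format='csr')
timings = benchmark_times(plays, iterations=3)
print(timings['factors'])
print(timings['cholesky'])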
import numpy


class MaximumInnerProductIndex(object):
    # __init__ (not shown in the original snippet) builds self.index, an annoy
    # index over item factors augmented with an extra coordinate so that every
    # item vector has norm self.max_norm; queries get a 0 in that coordinate.
    def get_nns_by_item(self, itemid, N=10):
        v = self.index.get_item_vector(itemid)
        v[-1] = 0
        return self._get_nns(v, N)

    def _get_nns(self, v, N=10):
        ids, dist = self.index.get_nns_by_vector(v, N, include_distances=True)
        # convert the distances from euclidean to cosine distance,
        # and then rescale the cosine distance to go back to inner product
        scaling = self.max_norm * numpy.linalg.norm(v)
        return ids, scaling * (1 - (numpy.array(dist) ** 2) / 2)
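# Why the rescaling in _get_nns recovers the inner product (a sketch, assuming
# the standard maximum-inner-product reduction: each item vector x gets an
# extra coordinate sqrt(max_norm**2 - |x|**2) so all augmented items share the
# norm max_norm, while queries get a 0 there). Annoy's 'angular' distance is
# d = sqrt(2 - 2*cos), so cos = 1 - d**2 / 2 and <q, x> = max_norm * |q| * cos.
import numpy

max_norm = 4.0
x = numpy.array([1.0, 2.0, 3.0])                       # item factors
x_aug = numpy.append(x, numpy.sqrt(max_norm ** 2 - x @ x))
q = numpy.array([0.5, -1.0, 2.0])                      # query factors
q_aug = numpy.append(q, 0.0)
cos = (q_aug @ x_aug) / (numpy.linalg.norm(q_aug) * numpy.linalg.norm(x_aug))
d = numpy.sqrt(2 - 2 * cos)                            # annoy's angular distance
recovered = max_norm * numpy.linalg.norm(q) * (1 - d ** 2 / 2)
assert numpy.isclose(recovered, q @ x)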
import annoy

from implicit.als import AlternatingLeastSquares


class AnnoyAlternatingLeastSquares(AlternatingLeastSquares):
    """ A version of the AlternatingLeastSquares model that uses an annoy
    index to calculate similar items. This leads to massive speedups
    when called repeatedly """
    def fit(self, Ciu):
        # train the model
        super(AnnoyAlternatingLeastSquares, self).fit(Ciu)

        # build up an Annoy Index with all the item_factors (for calculating
        # similar items)
        self.cosine_index = annoy.AnnoyIndex(self.item_factors.shape[1], 'angular')
        for i, row in enumerate(self.item_factors):
            self.cosine_index.add_item(i, row)
        self.cosine_index.build(self.factors)

        # build up a separate index for the inner product (for recommend methods)
        self.inner_product_index = MaximumInnerProductIndex(self.item_factors)
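# Hedged usage sketch: fit the annoy-backed model, then query the cosine index
# directly. (The original example also overrides similar_items/recommend to go
# through these indices; only fit is reproduced above.)
model = AnnoyAlternatingLeastSquares(factors=64)
model.fit(Ciu)  # Ciu: item-by-user confidence matrix, as elsewhere in these snippets
ids, dists = model.cosine_index.get_nns_by_item(0, 10, include_distances=True)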
import time

import implicit


def benchmark_implicit(ratings, factors, iterations=5, use_gpu=False):
    ratings = ratings.tocsr()
    times = {}
    for rank in factors:
        model = implicit.als.AlternatingLeastSquares(factors=rank,
                                                     iterations=iterations,
                                                     use_gpu=use_gpu)
        start = time.time()
        model.fit(ratings)
        elapsed = time.time() - start
        # take average time over iterations to be consistent with spark timings
        times[rank] = elapsed / iterations
        print("implicit. factors=%i took %.3f" % (rank, elapsed / iterations))
    return times
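# Hedged usage sketch: ratings is assumed to be a scipy.sparse user-by-item
# matrix (e.g. MovieLens, which this spark-comparison benchmark targets).
times = benchmark_implicit(ratings, factors=[32, 64, 128], iterations=5)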
def benchmark_accuracy(plays):
    output = defaultdict(list)

    def store_loss(model, name):
        def inner(iteration, elapsed):
            # calculate_loss comes from implicit's ALS internals and reports
            # the confidence-weighted squared loss at the current iteration
            loss = calculate_loss(plays, model.item_factors, model.user_factors, 0)
            print("model %s iteration %i loss %.5f" % (name, iteration, loss))
            output[name].append(loss)
        return inner

    for steps in [2, 3, 4]:
        model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=True,
                                        regularization=0, iterations=25)
        model.cg_steps = steps
        model.fit_callback = store_loss(model, 'cg%i' % steps)
        model.fit(plays)

    if has_cuda:
        model = AlternatingLeastSquares(factors=100, use_native=True, use_gpu=True,
                                        regularization=0, iterations=25)
        model.fit_callback = store_loss(model, 'gpu')
        model.fit(plays)

    model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=False,
                                    regularization=0, iterations=25)
    model.fit_callback = store_loss(model, 'cholesky')
    model.fit(plays)

    return output
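# For reference, a dense sketch of the loss that store_loss tracks, assuming
# the standard implicit-feedback objective of Hu, Koren & Volinsky with
# confidence C = 1 + Ciu and preference P = (Ciu > 0). implicit's own
# calculate_loss may differ in scaling, so treat this as illustrative only.
import numpy

def dense_loss(Ciu, item_factors, user_factors, regularization):
    C = 1 + Ciu.toarray()                     # confidence, item-by-user
    P = (Ciu.toarray() > 0).astype(float)     # binarized preference
    R = item_factors @ user_factors.T         # reconstructed preferences
    loss = (C * (P - R) ** 2).sum()
    loss += regularization * ((item_factors ** 2).sum() + (user_factors ** 2).sum())
    return loss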
def build(self):
    # define iALS model instance
    self._model = implicit.als.AlternatingLeastSquares(factors=self.rank,
                                                       regularization=self.regularization,
                                                       iterations=self.num_epochs,
                                                       num_threads=self.num_threads)
    # prepare input matrix for learning the model
    matrix = self.get_training_matrix()  # user_by_item sparse matrix
    matrix.data = self.confidence(matrix.data, alpha=self.alpha,
                                  weight=self.weight_func, epsilon=self.epsilon)
    with track_time(self.training_time, verbose=self.verbose, model=self.method):
        # build the model
        # implicit takes an item_by_user matrix as input, so transpose
        self._model.fit(matrix.T)
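# Hedged sketch of a confidence transform compatible with the call above. The
# (alpha, weight, epsilon) signature suggests the two weighting schemes from
# Hu, Koren & Volinsky; this stand-in is illustrative, not the wrapped
# library's actual implementation.
import numpy as np

def confidence(data, alpha=1.0, weight=None, epsilon=1.0):
    if weight == 'log':
        return alpha * np.log(1.0 + data / epsilon)  # logarithmic weighting
    return alpha * data                              # linear weighting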
def parse_map_line_simple(self, items, line):
    map_item = self.key_value(line)
    if map_item:
        (key, value) = map_item
        key = convertImplicit(key)
        if key in items:
            self.error("Duplicate key " + key)
        items[key] = self.parse_value(value)
    else:
        self.error("bad key for map")
def parse_untyped_value(self, value):
    parse = self.parseSpecial(value)
    if parse:
        (ok, data) = parse
        return data
    token = getToken(r"(\S.*)", value)
    if token:
        lines = [token] + \
            pruneTrailingEmpties(self.nestedDocs.popNestedLines())
        return convertImplicit(joinLines(lines))
    else:
        self.nestedDocs.nestToNextLine()
        return self.parseLines()