Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def store_time(model, name):
    """Build a fit callback that logs and records per-iteration timings.

    The returned callable accepts ``(iteration, elapsed)``. On each call it
    prints the label, the model's factor count, the iteration and the elapsed
    value, then appends ``elapsed`` to ``times[name][model.factors]``.

    ``times`` is resolved from the surrounding scope; it is not defined in
    this block.
    """
    def record(iteration, elapsed):
        n_factors = model.factors
        print(name, n_factors, iteration, elapsed)
        bucket = times[name][n_factors]
        bucket.append(elapsed)

    return record
# NOTE(review): this listing has lost its indentation and the enclosing `def` line is
# not part of the excerpt; `plays`, `iterations`, `times`, `store_time` and `has_cuda`
# are all defined outside the lines shown here.
# Maps a column label to a list of values, one entry appended per factor size.
output = defaultdict(list)
# factor sizes 32, 64, ..., 256
for factors in range(32, 257, 32):
# use_cg=True runs, one per cg_steps value, labelled 'cg2', 'cg3' and 'cg4'
for steps in [2, 3, 4]:
model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=True,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'cg%i' % steps)
model.cg_steps = steps
model.fit(plays)
# use_cg=False run, labelled 'cholesky'
model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=False,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'cholesky')
model.fit(plays)
# use_gpu=True run, labelled 'gpu'; only performed when has_cuda is truthy
if has_cuda:
model = AlternatingLeastSquares(factors=factors, use_native=True, use_gpu=True,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'gpu')
model.fit(plays)
# take the min time for the output
output['factors'].append(factors)
# every label present in `times` contributes its smallest value for this factor size
for name, stats in times.items():
output[name].append(min(stats[factors]))
return output
def get_nns_by_item(self, itemid, N=10):
    """Return the nearest neighbours of an item already stored in the index.

    Parameters
    ----------
    itemid
        Identifier passed to ``self.index.get_item_vector``.
    N : int, optional
        Number of neighbours to request (default 10).

    Returns
    -------
    Whatever ``self._get_nns`` returns for the item's vector.
    """
    v = self.index.get_item_vector(itemid)
    # Zero the final component before querying.
    # NOTE(review): presumably this is the extra dimension added when the
    # index was built - confirm against the index construction code.
    v[-1] = 0
    # Forward N: it used to be dropped here, so callers always received the
    # default number of neighbours regardless of what they asked for.
    return self._get_nns(v, N)
def _get_nns(self, v, N=10):
    """Query ``self.index`` with vector ``v`` and return ``(ids, scores)``.

    ``ids`` is passed through unchanged from
    ``self.index.get_nns_by_vector(v, N, include_distances=True)``; ``scores``
    is the returned distance list mapped through
    ``max_norm * ||v|| * (1 - d ** 2 / 2)``.
    """
    neighbours = self.index.get_nns_by_vector(v, N, include_distances=True)
    ids, dist = neighbours

    # convert the distances from euclidean to cosine distance,
    # and then rescale the cosine distance to go back to inner product
    scaling = self.max_norm * numpy.linalg.norm(v)
    squared = numpy.array(dist) ** 2
    cosine = 1 - squared / 2
    return ids, scaling * cosine
class AnnoyAlternatingLeastSquares(AlternatingLeastSquares):
    """ AlternatingLeastSquares variant that keeps an annoy index of the
    item factors for calculating similar items, which gives massive
    speedups when it is queried repeatedly """

    def fit(self, Ciu):
        """Train on ``Ciu`` with the parent class, then build both indexes.

        After training, ``self.cosine_index`` holds an angular annoy index over
        ``self.item_factors`` and ``self.inner_product_index`` holds a
        ``MaximumInnerProductIndex`` over the same factors.
        """
        # train the model
        super(AnnoyAlternatingLeastSquares, self).fit(Ciu)

        # angular Annoy index over every item factor row (for calculating similar items)
        item_vectors = self.item_factors
        angular_index = self.cosine_index = annoy.AnnoyIndex(item_vectors.shape[1], 'angular')
        for item_id, vector in enumerate(item_vectors):
            angular_index.add_item(item_id, vector)
        angular_index.build(self.factors)

        # separate index for the inner product (for recommend methods)
        self.inner_product_index = MaximumInnerProductIndex(item_vectors)
# NOTE(review): this listing has lost its indentation and the enclosing `def` line is
# not part of the excerpt; `plays`, `store_loss`, `output` and `has_cuda` are defined
# outside the lines shown here.
# use_cg=True runs, one per cg_steps value, labelled 'cg2', 'cg3' and 'cg4'
for steps in [2, 3, 4]:
model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=True, regularization=0,
iterations=25)
model.cg_steps = steps
model.fit_callback = store_loss(model, 'cg%i' % steps)
model.fit(plays)
# use_gpu=True run, labelled 'gpu'; only performed when has_cuda is truthy
if has_cuda:
model = AlternatingLeastSquares(factors=100, use_native=True, use_gpu=True,
regularization=0, iterations=25)
model.fit_callback = store_loss(model, 'gpu')
# use_gpu was already passed to the constructor; it is assigned again here
model.use_gpu = True
model.fit(plays)
# use_cg=False run, labelled 'cholesky'
model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=False, regularization=0,
iterations=25)
model.fit_callback = store_loss(model, 'cholesky')
model.fit(plays)
return output
# Fits several AlternatingLeastSquares configurations on `plays` and records, per
# configuration label, the loss computed each time the fit callback fires.
# NOTE(review): this listing has lost its indentation and shows no `return` statement;
# presumably the function ends by returning `output` - confirm against the full file.
def benchmark_accuracy(plays):
# label -> list of loss values, appended to by the callbacks built below
output = defaultdict(list)
# builds a fit callback bound to one model and one label
def store_loss(model, name):
def inner(iteration, elapsed):
# the final argument is 0, the same value passed as regularization to the models
# below (presumably the regularization term - confirm against calculate_loss)
loss = calculate_loss(plays, model.item_factors, model.user_factors, 0)
print("model %s iteration %i loss %.5f" % (name, iteration, loss))
output[name].append(loss)
return inner
# use_cg=True runs, one per cg_steps value, labelled 'cg2', 'cg3' and 'cg4'
for steps in [2, 3, 4]:
model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=True, regularization=0,
iterations=25)
model.cg_steps = steps
model.fit_callback = store_loss(model, 'cg%i' % steps)
model.fit(plays)
# use_gpu=True run, labelled 'gpu'; only performed when has_cuda is truthy
if has_cuda:
model = AlternatingLeastSquares(factors=100, use_native=True, use_gpu=True,
regularization=0, iterations=25)
model.fit_callback = store_loss(model, 'gpu')
# use_gpu was already passed to the constructor; it is assigned again here
model.use_gpu = True
model.fit(plays)
# use_cg=False run, labelled 'cholesky'
model = AlternatingLeastSquares(factors=100, use_native=True, use_cg=False, regularization=0,
iterations=25)
model.fit_callback = store_loss(model, 'cholesky')
model.fit(plays)
# NOTE(review): this listing has lost its indentation and the enclosing `def` line is
# not part of the excerpt; `plays`, `iterations`, `times`, `store_time` and `has_cuda`
# are all defined outside the lines shown here.
# Maps a column label to a list of values, one entry appended per factor size.
output = defaultdict(list)
# factor sizes 32, 64, ..., 256
for factors in range(32, 257, 32):
# use_cg=True runs, one per cg_steps value, labelled 'cg2', 'cg3' and 'cg4'
for steps in [2, 3, 4]:
model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=True,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'cg%i' % steps)
model.cg_steps = steps
model.fit(plays)
# use_cg=False run, labelled 'cholesky'
model = AlternatingLeastSquares(factors=factors, use_native=True, use_cg=False,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'cholesky')
model.fit(plays)
# use_gpu=True run, labelled 'gpu'; only performed when has_cuda is truthy
if has_cuda:
model = AlternatingLeastSquares(factors=factors, use_native=True, use_gpu=True,
regularization=0, iterations=iterations)
model.fit_callback = store_time(model, 'gpu')
model.fit(plays)
# take the min time for the output
output['factors'].append(factors)
# every label present in `times` contributes its smallest value for this factor size
for name, stats in times.items():
output[name].append(min(stats[factors]))
return output
variant='20m'):
# NOTE(review): only the tail of the signature is visible and the listing has lost its
# indentation; `model_name` and `min_rating` are presumably parameters declared on the
# earlier, unseen part of the `def` line - confirm against the full file.
# read in the input data file
start = time.time()
titles, ratings = get_movielens(variant)
# remove things < min_rating, and convert to implicit dataset
# by considering ratings as a binary preference only
ratings.data[ratings.data < min_rating] = 0
ratings.eliminate_zeros()
# every rating that survived the threshold is replaced by 1.0
ratings.data = np.ones(len(ratings.data))
log.info("read data file in %s", time.time() - start)
# generate a recommender model based off the input params
if model_name == "als":
model = AlternatingLeastSquares()
# lets weight these models by bm25weight.
log.debug("weighting matrix by bm25_weight")
# only the "als" branch reweights `ratings`; the other branches leave it as built above
ratings = (bm25_weight(ratings, B=0.9) * 5).tocsr()
elif model_name == "bpr":
model = BayesianPersonalizedRanking()
elif model_name == "lmf":
model = LogisticMatrixFactorization()
elif model_name == "tfidf":
model = TFIDFRecommender()
elif model_name == "cosine":
model = CosineRecommender()
# NOTE(review): the excerpt stops here; any further branches or an `else` are not visible.
# Constructor excerpt: calls the parent initialiser, rounds `factors` up to a multiple
# of 32 when use_gpu is set, then stores `factors` and `regularization` on the instance.
# NOTE(review): the listing has lost its indentation and stops after the last comment
# below, so how the remaining parameters are handled is not visible here.
def __init__(self, factors=100, regularization=0.01, dtype=np.float32,
use_native=True, use_cg=True, use_gpu=implicit.cuda.HAS_CUDA,
iterations=15, calculate_training_loss=False, num_threads=0):
super(AlternatingLeastSquares, self).__init__()
# currently there are some issues when training on the GPU when some of the warps
# don't have full factors. Round up to be warp aligned.
# TODO: figure out where the issue is (best guess is in the
# 'dot' function in 'implicit/cuda/utils/cuh)
if use_gpu and factors % 32:
# distance from `factors` up to the next multiple of 32
padding = 32 - factors % 32
log.warning("GPU training requires factor size to be a multiple of 32."
" Increasing factors from %i to %i.", factors, factors + padding)
factors += padding
# parameters on how to factorize
self.factors = factors
self.regularization = regularization
# options on how to fit the model
def __init__(self, *args, **kwargs):
    """
    Construct an ALS recommender.  Every positional and keyword argument is
    forwarded unchanged to :py:class:`implicit.als.AlternatingLeastSquares`,
    and the resulting model is handed to the parent initialiser.
    """
    # imported inside the method, not at module level
    from implicit.als import AlternatingLeastSquares

    delegate = AlternatingLeastSquares(*args, **kwargs)
    super().__init__(delegate)