Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if recalculate_user:
return self.recalculate_user(userid, user_items)
else:
return self.user_factors[userid]
def recalculate_user(self, userid, user_items):
    """Hook for recomputing a user's factors; unsupported here.

    Always raises ``NotImplementedError``. Judging by the error message,
    models that can recompute a user are expected to override this method.
    """
    message = "recalculate_user is not supported with this model"
    raise NotImplementedError(message)
def similar_users(self, userid, N=10):
    # The public docstring is attached by the assignment that follows this
    # definition (copied from RecommenderBase.similar_users).
    all_users = self.user_factors
    query = all_users[userid]
    scale = self.user_norms
    return self._get_similarity_score(query, all_users, scale, N)
similar_users.__doc__ = RecommenderBase.similar_users.__doc__
def similar_items(self, itemid, N=10):
    # The public docstring is attached by the assignment that follows this
    # definition (copied from RecommenderBase.similar_items).
    all_items = self.item_factors
    query = all_items[itemid]
    scale = self.item_norms
    return self._get_similarity_score(query, all_items, scale, N)
similar_items.__doc__ = RecommenderBase.similar_items.__doc__
def _get_similarity_score(self, factor, factors, norms, N):
    """Return up to ``N`` best-scoring rows of ``factors`` for ``factor``.

    Each score is ``factors.dot(factor)`` divided element-wise by ``norms``.

    Parameters
    ----------
    factor : ndarray
        Query vector that every row of ``factors`` is scored against.
    factors : ndarray
        Candidate factors; ``factors.dot(factor)`` must yield one score per
        candidate.
    norms : ndarray
        Divisors applied to the dot products.
    N : int
        Maximum number of results to return.

    Returns
    -------
    list
        List of (index, score) tuples sorted by descending score. Empty when
        ``N`` is not positive.
    """
    scores = factors.dot(factor) / norms

    # -0 == 0 would make the slice below return every row, so handle it here.
    if N <= 0:
        return []

    if N < len(scores):
        best = np.argpartition(scores, -N)[-N:]
    else:
        # argpartition raises ValueError when kth is out of bounds; when the
        # caller asks for at least as many rows as exist, rank all of them.
        best = np.arange(len(scores))

    return sorted(zip(best, scores[best]), key=lambda x: -x[1])
@property
Parameters
----------
itemid : int
The row id of the item to retrieve similar items for
N : int, optional
The number of similar items to return
Returns
-------
list
List of (itemid, score) tuples
"""
pass
class MatrixFactorizationBase(RecommenderBase):
    """Common functionality shared by recommendation models.

    Attributes
    ----------
    item_factors : ndarray
        Array of latent factors for each item in the training set
    user_factors : ndarray
        Array of latent factors for each user in the training set
    """

    def __init__(self):
        # Parameters learned by the model; nothing is available yet.
        self.item_factors = None
        self.user_factors = None

        # Cached user / item norms (useful for calculating similar items).
        self._user_norms = None
        self._item_norms = None
import itertools
import numpy
from numpy import bincount, log, log1p, sqrt
from scipy.sparse import coo_matrix, csr_matrix
from ._nearest_neighbours import NearestNeighboursScorer, all_pairs_knn
from .recommender_base import RecommenderBase
from .utils import nonzeros
class ItemItemRecommender(RecommenderBase):
    """Base class for Item-Item Nearest Neighbour recommender models.

    Parameters
    ----------
    K : int, optional
        The number of neighbours to include when calculating the item-item
        similarity matrix
    num_threads : int, optional
        The number of threads to use for fitting the model. Specifying 0
        means to default to the number of cores on the machine.
    """

    def __init__(self, K=20, num_threads=0):
        # Configuration supplied by the caller.
        self.K = K
        self.num_threads = num_threads

        # No similarity data is held at construction time.
        self.similarity = None
def rank_items(self, userid, user_items, selected_items, recalculate_user=False):
    """Score ``selected_items`` for one user and return them best-first.

    Parameters
    ----------
    userid : int
        Id of the user; forwarded to ``_user_factor``.
    user_items : matrix-like
        Forwarded to ``_user_factor``; ``user_items.shape[1]`` is used as the
        exclusive upper bound for valid item ids.
    selected_items : sequence of int
        Item ids to rank. May be empty.
    recalculate_user : bool, optional
        Forwarded to ``_user_factor``.

    Returns
    -------
    list
        List of (itemid, score) tuples sorted by descending score. Empty when
        ``selected_items`` is empty.

    Raises
    ------
    IndexError
        If any selected item id is negative or not below
        ``user_items.shape[1]``.
    """
    user = self._user_factor(userid, user_items, recalculate_user)

    # max()/min() raise ValueError on an empty sequence; there is simply
    # nothing to rank in that case.
    if len(selected_items) == 0:
        return []

    # check selected items are in the model
    if max(selected_items) >= user_items.shape[1] or min(selected_items) < 0:
        raise IndexError("Some of selected itemids are not in the model")

    # calculate relevance scores of given items w.r.t the user
    item_factors = self.item_factors[selected_items]
    scores = item_factors.dot(user)

    # return sorted results
    return sorted(zip(selected_items, scores), key=lambda x: -x[1])
recommend.__doc__ = RecommenderBase.recommend.__doc__
def _user_factor(self, userid, user_items, recalculate_user=False):
    """Return the factor vector to use for ``userid``.

    With ``recalculate_user`` falsy, the stored ``self.user_factors[userid]``
    is returned; otherwise the result of
    ``self.recalculate_user(userid, user_items)`` is returned.
    """
    if not recalculate_user:
        return self.user_factors[userid]
    return self.recalculate_user(userid, user_items)
def recalculate_user(self, userid, user_items):
    """Hook for recomputing a user's factors; unsupported here.

    Always raises ``NotImplementedError``. Judging by the error message,
    models that can recompute a user are expected to override this method.
    """
    message = "recalculate_user is not supported with this model"
    raise NotImplementedError(message)
def similar_users(self, userid, N=10):
    # The public docstring is attached by the assignment that follows this
    # definition (copied from RecommenderBase.similar_users).
    #
    # Score every user's factors against the requested user's factors,
    # scaled by the cached user norms, and keep the N best.
    factor = self.user_factors[userid]
    factors = self.user_factors
    norms = self.user_norms
    return self._get_similarity_score(factor, factors, norms, N)
similar_users.__doc__ = RecommenderBase.similar_users.__doc__
def similar_items(self, itemid, N=10):
    # The public docstring is attached by the assignment that follows this
    # definition (copied from RecommenderBase.similar_items).
    all_items = self.item_factors
    query = all_items[itemid]
    scale = self.item_norms
    return self._get_similarity_score(query, all_items, scale, N)
similar_items.__doc__ = RecommenderBase.similar_items.__doc__
def _get_similarity_score(self, factor, factors, norms, N):
    """Return up to ``N`` best-scoring rows of ``factors`` for ``factor``.

    Each score is ``factors.dot(factor)`` divided element-wise by ``norms``.

    Parameters
    ----------
    factor : ndarray
        Query vector that every row of ``factors`` is scored against.
    factors : ndarray
        Candidate factors; ``factors.dot(factor)`` must yield one score per
        candidate.
    norms : ndarray
        Divisors applied to the dot products.
    N : int
        Maximum number of results to return.

    Returns
    -------
    list
        List of (index, score) tuples sorted by descending score. Empty when
        ``N`` is not positive.
    """
    scores = factors.dot(factor) / norms

    # -0 == 0 would make the slice below return every row, so handle it here.
    if N <= 0:
        return []

    if N < len(scores):
        best = np.argpartition(scores, -N)[-N:]
    else:
        # argpartition raises ValueError when kth is out of bounds; when the
        # caller asks for at least as many rows as exist, rank all of them.
        best = np.arange(len(scores))

    return sorted(zip(best, scores[best]), key=lambda x: -x[1])
@property
def user_norms(self):
    """Norms of ``user_factors`` along the last axis, computed once and cached."""
    if self._user_norms is None:
        norms = np.linalg.norm(self.user_factors, axis=-1)
        self._user_norms = norms
        # These norms are used as divisors when scoring similar users;
        # swap exact zeros for a tiny value so that division is defined.
        norms[norms == 0] = 1e-10
    return self._user_norms
@property
def item_norms(self):