Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_auc_score():
    """Check ``evaluation.auc_score`` against the ``_auc`` helper.

    A random sparse 10 x 100 interaction matrix is generated with every
    stored value set to one, a BPR-loss model is partially fitted on it,
    and the per-user AUC restricted to rows that hold at least one stored
    entry is compared with the output of ``_auc`` (presumably a reference
    implementation defined elsewhere in the test module -- confirm).
    """
    n_users = 10
    n_items = 100

    interactions = sp.rand(n_users, n_items, format='coo')
    # Binarise: keep the sparsity pattern, replace every stored value by 1.
    interactions.data = np.ones_like(interactions.data)

    bpr_model = LightFM(loss='bpr')
    bpr_model.fit_partial(interactions)

    all_user_auc = evaluation.auc_score(bpr_model,
                                        interactions,
                                        num_threads=2)
    # Only rows with at least one stored entry are compared.
    rows_with_entries = interactions.getnnz(axis=1) > 0
    auc = all_user_auc[rows_with_entries]

    expected_auc = np.array(_auc(bpr_model, interactions))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
import numpy as np
from lightfm import LightFM
from fetch_lastfm import fetch_lastfm
# Load the data set through the fetch_lastfm helper imported above.
data = fetch_lastfm()
# Build a LightFM model configured with the WARP loss.
model = LightFM(loss='warp')
# Train on the 'matrix' entry of the fetched data: 30 epochs, 2 threads.
model.fit(data['matrix'], epochs=30, num_threads=2)
# Get recommendations function
def get_recommendations(model, coo_mtrx, users_ids):
    """Score every item for each user and print a per-user header line.

    Args:
        model: object exposing ``predict(user, item_ids)`` that returns a
            NumPy array of scores, one per item id passed in.
        coo_mtrx: matrix-like object; only ``coo_mtrx.shape[1]`` (the number
            of items) is read.
        users_ids: iterable of user ids to produce output for.

    Returns:
        None. One ``Recomendations for user <id>:`` line is printed per user.
    """
    n_items = coo_mtrx.shape[1]
    # The candidate item ids are the same for every user, so build them once.
    item_ids = np.arange(n_items)
    for user in users_ids:
        # TODO create known positives
        # Artists the model predicts they will like
        scores = model.predict(user, item_ids)
        # Indices of the three highest-scoring items (argsort of the negated
        # scores sorts descending). Not consumed yet by the code visible here.
        top_scores = np.argsort(-scores)[:3]
        # print() call form is valid on both Python 2 and Python 3; the
        # original print statement is a SyntaxError on Python 3.
        print('Recomendations for user %s:' % user)
# And take a look at the fetch_movielens method to see what it's doing.
#
# Fetch the data and format it. min_rating=4.0 is forwarded to
# fetch_movielens (presumably keeps only ratings of 4.0 and above -- confirm
# against the helper's documentation).
data = fetch_movielens(min_rating=4.0)
# Print the training and testing data.
print(repr(data['train']))
print(repr(data['test']))
# CHALLENGE part 2 of 3 - use 3 different loss functions (so 3 different models), compare results, print results for
# the best one. - Available loss functions are warp, logistic, bpr, and warp-kos.
# Create the model (WARP loss).
model = LightFM(loss='warp')
# Train the model on the 'train' split: 30 epochs, 2 threads.
model.fit(data['train'], epochs=30, num_threads=2)
#CHALLENGE part 3 of 3 - Modify this function so that it parses your dataset correctly to retrieve
#the necessary variables (products, songs, tv shows, etc.)
#then print out the recommended results
# Generate recommendations for each of the given users.
#   model    -- not referenced by the lines visible in this excerpt
#   data     -- mapping whose 'train' entry exposes a two-element .shape
#   user_ids -- iterable of user ids to loop over
# NOTE(review): the body of the final loop is missing from this excerpt, so
# what is computed or printed per user cannot be documented from here.
def sample_recommendation(model, data, user_ids):
# Number of users and movies (rows and columns) in the training data.
n_users, n_items = data['train'].shape
# Generate recommendations for each user we input.
for user_id in user_ids:
import lightfm
from scipy import sparse
class LightFM(lightfm.LightFM):
    """``lightfm.LightFM`` with a scikit-learn style ``fit(X, y)`` interface.

    The training hyper-parameters ``epochs`` and ``num_threads`` are stored
    on the instance at construction time so that ``fit`` only needs the data.
    """

    def __init__(self, epochs=1, num_threads=1, *args, **kwargs):
        """Store the training settings and forward the rest to the base class.

        Args:
            epochs: number of epochs passed to the base ``fit``.
            num_threads: number of threads passed to the base ``fit``.
            *args, **kwargs: forwarded unchanged to ``lightfm.LightFM``.
        """
        self.epochs = epochs
        self.num_threads = num_threads
        super(LightFM, self).__init__(*args, **kwargs)

    def get_columns(self, X):
        """Return the first two columns of ``X`` as ``(user_ids, item_ids)``.

        Objects exposing ``.iloc`` (e.g. a pandas DataFrame) are indexed
        positionally and converted with ``.values``; anything else is sliced
        directly with ``X[:, 0]`` and ``X[:, 1]``.
        """
        if hasattr(X, 'iloc'):
            return X.iloc[:, 0].values, X.iloc[:, 1].values
        return X[:, 0], X[:, 1]

    def fit(self, X, y):
        """Fit the model on (user, item) pairs ``X`` with values ``y``.

        A CSR interaction matrix is built with ``y`` as the data and the two
        columns of ``X`` as row and column indices; no explicit shape is
        given, so scipy infers it from the indices.

        Returns:
            self, so that calls can be chained and the estimator follows the
            usual ``fit`` contract (the original implementation returned
            ``None``).
        """
        user_ids, item_ids = self.get_columns(X)
        interactions = sparse.csr_matrix((y, (user_ids, item_ids)))
        super(LightFM, self).fit(
            interactions, epochs=self.epochs, num_threads=self.num_threads)
        return self
# -*- coding: utf-8 -*-
import lightfm
from scipy import sparse
class LightFM(lightfm.LightFM):
    """``lightfm.LightFM`` with a scikit-learn style ``fit(X, y)`` interface.

    The training hyper-parameters ``epochs`` and ``num_threads`` are stored
    on the instance at construction time so that ``fit`` only needs the data.
    """

    def __init__(self, epochs=1, num_threads=1, *args, **kwargs):
        """Store the training settings and forward the rest to the base class.

        Args:
            epochs: number of epochs passed to the base ``fit``.
            num_threads: number of threads passed to the base ``fit``.
            *args, **kwargs: forwarded unchanged to ``lightfm.LightFM``.
        """
        self.epochs = epochs
        self.num_threads = num_threads
        super(LightFM, self).__init__(*args, **kwargs)

    def get_columns(self, X):
        """Return the first two columns of ``X`` as ``(user_ids, item_ids)``.

        Objects exposing ``.iloc`` (e.g. a pandas DataFrame) are indexed
        positionally and converted with ``.values``; anything else is sliced
        directly with ``X[:, 0]`` and ``X[:, 1]``.
        """
        if hasattr(X, 'iloc'):
            return X.iloc[:, 0].values, X.iloc[:, 1].values
        return X[:, 0], X[:, 1]

    def fit(self, X, y):
        """Fit the model on (user, item) pairs ``X`` with values ``y``.

        A CSR interaction matrix is built with ``y`` as the data and the two
        columns of ``X`` as row and column indices; no explicit shape is
        given, so scipy infers it from the indices.

        Returns:
            self, so that calls can be chained and the estimator follows the
            usual ``fit`` contract (the original implementation returned
            ``None``).
        """
        user_ids, item_ids = self.get_columns(X)
        interactions = sparse.csr_matrix((y, (user_ids, item_ids)))
        super(LightFM, self).fit(
            interactions, epochs=self.epochs, num_threads=self.num_threads)
        return self
# NOTE(review): this is the tail of a function whose ``def`` line is not part
# of this excerpt; df, df_history, num_users, num_items and num_msno come
# from code that is not visible here.
#
# Interaction matrix: df_history['target'] values placed at
# (user_id, song_id), with an explicit (num_users, num_items) shape.
M = coo_matrix(
(df_history['target'], (df_history['user_id'], df_history['song_id'])),
shape=(num_users, num_items)
)
# Distinct (msno, user_id) pairs across both df and df_history.
user_features = pd.concat([df, df_history])[['msno', 'user_id']].drop_duplicates()
# Indicator matrix holding a 1 at (user_id, msno) for each distinct pair.
user_features = coo_matrix(
(np.ones(len(user_features)), (user_features['user_id'], user_features['msno'])),
shape=[num_users, num_msno]
)
# Prepend a num_users x num_users identity block to the msno indicator block.
user_features = sp.hstack([sp.eye(num_users), user_features])
# 50-component model with learning rate 0.1.
model = LightFM(no_components=50, learning_rate=0.1)
# Train for 2 epochs on 50 threads using the stacked user features.
model.fit(
M,
epochs=2,
num_threads=50,
user_features=user_features
)
# Score the (user_id, song_id) pairs listed in df with the same user features.
result = model.predict(
df['user_id'].values,
df['song_id'].values,
user_features=user_features
)
return result
def _prep_for_fit(self, train_obs, **fit_params):
    """Prepare data, fit parameters and a fresh model before fitting.

    Args:
        train_obs: training observations, handed to ``_set_data`` and, when
            an initialiser model is configured, to ``_initialise_from_model``.
        **fit_params: fit parameters; a ``'sample_weight'`` entry is
            (over)written before they are handed to ``_set_fit_params``.
    """
    # self.toggle_mkl_blas_1_thread(True)

    # Assign all observation data.
    self._set_data(train_obs)

    # Sample weights are the COO form of the training matrix when enabled,
    # otherwise explicitly None.
    if self.use_sample_weight:
        weights = self.train_mat.tocoo()
    else:
        weights = None
    fit_params['sample_weight'] = weights

    self._set_fit_params(fit_params)
    self._add_external_features()

    # Create the model from the stored parameters.
    self.model = LightFM(**self.model_params)
    if self.initialiser_model is None:
        return
    self._initialise_from_model(train_obs)
# NOTE(review): this fragment starts mid-method; self, seen_course,
# course_features, reviews and data are defined in code not visible here.
#
# Keep only the first record per course id (course[0]). seen_course.add()
# is called as a side effect of the filter; the `not ...add(...)` clause is
# presumably always true because add() returns None (assumes seen_course is
# a set -- confirm).
course_features_unique = [course for course in course_features if course[0] not in seen_course and not seen_course.add(course[0])]
# Tabulate the records, indexed by the "id" column.
course_features_df = DataFrame.from_records(course_features_unique, index="id", columns=["id", "instructor_id", "primary_department_id", "course_rating", "useful_rating", "engagement_rating", "difficulty_rating", "competency_rating", "lecturing_rating", "ethusiasm_rating", "approachable_rating"])
# Expand the two id columns into dummy/indicator columns.
course_features_df_binarized = get_dummies(course_features_df, columns=['instructor_id', 'primary_department_id'], sparse=True)
# Put the index (course id) in front of the feature values as a first
# column and cast the whole array to float32.
index = array(course_features_df_binarized.index)
index_as_column_array = index.reshape((-1,1))
course_features_df_binarized_as_2d_array = array(append(index_as_column_array, course_features_df_binarized.values, axis=1), dtype=float32)
# Convert the dense 2-D array with the helper method on self.
course_features_unnormalized = self.np_2d_array_to_sparse_matrix(course_features_df_binarized_as_2d_array)
# The features are stored unscaled; the scaling call is commented out.
data['course_features'] = course_features_unnormalized #preprocessing.scale(course_features_unnormalized, with_mean=False)
# One course name per review, kept as an object array.
course_names = array([review.course.name for review in reviews], dtype=object)
data['course_names'] = course_names
# WARP-loss model trained with the course features as item features:
# 30 epochs, 2 threads, verbose output off.
model = LightFM(loss='warp')
model.fit(data['train'], item_features=data['course_features'], epochs=30, num_threads=2, verbose=False)
# ZQ 1637, Kent 2427, Matt 1724, Patrick 1605
for user_id in [2427, 1637, 1724, 1605]:
    # NOTE(review): the message printed below says "Top 25" while this
    # count is 30 -- confirm which one is intended.
    recommendation_count = 30
    # Score every course id for this user with the trained model.
    scores_for_kent = model.predict(user_id, course_ids, item_features=data['course_features'], num_threads=2)
    # Descending-score ordering, computed once and reused for both lookups.
    ranking = argsort(-scores_for_kent)
    recommendations_for_kent = data['course_names'][ranking]
    #for recommendation in unique(recommendations_for_kent)[:recommendation_count]: print("%s, " % recommendation)
    user = User.objects.get(id=user_id)
    # print() call form is valid on both Python 2 and Python 3; the original
    # print statement is a SyntaxError on Python 3.
    print("Top 25 recommendations next semester for " + user.email)
    reviews_by_user = CourseReview.objects.filter(author=user)
    # A list comprehension yields a list on both Python 2 and 3, matching
    # what map() returned on Python 2.
    courses_taken_by_user = [review.course for review in reviews_by_user]
    course_id_of_recommendations_for_kent = course_ids[ranking]
def build(self):
self._model = LightFM(no_components=self.rank,
item_alpha=self.item_alpha,
user_alpha=self.user_alpha,
loss=self.loss,
learning_rate=self.learning_rate,
learning_schedule=self.learning_schedule,
max_sampled=self.max_sampled,
random_state=self.seed)
fit = getattr(self._model, self.fit_method)
matrix = self.get_training_matrix()
if self.item_features is not None:
item_features = self.item_features.reindex(self.data.index.itemid.old.values, fill_value=[])
self._item_features_csr, self.item_feature_labels = stack_features(item_features,
add_identity=self.item_identity,
normalize=True,