From 4c875df2d0b6b3ddad70a49a4353c5de19c28e61 Mon Sep 17 00:00:00 2001 From: Aram Zegerius Date: Wed, 4 Sep 2019 12:10:23 +0200 Subject: [PATCH 1/2] Improve score() performance Co-authored-by: Overv --- reco_utils/recommender/sar/sar_singlenode.py | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/reco_utils/recommender/sar/sar_singlenode.py b/reco_utils/recommender/sar/sar_singlenode.py index 0e51353558..d05ba789f3 100644 --- a/reco_utils/recommender/sar/sar_singlenode.py +++ b/reco_utils/recommender/sar/sar_singlenode.py @@ -244,18 +244,21 @@ def fit(self, df): temp_df = self.compute_time_decay(df=temp_df, decay_column=self.col_unity_rating) self.unity_user_affinity = self.compute_affinity_matrix(df=temp_df, rating_col=self.col_unity_rating) - # retain seen items for removal at prediction time - self.seen_items = temp_df[[self.col_user_id, self.col_item_id]].values - # affinity matrix logger.info("Building user affinity sparse matrix") self.user_affinity = self.compute_affinity_matrix(df=temp_df, rating_col=self.col_rating) + # retain seen items for removal at prediction time + seen = temp_df[[self.col_user_id, self.col_item_id]].values + self.seen_items = np.ones(self.user_affinity.shape) + self.seen_items[seen[:, 0], seen[:, 1]] = -np.inf + # calculate item co-occurrence logger.info("Calculating item co-occurrence") item_cooccurrence = self.compute_coocurrence_matrix(df=temp_df) # free up some space + del seen del temp_df self.item_frequencies = item_cooccurrence.diagonal() @@ -301,20 +304,17 @@ def score(self, test, remove_seen=False, normalize=False): # calculate raw scores with a matrix multiplication logger.info("Calculating recommendation scores") - # TODO: only compute scores for users in test - test_scores = self.user_affinity.dot(self.item_similarity) - - # remove items in the train set so recommended items are always novel - if remove_seen: - logger.info("Removing seen items") - test_scores[self.seen_items[:, 0], self.seen_items[:, 1]] = -np.inf - - test_scores = test_scores[user_ids, :] + test_scores = self.user_affinity[user_ids, :].dot(self.item_similarity) # ensure we're working with a dense ndarray if isinstance(test_scores, sparse.spmatrix): test_scores = test_scores.toarray() + # remove items in the train set so recommended items are always novel + if remove_seen: + logger.info("Removing seen items") + test_scores = test_scores * self.seen_items[user_ids, :] + if normalize: if self.unity_user_affinity is None: raise ValueError('Cannot use normalize flag during scoring if it was not set at model instantiation') From 97bcefc499b73085b0cf6daaf3391ccc00c81afc Mon Sep 17 00:00:00 2001 From: Aram Zegerius Date: Wed, 4 Sep 2019 22:07:03 +0200 Subject: [PATCH 2/2] Simplify remove_seen --- reco_utils/recommender/sar/sar_singlenode.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/reco_utils/recommender/sar/sar_singlenode.py b/reco_utils/recommender/sar/sar_singlenode.py index d05ba789f3..0248ac7ea5 100644 --- a/reco_utils/recommender/sar/sar_singlenode.py +++ b/reco_utils/recommender/sar/sar_singlenode.py @@ -106,9 +106,6 @@ def __init__( # the opposite of the above map - map array index to actual string ID self.index2item = None - # track user-item pairs seen during training - self.seen_items = None - def compute_affinity_matrix(self, df, rating_col): """ Affinity matrix. @@ -248,17 +245,11 @@ def fit(self, df): logger.info("Building user affinity sparse matrix") self.user_affinity = self.compute_affinity_matrix(df=temp_df, rating_col=self.col_rating) - # retain seen items for removal at prediction time - seen = temp_df[[self.col_user_id, self.col_item_id]].values - self.seen_items = np.ones(self.user_affinity.shape) - self.seen_items[seen[:, 0], seen[:, 1]] = -np.inf - # calculate item co-occurrence logger.info("Calculating item co-occurrence") item_cooccurrence = self.compute_coocurrence_matrix(df=temp_df) # free up some space - del seen del temp_df self.item_frequencies = item_cooccurrence.diagonal() @@ -313,7 +304,7 @@ def score(self, test, remove_seen=False, normalize=False): # remove items in the train set so recommended items are always novel if remove_seen: logger.info("Removing seen items") - test_scores = test_scores * self.seen_items[user_ids, :] + test_scores += self.user_affinity[user_ids, :] * -np.inf if normalize: if self.unity_user_affinity is None: