From efb720d41963886ba9b318b7f390576f9d8ea5c5 Mon Sep 17 00:00:00 2001 From: Aram Zegerius Date: Wed, 4 Sep 2019 22:46:49 +0200 Subject: [PATCH] Improve SAR performance (#914) * Improve score() performance Co-authored-by: Overv * Simplify remove_seen --- reco_utils/recommender/sar/sar_singlenode.py | 21 ++++++-------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/reco_utils/recommender/sar/sar_singlenode.py b/reco_utils/recommender/sar/sar_singlenode.py index 0e51353558..0248ac7ea5 100644 --- a/reco_utils/recommender/sar/sar_singlenode.py +++ b/reco_utils/recommender/sar/sar_singlenode.py @@ -106,9 +106,6 @@ def __init__( # the opposite of the above map - map array index to actual string ID self.index2item = None - # track user-item pairs seen during training - self.seen_items = None - def compute_affinity_matrix(self, df, rating_col): """ Affinity matrix. @@ -244,9 +241,6 @@ def fit(self, df): temp_df = self.compute_time_decay(df=temp_df, decay_column=self.col_unity_rating) self.unity_user_affinity = self.compute_affinity_matrix(df=temp_df, rating_col=self.col_unity_rating) - # retain seen items for removal at prediction time - self.seen_items = temp_df[[self.col_user_id, self.col_item_id]].values - # affinity matrix logger.info("Building user affinity sparse matrix") self.user_affinity = self.compute_affinity_matrix(df=temp_df, rating_col=self.col_rating) @@ -301,20 +295,17 @@ def score(self, test, remove_seen=False, normalize=False): # calculate raw scores with a matrix multiplication logger.info("Calculating recommendation scores") - # TODO: only compute scores for users in test - test_scores = self.user_affinity.dot(self.item_similarity) - - # remove items in the train set so recommended items are always novel - if remove_seen: - logger.info("Removing seen items") - test_scores[self.seen_items[:, 0], self.seen_items[:, 1]] = -np.inf - - test_scores = test_scores[user_ids, :] + test_scores = self.user_affinity[user_ids, :].dot(self.item_similarity) # ensure we're working with a dense ndarray if isinstance(test_scores, sparse.spmatrix): test_scores = test_scores.toarray() + # remove items in the train set so recommended items are always novel + if remove_seen: + logger.info("Removing seen items") + test_scores += self.user_affinity[user_ids, :] * -np.inf + if normalize: if self.unity_user_affinity is None: raise ValueError('Cannot use normalize flag during scoring if it was not set at model instantiation')