Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove drop_duplicates() from SAR method fix #1464 #1588

Merged
merged 10 commits into from
Feb 28, 2022
Prev Previous commit
Next Next commit
Remove drop_duplicates() from SAR method
simonzhaoms committed Feb 23, 2022
commit 0c4d707892e4c8bb37f14d6034301cb19940323e
18 changes: 14 additions & 4 deletions recommenders/models/sar/sar_singlenode.py
Original file line number Diff line number Diff line change
@@ -293,6 +293,10 @@ def fit(self, df):
def score(self, test, remove_seen=False):
"""Score all items for test users.

+ .. note::
+
+ Please make sure that `test` has no duplicate users.
+
Args:
test (pandas.DataFrame): users to test
remove_seen (bool): flag to remove items seen in training from recommendation
@@ -305,7 +309,7 @@ def score(self, test, remove_seen=False):
user_ids = list(
map(
lambda user: self.user2index.get(user, np.NaN),
- test[self.col_user].unique(),
+ test[self.col_user],
)
)
if any(np.isnan(user_ids)):
@@ -448,6 +452,10 @@ def get_item_based_topk(self, items, top_k=10, sort_top_k=True):
def recommend_k_items(self, test, top_k=10, sort_top_k=True, remove_seen=False):
"""Recommend top K items for all users which are in the test set

+ .. note::
+
+ Please make sure that `test` has no duplicate users.
+
Args:
test (pandas.DataFrame): users to test
top_k (int): number of top items to recommend
@@ -466,9 +474,7 @@ def recommend_k_items(self, test, top_k=10, sort_top_k=True, remove_seen=False):

df = pd.DataFrame(
{
- self.col_user: np.repeat(
- test[self.col_user].drop_duplicates().values, top_items.shape[1]
- ),
+ self.col_user: np.repeat(test[self.col_user].values, top_items.shape[1]),
self.col_item: [self.index2item[item] for item in top_items.flatten()],
self.col_prediction: top_scores.flatten(),
}
@@ -480,6 +486,10 @@ def recommend_k_items(self, test, top_k=10, sort_top_k=True, remove_seen=False):
def predict(self, test):
"""Output SAR scores for only the user-item pairs that are in the test set

+ .. note::
+
+ Please make sure that `test` has no duplicates.
+
Args:
test (pandas.DataFrame): DataFrame that contains users and items to test