Skip to content

Commit

Permalink
Update implicit-feedback run
Browse files Browse the repository at this point in the history
  • Loading branch information
mdekstrand committed May 6, 2024
1 parent b9df58e commit da9c4d5
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 21 deletions.
68 changes: 47 additions & 21 deletions lenskit/algorithms/item_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,10 @@ def _predict_weighted_average(
# save the fast-path items
if torch.any(fast):
ris_fast = row_is[fast]
vs_fast = row_vs[fast]
avs_fast = row_avs[fast]
vals_fast = row_vs[fast] * rate_v[i]
nbr_sims[ris_fast, counts[ris_fast]] = avs_fast
vals_fast = vs_fast * rate_v[i]
nbr_sims[ris_fast, counts[ris_fast]] = vs_fast
nbr_vals[ris_fast, counts[ris_fast]] = vals_fast
counts[ris_fast] += 1
t_sims[ris_fast] += avs_fast
Expand All @@ -198,7 +199,7 @@ def _predict_weighted_average(
ris_exc = ris_slow[exc]
ravs_exc = row_avs[slow][exc]
rvs_exc = row_vs[slow][exc]
t_sims[ris_exc] += ravs_exc - min_sims[exc]
t_sims[ris_exc] += ravs_exc - min_sims[exc].abs()
scores[ris_exc] += rvs_exc * rate_v[i] - min_vals[exc]
# and save
nbr_sims[ris_exc, mins[exc]] = ravs_exc
Expand All @@ -212,6 +213,7 @@ def _predict_weighted_average(
return scores


@torch.jit.script
def _predict_sum(
model: torch.Tensor,
nrange: tuple[int, int],
Expand All @@ -222,36 +224,60 @@ def _predict_sum(
nitems, _ni = model.shape
assert nitems == _ni
min_nbrs, max_nbrs = nrange
t_sims = np.full(nitems, np.nan, dtype=np.float_)

_logger.debug("rated: %s", rated)
_logger.debug("ratev: %s", rate_v)
_msg(logging.DEBUG, f"sum-scoring with {len(rated)} items")

# we proceed rating-by-rating, and accumulate results
t_sims = torch.zeros(nitems)
counts = torch.zeros(nitems, dtype=torch.int32)
nbr_sims = torch.zeros((nitems, max_nbrs))

# fast path: compute everything that we can
for i, iidx in enumerate(rated):
iidx = int(iidx)
row = model[iidx]
row_is = row.indices()
row_is = row.indices()[0]
row_vs = row.values()
_logger.debug("item %d: %d neighbors", iidx, len(row_is))
assert row_is.shape == row_vs.shape

counts[row_is] += 1
t_sims[row_is] += torch.abs(row_vs)
fast = counts[row_is] < max_nbrs

# slow-path items that have too many sims
if torch.any(counts > max_nbrs):
n = torch.sum(counts > max_nbrs)
_msg(logging.WARNING, f"{n} items have too many neighbors")
# save the fast-path items
if torch.any(fast):
ris_fast = row_is[fast]
vs_fast = row_vs[fast]
nbr_sims[ris_fast, counts[ris_fast]] = vs_fast
counts[ris_fast] += 1
t_sims[ris_fast] += vs_fast

# compute averages for items that don't match the threshold
_logger.debug("sims: %s", t_sims)
_logger.debug("counts: %s", counts)
mask = counts >= min_nbrs
# skip early if we're done
if torch.all(fast):
continue

# now we have the slow-path items
slow = torch.logical_not(fast)
ris_slow = row_is[slow]
rvs_slow = row_vs[slow]
# this is brute-force linear search for simplicity right now
# for each, find the neighbor that's the smallest:
mins = torch.argmin(nbr_sims[ris_slow], dim=1)
# find the items where this neighbor exceeds the smallest so far:
min_sims = nbr_sims[ris_slow, mins]
exc = min_sims < rvs_slow
if not torch.any(exc):
continue

# now we need to update values: add in new and remove old
# anywhere our new neighbor is greater than the smallest, replace the smallest
ris_exc = ris_slow[exc]
rvs_exc = rvs_slow[exc]
t_sims[ris_exc] -= min_sims[exc]
t_sims[ris_exc] += rvs_exc
# and save
nbr_sims[ris_exc, mins[exc]] = rvs_exc

# items with fewer than min_nbrs neighbors get no score (NaN)
t_sims[counts < min_nbrs] = torch.nan

return torch.where(mask, t_sims, torch.nan)
return t_sims


_predictors: dict[str, AggFun] = {
Expand Down
41 changes: 41 additions & 0 deletions tests/test_knn_item_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,47 @@ def test_ii_large_models(rng):
raise AssertionError(f"missing {np.sum(missing)} unbounded values")


@lktu.wantjit
@mark.slow
def test_ii_implicit_large(rng):
    """
    Test that implicit-feedback mode works on full test data.

    Fits an implicit-feedback ``ItemItem`` model on the user/item pairs of
    ``ml_ratings``, requests recommendations for a random sample of users, and
    checks that every recommended item's score equals the sum of the largest
    ``NBRS`` similarities between that item and the items the user interacted
    with, as read from the densified similarity matrix.
    """
    _log.info("training model")
    NBRS = 5
    NUSERS = 25
    NRECS = 50
    algo = knn.ItemItem(NBRS, feedback="implicit")
    _log.info("agg: %s", algo.aggregate)
    algo = Recommender.adapt(algo)
    algo.fit(ml_ratings[["user", "item"]])

    users = rng.choice(ml_ratings["user"].unique(), NUSERS)

    items: pd.Index = algo.predictor.item_index_
    mat: torch.Tensor = algo.predictor.sim_matrix_.to_dense()

    for user in users:
        recs = algo.recommend(user, NRECS)
        _log.info("user %s recs\n%s", user, recs)
        assert len(recs) == NRECS
        urates = ml_ratings[ml_ratings["user"] == user]

        # rows of the dense similarity matrix for the items this user has rated
        rated_idx = torch.from_numpy(items.get_indexer_for(urates["item"].values))
        smat = mat[rated_idx, :]
        for row in recs.itertuples():
            # similarities between the recommended item and each rated item
            col = smat[:, items.get_loc(row.item)]
            # topk raises if k exceeds the number of elements, so clamp k for
            # users with fewer than NBRS rated items
            k = min(NBRS, col.shape[0])
            top, _ = torch.topk(col, k)
            score = top.sum()
            try:
                assert row.score == approx(score)
            except AssertionError:
                # log the details needed to diagnose the mismatch, then
                # re-raise the original failure with its traceback intact
                _log.error("test failed for user %s item %s", user, row.item)
                _log.info("score: %.6f", row.score)
                _log.info("sims:\n%s", col)
                _log.info("total: %.3f", col.sum())
                _log.info("filtered: %s", top)
                _log.info("filtered sum: %.3f", top.sum())
                raise


@lktu.wantjit
def test_ii_save_load(tmp_path, ml_subset):
"Save and load a model"
Expand Down

0 comments on commit da9c4d5

Please sign in to comment.