diff --git a/tests/test_als_explicit.py b/tests/test_als_explicit.py index 762e4f296..84411526a 100644 --- a/tests/test_als_explicit.py +++ b/tests/test_als_explicit.py @@ -22,11 +22,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) -methods = mark.parametrize('m', ['lu', 'cd']) +methods = mark.parametrize("m", ["lu", "cd"]) @methods @@ -80,7 +80,7 @@ def test_als_predict_basic_for_new_ratings(): assert algo.bias.mean_ == approx(simple_df.rating.mean()) - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values preds = algo.predict_for_user(15, [3], new_ratings) @@ -100,7 +100,7 @@ def test_als_predict_basic_for_new_user_with_new_ratings(): preds = algo.predict_for_user(u, [i]) new_u_id = -1 - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values new_preds = algo.predict_for_user(new_u_id, [i], new_ratings) @@ -127,9 +127,13 @@ def test_als_predict_for_new_users_with_new_ratings(): user_data = ratings[ratings.user == u] - _log.debug("user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :])) + _log.debug( + "user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :]) + ) - new_ratings = pd.Series(user_data.rating.to_numpy(), index=user_data.item) # items as index and ratings as values + new_ratings = pd.Series( + user_data.rating.to_numpy(), index=user_data.item + ) # items as index and ratings as values new_preds = algo.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -186,9 +190,13 @@ def test_als_predict_no_user_features_basic(): user_data = ratings[ratings.user == u] - _log.debug("user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :])) + _log.debug( + "user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :]) + ) - new_ratings = pd.Series(user_data.rating.to_numpy(), index=user_data.item) # items as index and ratings as values + new_ratings = pd.Series( + user_data.rating.to_numpy(), index=user_data.item + ) # items as index and ratings as values new_preds = algo_no_user_features.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -209,8 +217,8 @@ def test_als_train_large(): assert algo.n_items == ratings.item.nunique() assert algo.n_users == ratings.user.nunique() - icounts = ratings.groupby('item').rating.count() - isums = ratings.groupby('item').rating.sum() + icounts = ratings.groupby("item").rating.count() + isums = ratings.groupby("item").rating.sum() is2 = isums - icounts * ratings.rating.mean() imeans = is2 / (icounts + 5) ibias = pd.Series(algo.bias.item_offsets_, index=algo.item_index_) @@ -220,14 +228,14 @@ def test_als_train_large(): # don't use wantjit, use this to do a non-JIT test def test_als_save_load(): - original = als.BiasedMF(5, iterations=5, method='lu') + original = als.BiasedMF(5, iterations=5, method="lu") ratings = lktu.ml_test.ratings original.fit(ratings) assert original.bias.mean_ == approx(ratings.rating.mean()) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized 
to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -239,26 +247,26 @@ def test_als_save_load(): assert np.all(algo.user_index_ == original.user_index_) # make sure it still works - preds = algo.predict_for_user(10, np.arange(0, 50, dtype='i8')) + preds = algo.predict_for_user(10, np.arange(0, 50, dtype="i8")) assert len(preds) == 50 -@mark.skipif(not binpickle, reason='binpickle not available') +@mark.skipif(not binpickle, reason="binpickle not available") def test_als_binpickle(tmp_path): "Test saving ALS with BinPickle" - original = als.BiasedMF(20, iterations=5, method='lu') + original = als.BiasedMF(20, iterations=5, method="lu") ratings = lktu.ml_test.ratings original.fit(ratings) assert original.bias.mean_ == approx(ratings.rating.mean()) - file = tmp_path / 'als.bpk' + file = tmp_path / "als.bpk" binpickle.dump(original, file) with binpickle.BinPickleFile(file) as bpf: # the pickle data should be small - _log.info('serialized to %d pickle bytes', bpf.entries[-1].dec_length) + _log.info("serialized to %d pickle bytes", bpf.entries[-1].dec_length) pickle_dis(bpf._read_buffer(bpf.entries[-1])) assert bpf.entries[-1].dec_length < 2048 @@ -273,27 +281,27 @@ def test_als_binpickle(tmp_path): assert np.all(algo.user_index_ == original.user_index_) # make sure it still works - preds = algo.predict_for_user(10, np.arange(0, 50, dtype='i8')) + preds = algo.predict_for_user(10, np.arange(0, 50, dtype="i8")) assert len(preds) == 50 @lktu.wantjit @mark.slow def test_als_method_match(): - lu = als.BiasedMF(20, iterations=15, reg=(2, 0.001), method='lu', rng_spec=42) - cd = als.BiasedMF(20, iterations=20, reg=(2, 0.001), method='cd', rng_spec=42) + lu = als.BiasedMF(20, iterations=15, reg=(2, 0.001), method="lu", rng_spec=42) + cd = als.BiasedMF(20, iterations=20, reg=(2, 0.001), method="cd", rng_spec=42) ratings = lktu.ml_test.ratings timer = Stopwatch() lu.fit(ratings) timer.stop() - _log.info('fit with LU solver in %s', timer) + _log.info("fit with LU solver in %s", timer) timer = Stopwatch() cd.fit(ratings) timer.stop() - _log.info('fit with CD solver in %s', timer) + _log.info("fit with CD solver in %s", timer) assert lu.bias.mean_ == approx(ratings.rating.mean()) assert cd.bias.mean_ == approx(ratings.rating.mean()) @@ -307,29 +315,31 @@ def test_als_method_match(): cd_preds = cd.predict_for_user(u, items) diff = lu_preds - cd_preds adiff = np.abs(diff) - _log.info('user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f', u, - np.linalg.norm(diff, 2), - np.min(adiff), np.median(adiff), np.max(adiff), np.quantile(adiff, 0.9)) - - preds.append(pd.DataFrame({ - 'user': u, - 'item': items, - 'lu': lu_preds, - 'cd': cd_preds, - 'adiff': adiff - })) + _log.info( + "user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f", + u, + np.linalg.norm(diff, 2), + np.min(adiff), + np.median(adiff), + np.max(adiff), + np.quantile(adiff, 0.9), + ) + + preds.append( + pd.DataFrame({"user": u, "item": items, "lu": lu_preds, "cd": cd_preds, "adiff": adiff}) + ) preds = pd.concat(preds, ignore_index=True) - _log.info('LU preds:\n%s', preds.lu.describe()) - _log.info('CD preds:\n%s', preds.cd.describe()) - _log.info('overall differences:\n%s', preds.adiff.describe()) + _log.info("LU preds:\n%s", preds.lu.describe()) + _log.info("CD preds:\n%s", preds.cd.describe()) + _log.info("overall differences:\n%s", preds.adiff.describe()) # there are differences. 
our check: the 90% are under a quarter star assert np.quantile(adiff, 0.9) <= 0.27 @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_als_batch_accuracy(): from lenskit.algorithms import bias import lenskit.crossfold as xf @@ -337,30 +347,30 @@ def test_als_batch_accuracy(): ratings = lktu.ml100k.ratings - lu_algo = als.BiasedMF(25, iterations=20, damping=5, method='lu') - cd_algo = als.BiasedMF(25, iterations=25, damping=5, method='cd') + lu_algo = als.BiasedMF(25, iterations=20, damping=5, method="lu") + cd_algo = als.BiasedMF(25, iterations=25, damping=5, method="cd") # algo = bias.Fallback(svd_algo, bias.Bias(damping=5)) def eval(train, test): - _log.info('training LU') + _log.info("training LU") lu_algo.fit(train) - _log.info('training CD') + _log.info("training CD") cd_algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return test.assign(lu_pred=lu_algo.predict(test), cd_pred=cd_algo.predict(test)) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) preds = pd.concat(eval(train, test) for (train, test) in folds) - preds['abs_diff'] = np.abs(preds.lu_pred - preds.cd_pred) - _log.info('predictions:\n%s', preds.sort_values('abs_diff', ascending=False)) - _log.info('diff summary:\n%s', preds.abs_diff.describe()) + preds["abs_diff"] = np.abs(preds.lu_pred - preds.cd_pred) + _log.info("predictions:\n%s", preds.sort_values("abs_diff", ascending=False)) + _log.info("diff summary:\n%s", preds.abs_diff.describe()) lu_mae = pm.mae(preds.lu_pred, preds.rating) assert lu_mae == approx(0.73, abs=0.045) cd_mae = pm.mae(preds.cd_pred, preds.rating) assert cd_mae == approx(0.73, abs=0.045) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.lu_pred, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.lu_pred, df.rating)) assert user_rmse.mean() == approx(0.94, abs=0.05) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.cd_pred, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.cd_pred, df.rating)) assert user_rmse.mean() == approx(0.94, abs=0.05) diff --git a/tests/test_als_implicit.py b/tests/test_als_implicit.py index d72ab6757..94006cb69 100644 --- a/tests/test_als_implicit.py +++ b/tests/test_als_implicit.py @@ -20,12 +20,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13]}) +simple_df = pd.DataFrame({"item": [1, 1, 2, 3], "user": [10, 12, 10, 13]}) simple_dfr = simple_df.assign(rating=[4.0, 3.0, 5.0, 2.0]) -methods = mark.parametrize('m', ['lu', 'cg']) +methods = mark.parametrize("m", ["lu", "cg"]) @methods @@ -52,7 +51,7 @@ def test_als_predict_basic(): def test_als_predict_basic_for_new_ratings(): - """ Test ImplicitMF ability to support new ratings """ + """Test ImplicitMF ability to support new ratings""" algo = als.ImplicitMF(20, iterations=10) algo.fit(simple_df) @@ -115,7 +114,7 @@ def test_als_predict_for_new_users_with_new_ratings(): _log.debug("user_features from fit: " + str(algo.user_features_[upos, :])) # get the user's rating series - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() new_preds = algo.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -151,26 +150,28 @@ def test_als_recs_topn_for_new_users_with_new_ratings(rng): 
recs = rec_algo.recommend(u, 10) user_data = ratings[ratings.user == u] upos = algo.user_index_.get_loc(u) - _log.info('user %s: %s ratings', u, len(user_data)) + _log.info("user %s: %s ratings", u, len(user_data)) _log.debug("user_features from fit: " + str(algo.user_features_[upos, :])) # get the user's rating series - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() new_recs = rec_algo.recommend(new_u_id, 10, ratings=new_ratings) # merge new & old recs - all_recs = pd.merge(recs.rename(columns={'score': 'old_score'}), - new_recs.rename(columns={'score': 'new_score'}), - how='outer').fillna(-np.inf) + all_recs = pd.merge( + recs.rename(columns={"score": "old_score"}), + new_recs.rename(columns={"score": "new_score"}), + how="outer", + ).fillna(-np.inf) tau = stats.kendalltau(all_recs.old_score, all_recs.new_score) - _log.info('correlation for user %s: %f', u, tau.correlation) + _log.info("correlation for user %s: %f", u, tau.correlation) correlations.loc[u] = tau.correlation - _log.debug('correlations: %s', correlations) + _log.debug("correlations: %s", correlations) - assert not(any(correlations.isnull())) + assert not (any(correlations.isnull())) assert all(correlations >= 0.5) @@ -206,7 +207,7 @@ def test_als_predict_no_user_features_basic(): preds = algo.predict_for_user(u, items) user_data = ratings[ratings.user == u] - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() algo_no_user_features = als.ImplicitMF(5, iterations=10, method="lu", save_user_features=False) algo_no_user_features.fit(ratings) @@ -236,7 +237,7 @@ def test_als_save_load(tmp_path): ratings = lktu.ml_test.ratings algo.fit(ratings) - fn = tmp_path / 'model.bpk' + fn = tmp_path / "model.bpk" binpickle.dump(algo, fn, codec=None) restored = binpickle.load(fn) @@ -250,7 +251,7 @@ def test_als_save_load(tmp_path): def test_als_train_large_noratings(): algo = als.ImplicitMF(20, iterations=20) ratings = lktu.ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] algo.fit(ratings) assert len(algo.user_index_) == ratings.user.nunique() @@ -274,20 +275,20 @@ def test_als_train_large_ratings(): @lktu.wantjit @mark.slow def test_als_method_match(): - lu = als.ImplicitMF(20, iterations=15, method='lu', rng_spec=42) - cg = als.ImplicitMF(20, iterations=15, method='cg', rng_spec=42) + lu = als.ImplicitMF(20, iterations=15, method="lu", rng_spec=42) + cg = als.ImplicitMF(20, iterations=15, method="cg", rng_spec=42) ratings = lktu.ml_test.ratings timer = Stopwatch() lu.fit(ratings) timer.stop() - _log.info('fit with LU solver in %s', timer) + _log.info("fit with LU solver in %s", timer) timer = Stopwatch() cg.fit(ratings) timer.stop() - _log.info('fit with CG solver in %s', timer) + _log.info("fit with CG solver in %s", timer) preds = [] @@ -298,30 +299,32 @@ def test_als_method_match(): cd_preds = cg.predict_for_user(u, items) diff = lu_preds - cd_preds adiff = np.abs(diff) - _log.info('user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f', u, - np.linalg.norm(diff, 2), - np.min(adiff), np.median(adiff), np.max(adiff), np.quantile(adiff, 0.9)) - - preds.append(pd.DataFrame({ - 'user': u, - 'item': items, - 'lu': lu_preds, - 'cg': cd_preds, - 'adiff': adiff - })) - _log.info('user %s tau: %s', u, stats.kendalltau(lu_preds, cd_preds)) + _log.info( + "user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f", + u, + 
np.linalg.norm(diff, 2), + np.min(adiff), + np.median(adiff), + np.max(adiff), + np.quantile(adiff, 0.9), + ) + + preds.append( + pd.DataFrame({"user": u, "item": items, "lu": lu_preds, "cg": cd_preds, "adiff": adiff}) + ) + _log.info("user %s tau: %s", u, stats.kendalltau(lu_preds, cd_preds)) preds = pd.concat(preds, ignore_index=True) - _log.info('LU preds:\n%s', preds.lu.describe()) - _log.info('CD preds:\n%s', preds.cg.describe()) - _log.info('overall differences:\n%s', preds.adiff.describe()) + _log.info("LU preds:\n%s", preds.lu.describe()) + _log.info("CD preds:\n%s", preds.cg.describe()) + _log.info("overall differences:\n%s", preds.adiff.describe()) # there are differences. our check: the 90% are reasonable assert np.quantile(adiff, 0.9) < 0.5 @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_als_implicit_batch_accuracy(): import lenskit.crossfold as xf from lenskit import batch @@ -330,31 +333,31 @@ def test_als_implicit_batch_accuracy(): ratings = lktu.ml100k.ratings def eval(train, test): - train = train.astype({'rating': np.float_}) - _log.info('training CG') - cg_algo = als.ImplicitMF(25, iterations=20, method='cg') + train = train.astype({"rating": np.float_}) + _log.info("training CG") + cg_algo = als.ImplicitMF(25, iterations=20, method="cg") cg_algo = Recommender.adapt(cg_algo) cg_algo.fit(train) - _log.info('training LU') - lu_algo = als.ImplicitMF(25, iterations=20, method='lu') + _log.info("training LU") + lu_algo = als.ImplicitMF(25, iterations=20, method="lu") lu_algo = Recommender.adapt(lu_algo) lu_algo.fit(train) users = test.user.unique() - _log.info('testing %d users', len(users)) + _log.info("testing %d users", len(users)) cg_recs = batch.recommend(cg_algo, users, 100, n_jobs=2) lu_recs = batch.recommend(lu_algo, users, 100, n_jobs=2) - return pd.concat({'CG': cg_recs, 'LU': lu_recs}, names=['Method']).reset_index('Method') + return pd.concat({"CG": cg_recs, "LU": lu_recs}, names=["Method"]).reset_index("Method") folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) test = pd.concat(te for (tr, te) in folds) recs = pd.concat((eval(train, test) for (train, test) in folds), ignore_index=True) - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, test) - results = results.groupby('Method')['ndcg'].mean() - _log.info('LU nDCG for users is %.4f', results.loc['LU'].mean()) - _log.info('CG nDCG for users is %.4f', results.loc['CG'].mean()) + results = results.groupby("Method")["ndcg"].mean() + _log.info("LU nDCG for users is %.4f", results.loc["LU"].mean()) + _log.info("CG nDCG for users is %.4f", results.loc["CG"].mean()) assert all(results > 0.28) - assert results.loc['LU'] == approx(results.loc['CG'], rel=0.05) + assert results.loc["LU"] == approx(results.loc["CG"], rel=0.05) diff --git a/tests/test_batch_predict.py b/tests/test_batch_predict.py index 6daa308bd..e176a6370 100644 --- a/tests/test_batch_predict.py +++ b/tests/test_batch_predict.py @@ -12,7 +12,7 @@ _log = logging.getLogger(__name__) -MLB = namedtuple('MLB', ['ratings', 'algo']) +MLB = namedtuple("MLB", ["ratings", "algo"]) @pytest.fixture @@ -24,12 +24,12 @@ def mlb(): def test_predict_single(mlb): - tf = pd.DataFrame({'user': [1], 'item': [31]}) + tf = pd.DataFrame({"user": [1], "item": [31]}) res = lkb.predict(mlb.algo, tf) assert len(res) == 1 assert 
all(res.user == 1) - assert set(res.columns) == set(['user', 'item', 'prediction']) + assert set(res.columns) == set(["user", "item", "prediction"]) assert all(res.item == 31) expected = mlb.algo.mean_ + mlb.algo.item_offsets_.loc[31] + mlb.algo.user_offsets_.loc[1] @@ -45,19 +45,19 @@ def test_predict_user(mlb): test_unrated = np.random.choice(unrated, 10, replace=False) test_items = pd.concat([test_rated, pd.Series(test_unrated)]) - tf = pd.DataFrame({'user': uid, 'item': test_items}) + tf = pd.DataFrame({"user": uid, "item": test_items}) res = lkb.predict(mlb.algo, tf) assert len(res) == 15 - assert set(res.columns) == set(['user', 'item', 'prediction']) + assert set(res.columns) == set(["user", "item", "prediction"]) assert all(res.user == uid) assert set(res.item) == set(test_items) # did we get the right predictions? - preds = res.set_index(['user', 'item']) - preds['rating'] = mlb.algo.mean_ - preds['rating'] += mlb.algo.item_offsets_ - preds['rating'] += mlb.algo.user_offsets_.loc[uid] + preds = res.set_index(["user", "item"]) + preds["rating"] = mlb.algo.mean_ + preds["rating"] += mlb.algo.item_offsets_ + preds["rating"] += mlb.algo.user_offsets_.loc[uid] assert preds.prediction.values == pytest.approx(preds.rating.values) @@ -66,17 +66,17 @@ def test_predict_two_users(mlb): tf = None # make sure we get both UIDs while tf is None or len(set(tf.user)) < 2: - tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ('user', 'item')].sample(10) + tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ("user", "item")].sample(10) res = lkb.predict(mlb.algo, tf) assert len(res) == 10 assert set(res.user) == set(uids) - preds = res.set_index(['user', 'item']) - preds['rating'] = mlb.algo.mean_ - preds['rating'] += mlb.algo.item_offsets_ - preds['rating'] += mlb.algo.user_offsets_ + preds = res.set_index(["user", "item"]) + preds["rating"] = mlb.algo.mean_ + preds["rating"] += mlb.algo.item_offsets_ + preds["rating"] += mlb.algo.user_offsets_ assert preds.prediction.values == pytest.approx(preds.rating.values) @@ -85,26 +85,26 @@ def test_predict_include_rating(mlb): tf = None # make sure we get both UIDs while tf is None or len(set(tf.user)) < 2: - tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ('user', 'item', 'rating')].sample(10) + tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ("user", "item", "rating")].sample(10) res = lkb.predict(mlb.algo, tf) assert len(res) == 10 assert set(res.user) == set(uids) - preds = res.set_index(['user', 'item']) - preds['expected'] = mlb.algo.mean_ - preds['expected'] += mlb.algo.item_offsets_ - preds['expected'] += mlb.algo.user_offsets_ + preds = res.set_index(["user", "item"]) + preds["expected"] = mlb.algo.mean_ + preds["expected"] += mlb.algo.item_offsets_ + preds["expected"] += mlb.algo.user_offsets_ assert preds.prediction.values == pytest.approx(preds.expected.values) - urv = mlb.ratings.set_index(['user', 'item']) + urv = mlb.ratings.set_index(["user", "item"]) assert all(preds.rating.values == urv.loc[preds.index, :].rating.values) -@pytest.mark.skipif(not lktu.ml100k.available, reason='ML-100K required') +@pytest.mark.skipif(not lktu.ml100k.available, reason="ML-100K required") @pytest.mark.eval -@pytest.mark.parametrize('ncpus', [None, 1, 2]) +@pytest.mark.parametrize("ncpus", [None, 1, 2]) def test_bias_batch_predict(ncpus): from lenskit.algorithms import bias import lenskit.crossfold as xf @@ -116,19 +116,19 @@ def test_bias_batch_predict(ncpus): algo = bias.Bias(damping=5) def eval(train, test): - _log.info('running training') + 
_log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.predict(algo, test, n_jobs=ncpus) return recs - preds = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))) + preds = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + ) - _log.info('analyzing predictions') + _log.info("analyzing predictions") rmse = pm.rmse(preds.prediction, preds.rating) - _log.info('RMSE is %f', rmse) + _log.info("RMSE is %f", rmse) assert rmse == pytest.approx(0.95, abs=0.1) @@ -144,4 +144,4 @@ def test_batch_predict_preshared(): ares = lkb.train_isolated(algo, train) preds = lkb.predict(ares, test) assert len(preds) == len(test) - assert not any(preds['prediction'].isna()) + assert not any(preds["prediction"].isna()) diff --git a/tests/test_batch_recommend.py b/tests/test_batch_recommend.py index 92ad52e6a..2d8a3d08b 100644 --- a/tests/test_batch_recommend.py +++ b/tests/test_batch_recommend.py @@ -12,7 +12,7 @@ from lenskit import batch, topn import lenskit.crossfold as xf -MLB = namedtuple('MLB', ['ratings', 'algo']) +MLB = namedtuple("MLB", ["ratings", "algo"]) _log = logging.getLogger(__name__) @@ -32,33 +32,32 @@ def __init__(self, ratings): self.isolate = False def evaluate(self, algo, train, test, **kwargs): - _log.info('running training') + _log.info("running training") if self.isolate: algo = batch.train_isolated(algo, train) else: algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(algo, test.user.unique(), 100, **kwargs) return recs def eval_all(self, algo, **kwargs): - return pd.concat(self.evaluate(algo, train, test, **kwargs) - for (train, test) in self.folds) + return pd.concat(self.evaluate(algo, train, test, **kwargs) for (train, test) in self.folds) def check_positive_ndcg(self, recs): - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, self.test) dcg = results.ndcg - _log.info('nDCG for %d users is %f (max=%f)', len(dcg), dcg.mean(), dcg.max()) + _log.info("nDCG for %d users is %f (max=%f)", len(dcg), dcg.mean(), dcg.max()) assert dcg.mean() > 0 @pytest.fixture def ml_folds() -> MLFolds: if not lktu.ml100k.available: - raise pytest.skip('ML-100K not available') + raise pytest.skip("ML-100K not available") ratings = lktu.ml100k.ratings return MLFolds(ratings) @@ -67,9 +66,9 @@ def test_recommend_single(mlb): res = batch.recommend(mlb.algo, [1], None, {1: [31]}) assert len(res) == 1 - assert all(res['user'] == 1) - assert all(res['rank'] == 1) - assert set(res.columns) == set(['user', 'rank', 'item', 'score']) + assert all(res["user"] == 1) + assert all(res["rank"] == 1) + assert set(res.columns) == set(["user", "rank", "item", "score"]) algo = mlb.algo.predictor expected = algo.mean_ + algo.item_offsets_.loc[31] + algo.user_offsets_.loc[1] @@ -87,9 +86,9 @@ def candidates(user): res = batch.recommend(mlb.algo, [5], 10, candidates) assert len(res) == 10 - assert set(res.columns) == set(['user', 'rank', 'item', 'score']) - assert all(res['user'] == uid) - assert all(res['rank'] == np.arange(10) + 1) + assert set(res.columns) == set(["user", "rank", "item", "score"]) + assert all(res["user"] == uid) + assert all(res["rank"] == np.arange(10) + 1) # they should be in decreasing 
order assert all(np.diff(res.score) <= 0) @@ -105,12 +104,12 @@ def candidates(user): assert len(res) == 20 assert set(res.user) == set([5, 10]) - assert all(res.groupby('user').item.count() == 10) - assert all(res.groupby('user')['rank'].max() == 10) + assert all(res.groupby("user").item.count() == 10) + assert all(res.groupby("user")["rank"].max() == 10) assert all(np.diff(res[res.user == 5].score) <= 0) - assert all(np.diff(res[res.user == 5]['rank']) == 1) + assert all(np.diff(res[res.user == 5]["rank"]) == 1) assert all(np.diff(res[res.user == 10].score) <= 0) - assert all(np.diff(res[res.user == 10]['rank']) == 1) + assert all(np.diff(res[res.user == 10]["rank"]) == 1) def test_recommend_no_cands(mlb): @@ -118,19 +117,19 @@ def test_recommend_no_cands(mlb): assert len(res) == 20 assert set(res.user) == set([5, 10]) - assert all(res.groupby('user').item.count() == 10) - assert all(res.groupby('user')['rank'].max() == 10) + assert all(res.groupby("user").item.count() == 10) + assert all(res.groupby("user")["rank"].max() == 10) assert all(np.diff(res[res.user == 5].score) <= 0) - assert all(np.diff(res[res.user == 5]['rank']) == 1) + assert all(np.diff(res[res.user == 5]["rank"]) == 1) assert all(np.diff(res[res.user == 10].score) <= 0) - assert all(np.diff(res[res.user == 10]['rank']) == 1) + assert all(np.diff(res[res.user == 10]["rank"]) == 1) - idx_rates = mlb.ratings.set_index(['user', 'item']) - merged = res.join(idx_rates, on=['user', 'item'], how='inner') + idx_rates = mlb.ratings.set_index(["user", "item"]) + merged = res.join(idx_rates, on=["user", "item"], how="inner") assert len(merged) == 0 -@pytest.mark.parametrize(('ncpus', 'isolate'), [(None, False), (1, False), (2, True)]) +@pytest.mark.parametrize(("ncpus", "isolate"), [(None, False), (1, False), (2, True)]) @pytest.mark.eval def test_bias_batch_recommend(ml_folds: MLFolds, ncpus, isolate): algo = Bias(damping=5) @@ -142,7 +141,7 @@ def test_bias_batch_recommend(ml_folds: MLFolds, ncpus, isolate): ml_folds.check_positive_ndcg(recs) -@pytest.mark.parametrize('ncpus', [None, 1, 2]) +@pytest.mark.parametrize("ncpus", [None, 1, 2]) @pytest.mark.eval def test_pop_batch_recommend(ml_folds: MLFolds, ncpus): algo = Popular() diff --git a/tests/test_batch_train.py b/tests/test_batch_train.py index 1f79cc841..fbd1a4ca5 100644 --- a/tests/test_batch_train.py +++ b/tests/test_batch_train.py @@ -21,7 +21,7 @@ def test_train_isolate(): def test_train_isolate_file(tmp_path): - fn = tmp_path / 'saved.bpk' + fn = tmp_path / "saved.bpk" algo = Bias() algo = Recommender.adapt(algo) diff --git a/tests/test_bias.py b/tests/test_bias.py index db459427d..275c71c26 100644 --- a/tests/test_bias.py +++ b/tests/test_bias.py @@ -14,9 +14,9 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_bias_check_arguments(): @@ -39,12 +39,12 @@ def test_bias_full(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 1.5, -1.5])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == 
set([10, 12, 13]) assert algo.user_offsets_.loc[[10, 12, 13]].values == approx(np.array([0.25, -0.5, 0])) @@ -54,13 +54,13 @@ def test_bias_clone(): algo.fit(simple_df) params = algo.get_params() - assert sorted(params.keys()) == ['damping', 'items', 'users'] + assert sorted(params.keys()) == ["damping", "items", "users"] a2 = lku.clone(algo) assert a2 is not algo - assert getattr(a2, 'mean_', None) is None - assert getattr(a2, 'item_offsets_', None) is None - assert getattr(a2, 'user_offsets_', None) is None + assert getattr(a2, "mean_", None) is None + assert getattr(a2, "item_offsets_", None) is None + assert getattr(a2, "user_offsets_", None) is None def test_bias_global_only(): @@ -77,7 +77,7 @@ def test_bias_no_user(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 1.5, -1.5])) @@ -91,7 +91,7 @@ def test_bias_no_item(): assert algo.item_offsets_ is None assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) assert algo.user_offsets_.loc[[10, 12, 13]].values == approx(np.array([1.0, -0.5, -1.5])) @@ -99,8 +99,8 @@ def test_bias_no_item(): def test_bias_index_props(): algo = Bias() algo.fit(simple_df) - assert all(np.sort(algo.user_index) == np.unique(simple_df['user'])) - assert all(np.sort(algo.item_index) == np.unique(simple_df['item'])) + assert all(np.sort(algo.user_index) == np.unique(simple_df["user"])) + assert all(np.sort(algo.item_index) == np.unique(simple_df["item"])) def test_bias_global_predict(): @@ -140,13 +140,13 @@ def test_bias_new_user_predict(): algo = Bias() algo.fit(simple_df) - ratings = pd.DataFrame({'item': [1, 2, 3], 'rating': [1.5, 2.5, 3.5]}) - ratings = ratings.set_index('item').rating + ratings = pd.DataFrame({"item": [1, 2, 3], "rating": [1.5, 2.5, 3.5]}) + ratings = ratings.set_index("item").rating p = algo.predict_for_user(None, [1, 3], ratings=ratings) offs = ratings - algo.mean_ - algo.item_offsets_ umean = offs.mean() - _log.info('user mean is %f', umean) + _log.info("user mean is %f", umean) assert len(p) == 2 assert p.values == approx((algo.mean_ + algo.item_offsets_ + umean).loc[[1, 3]].values) @@ -180,12 +180,12 @@ def test_bias_train_ml_ratings(): algo.fit(ratings) assert algo.mean_ == approx(ratings.rating.mean()) - imeans_data = ratings.groupby('item').rating.mean() + imeans_data = ratings.groupby("item").rating.mean() imeans_algo = algo.item_offsets_ + algo.mean_ ares, data = imeans_algo.align(imeans_data) assert ares.values == approx(data.values) - urates = ratings.set_index('user').loc[2].set_index('item').rating + urates = ratings.set_index("user").loc[2].set_index("item").rating umean = (urates - imeans_data[urates.index]).mean() p = algo.predict_for_user(2, [10, 11, -1]) assert len(p) == 3 @@ -200,15 +200,15 @@ def test_bias_transform(): normed = algo.fit_transform(ratings) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'], 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"], 1.0e-6) - n2 = 
ratings.join(algo.item_offsets_, on='item') - n2 = n2.join(algo.user_offsets_, on='user') + n2 = ratings.join(algo.item_offsets_, on="item") + n2 = n2.join(algo.user_offsets_, on="user") nr = n2.rating - algo.mean_ - n2.i_off - n2.u_off - assert normed['rating'].values == approx(nr.values) + assert normed["rating"].values == approx(nr.values) def test_bias_transform_indexes(): @@ -217,35 +217,35 @@ def test_bias_transform_indexes(): normed = algo.fit_transform(ratings, indexes=True) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) - assert all(normed['uidx'] == algo.user_offsets_.index.get_indexer(ratings['user'])) - assert all(normed['iidx'] == algo.item_offsets_.index.get_indexer(ratings['item'])) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) + assert all(normed["uidx"] == algo.user_offsets_.index.get_indexer(ratings["user"])) + assert all(normed["iidx"] == algo.item_offsets_.index.get_indexer(ratings["item"])) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'].values, 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"].values, 1.0e-6) -@mark.parametrize(['users', 'items'], [(True, False), (False, True), (False, False)]) +@mark.parametrize(["users", "items"], [(True, False), (False, True), (False, False)]) def test_bias_transform_disable(users, items): algo = Bias(users=users, items=items) ratings = ml_test.ratings normed = algo.fit_transform(ratings) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'], 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"], 1.0e-6) n2 = ratings nr = n2.rating - algo.mean_ if items: - n2 = n2.join(algo.item_offsets_, on='item') + n2 = n2.join(algo.item_offsets_, on="item") nr = nr - n2.i_off if users: - n2 = n2.join(algo.user_offsets_, on='user') + n2 = n2.join(algo.user_offsets_, on="user") nr = nr - n2.u_off - assert normed['rating'].values == approx(nr.values) + assert normed["rating"].values == approx(nr.values) def test_bias_item_damp(): @@ -254,7 +254,7 @@ def test_bias_item_damp(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) @@ -268,10 +268,11 @@ def test_bias_user_damp(): assert algo.item_offsets_ is None assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.2857, -0.08333, -0.25]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.2857, -0.08333, -0.25]), abs=1.0e-4 + ) def test_bias_damped(): @@ -280,15 +281,16 @@ def test_bias_damped(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 
0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) def test_bias_separate_damping(): @@ -297,64 +299,76 @@ def test_bias_separate_damping(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) - assert algo.item_offsets_.loc[1:3].values == \ - approx(np.array([0, 0.136364, -0.13636]), abs=1.0e-4) + assert algo.item_offsets_.loc[1:3].values == approx( + np.array([0, 0.136364, -0.13636]), abs=1.0e-4 + ) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.266234, -0.08333, -0.22727]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.266234, -0.08333, -0.22727]), abs=1.0e-4 + ) + def test_transform_user_with_user_bias(): algo = Bias() algo.fit(simple_df) - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values - ratings_with_bias, user_bias = algo.transform_user(new_ratings) # user: 13 + ratings_with_bias, user_bias = algo.transform_user(new_ratings) # user: 13 result = algo.inverse_transform_user(13, ratings_with_bias, user_bias) assert new_ratings[1] == result[1] assert new_ratings[2] == result[2] + def test_transform_user_without_user_bias(): user = 12 algo = Bias() algo.fit(simple_df) - new_ratings = pd.Series([-0.5, 1.5], index=[2, 3]) # items as index and ratings as values + new_ratings = pd.Series([-0.5, 1.5], index=[2, 3]) # items as index and ratings as values v = algo.inverse_transform_user(user, new_ratings) - assert v[2] == new_ratings[2] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[2] + algo.mean_ - assert v[3] == new_ratings[3] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[3] + algo.mean_ + assert ( + v[2] + == new_ratings[2] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[2] + algo.mean_ + ) + assert ( + v[3] + == new_ratings[3] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[3] + algo.mean_ + ) + def test_bias_save(): original = Bias(damping=5) original.fit(simple_df) assert original.mean_ == approx(3.5) - _log.info('saving baseline model') + _log.info("saving baseline model") mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.mean_ == original.mean_ assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == 
set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) def test_bias_binpickle(tmp_path): @@ -362,20 +376,21 @@ def test_bias_binpickle(tmp_path): original.fit(simple_df) assert original.mean_ == approx(3.5) - _log.info('saving baseline model') - fn = tmp_path / 'bias.bpk' + _log.info("saving baseline model") + fn = tmp_path / "bias.bpk" binpickle.dump(original, fn) algo = binpickle.load(fn) assert algo.mean_ == original.mean_ assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) diff --git a/tests/test_candidate_selector.py b/tests/test_candidate_selector.py index e5eae1615..0d614a0b2 100644 --- a/tests/test_candidate_selector.py +++ b/tests/test_candidate_selector.py @@ -4,9 +4,9 @@ import pandas as pd import numpy as np -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_empty(): @@ -45,7 +45,7 @@ def test_unrated_big(): ratings = lktu.ml_test.ratings users = ratings.user.unique() items = ratings.item.unique() - user_items = ratings.set_index('user').item + user_items = ratings.set_index("user").item sel = basic.UnratedItemCandidateSelector() s2 = sel.fit(ratings) diff --git a/tests/test_crossfold.py b/tests/test_crossfold.py index 04efadb53..aa660f2c3 100644 --- a/tests/test_crossfold.py +++ b/tests/test_crossfold.py @@ -19,8 +19,8 @@ def test_partition_rows(): for s in splits: assert len(s.test) + len(s.train) == len(ratings) assert all(s.test.index.union(s.train.index) == ratings.index) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 # we should partition! 
@@ -28,8 +28,8 @@ def test_partition_rows(): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -46,16 +46,16 @@ def test_sample_rows(): for s in splits: assert len(s.test) == 1000 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 for s1, s2 in it.product(splits, splits): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -69,16 +69,16 @@ def test_sample_rows_more_smaller_parts(): for s in splits: assert len(s.test) == 500 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 for s1, s2 in it.product(splits, splits): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -92,13 +92,15 @@ def test_sample_non_disjoint(): for s in splits: assert len(s.test) == 1000 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 # There are enough splits & items we should pick at least one duplicate - ipairs = ((s1.test.set_index('user', 'item').index, s2.test.set_index('user', 'item').index) - for (s1, s2) in it.product(splits, splits)) + ipairs = ( + (s1.test.set_index("user", "item").index, s2.test.set_index("user", "item").index) + for (s1, s2) in it.product(splits, splits) + ) isizes = [len(i1.intersection(i2)) for (i1, i2) in ipairs] assert any(n > 0 for n in isizes) @@ -113,8 +115,8 @@ def test_sample_oversize(): for s in splits: assert len(s.test) + len(s.train) == len(ratings) assert all(s.test.index.union(s.train.index) == ratings.index) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 @@ -190,7 +192,7 @@ def test_last_frac(): ratings = lktu.ml_test.ratings users = np.random.choice(ratings.user.unique(), 5, replace=False) - samp = xf.LastFrac(0.2, 'timestamp') + samp = xf.LastFrac(0.2, "timestamp") for u in users: udf = ratings[ratings.user == u] tst = samp(udf) @@ -200,7 +202,7 @@ def test_last_frac(): assert len(tst) <= math.ceil(len(udf) * 0.2) assert tst.timestamp.min() >= trn.timestamp.max() - samp = xf.LastFrac(0.5, 'timestamp') + samp = 
xf.LastFrac(0.5, "timestamp") for u in users: udf = ratings[ratings.user == u] tst = samp(udf) @@ -218,14 +220,13 @@ def test_partition_users(): assert len(splits) == 5 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert all(ucounts == 5) assert all(s.test.index.union(s.train.index) == ratings.index) - assert all(s.train['user'].isin(s.train['user'].unique())) + assert all(s.train["user"].isin(s.train["user"].unique())) assert len(s.test) + len(s.train) == len(ratings) - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) @@ -235,9 +236,9 @@ def test_partition_may_skip_train(): ratings = lktu.ml_test.ratings # make a data set where some users only have 1 rating ratings = ratings.sample(frac=0.1) - users = ratings.groupby('user')['rating'].count() + users = ratings.groupby("user")["rating"].count() assert users.min() == 1.0 # we should have some small users! - users.name = 'ur_count' + users.name = "ur_count" splits = xf.partition_users(ratings, 5, xf.SampleN(1)) splits = list(splits) @@ -246,12 +247,12 @@ def test_partition_may_skip_train(): # now we go make sure we're missing some users! And don't have any NaN ratings for train, test in splits: # no null ratings - assert all(train['rating'].notna()) + assert all(train["rating"].notna()) # see if test users with 1 rating are missing from train - test = test.join(users, on='user') - assert all(~(test.loc[test['ur_count'] == 1, 'user'].isin(train['user'].unique()))) + test = test.join(users, on="user") + assert all(~(test.loc[test["ur_count"] == 1, "user"].isin(train["user"].unique()))) # and users with more than one rating are in train - assert all(test.loc[test['ur_count'] > 1, 'user'].isin(train['user'].unique())) + assert all(test.loc[test["ur_count"] > 1, "user"].isin(train["user"].unique())) def test_partition_users_frac(): @@ -259,19 +260,18 @@ def test_partition_users_frac(): splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: - tucs = s.test.groupby('user').item.count() + tucs = s.test.groupby("user").item.count() assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) assert all(s.test.index.union(s.train.index) == ratings.index) assert len(s.test) + len(s.train) == len(ratings) # we have all users - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) @@ -283,7 +283,7 @@ def test_sample_users(): assert len(splits) == 5 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(s.test) == 5 * 100 assert len(ucounts) == 100 assert all(ucounts == 5) @@ -304,11 +304,11 @@ def test_sample_users_frac(): splits = xf.sample_users(ratings, 5, 100, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: - tucs = s.test.groupby('user').item.count() + tucs = 
s.test.groupby("user").item.count() assert len(tucs) == 100 assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) @@ -332,14 +332,13 @@ def test_sample_users_frac_oversize(): assert len(splits) == 20 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(ucounts) < 100 assert all(ucounts == 5) assert all(s.test.index.union(s.train.index) == ratings.index) assert len(s.test) + len(s.train) == len(ratings) - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) for s1, s2 in it.product(splits, splits): @@ -358,7 +357,7 @@ def test_sample_users_frac_oversize_ndj(): assert len(splits) == 20 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(ucounts) == 100 assert len(s.test) == 5 * 100 assert all(ucounts == 5) @@ -369,7 +368,7 @@ def test_sample_users_frac_oversize_ndj(): def test_non_unique_index_partition_users(): """Partitioning users when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.partition_users(ratings, 5, xf.SampleN(5)): pass @@ -378,7 +377,7 @@ def test_non_unique_index_partition_users(): def test_sample_users(): """Sampling users when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.sample_users(ratings, 5, 100, xf.SampleN(5)): pass @@ -387,7 +386,7 @@ def test_sample_users(): def test_sample_rows(): """Sampling ratings when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.sample_rows(ratings, partitions=5, size=1000): pass @@ -396,7 +395,7 @@ def test_sample_rows(): def test_partition_users(): """Partitioning ratings when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.partition_users(ratings, 5, xf.SampleN(5)): pass diff --git a/tests/test_fallback.py b/tests/test_fallback.py index ba958bf43..b2644b866 100644 --- a/tests/test_fallback.py +++ b/tests/test_fallback.py @@ -9,9 +9,9 @@ import lenskit.util.test as lktu from pytest import approx -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_fallback_train_one(): @@ -42,15 +42,15 @@ def test_fallback_list(): assert len(algo.algorithms) == 2 params = algo.get_params() - assert list(params.keys()) == ['algorithms'] - assert len(params['algorithms']) == 2 - assert isinstance(params['algorithms'][0], basic.Memorized) - assert isinstance(params['algorithms'][1], Bias) + assert list(params.keys()) == ["algorithms"] + assert 
len(params["algorithms"]) == 2 + assert isinstance(params["algorithms"][0], basic.Memorized) + assert isinstance(params["algorithms"][1], Bias) def test_fallback_string(): algo = basic.Fallback([basic.Memorized(simple_df), Bias()]) - assert 'Fallback' in str(algo) + assert "Fallback" in str(algo) def test_fallback_clone(): @@ -110,7 +110,7 @@ def test_fallback_save_load(tmp_path): original = basic.Fallback(basic.Memorized(simple_df), Bias()) original.fit(lktu.ml_test.ratings) - fn = tmp_path / 'fb.mod' + fn = tmp_path / "fb.mod" binpickle.dump(original, fn) diff --git a/tests/test_funksvd.py b/tests/test_funksvd.py index 149ed1f2d..ffcc2ac7f 100644 --- a/tests/test_funksvd.py +++ b/tests/test_funksvd.py @@ -13,9 +13,9 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_fsvd_basic_build(): @@ -136,7 +136,7 @@ def test_fsvd_save_load(): assert original.user_features_.shape == (ratings.user.nunique(), 20) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -151,7 +151,7 @@ def test_fsvd_save_load(): @lktu.wantjit @mark.slow def test_fsvd_train_binary(): - ratings = lktu.ml_test.ratings.drop(columns=['rating', 'timestamp']) + ratings = lktu.ml_test.ratings.drop(columns=["rating", "timestamp"]) original = svd.FunkSVD(20, iterations=20, bias=False) original.fit(ratings) @@ -165,19 +165,19 @@ def test_fsvd_train_binary(): @mark.slow def test_fsvd_known_preds(): algo = svd.FunkSVD(15, iterations=125, lrate=0.001) - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) dir = Path(__file__).parent - pred_file = dir / 'funksvd-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "funksvd-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] + pairs = known_preds.loc[:, ["user", "item"]] preds = algo.predict(pairs) - known_preds.rename(columns={'prediction': 'expected'}, inplace=True) + known_preds.rename(columns={"prediction": "expected"}, inplace=True) merged = known_preds.assign(prediction=preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction assert not any(merged.prediction.isna() & merged.expected.notna()) err = merged.error err = err[err.notna()] @@ -185,14 +185,14 @@ def test_fsvd_known_preds(): assert all(err.abs() < 0.01) except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('erroneous predictions:\n%s', bad) + _log.error("erroneous predictions:\n%s", bad) raise e @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_fsvd_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -206,9 +206,9 @@ def test_fsvd_batch_accuracy(): algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + 
_log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) @@ -216,5 +216,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_knn_item_item.py b/tests/test_knn_item_item.py index 0518b1cb2..e4a3f6097 100644 --- a/tests/test_knn_item_item.py +++ b/tests/test_knn_item_item.py @@ -28,59 +28,62 @@ _log = logging.getLogger(__name__) ml_ratings = lktu.ml_test.ratings -simple_ratings = pd.DataFrame.from_records([ - (1, 6, 4.0), - (2, 6, 2.0), - (1, 7, 3.0), - (2, 7, 2.0), - (3, 7, 5.0), - (4, 7, 2.0), - (1, 8, 3.0), - (2, 8, 4.0), - (3, 8, 3.0), - (4, 8, 2.0), - (5, 8, 3.0), - (6, 8, 2.0), - (1, 9, 3.0), - (3, 9, 4.0) -], columns=['user', 'item', 'rating']) - - -@fixture(scope='module') +simple_ratings = pd.DataFrame.from_records( + [ + (1, 6, 4.0), + (2, 6, 2.0), + (1, 7, 3.0), + (2, 7, 2.0), + (3, 7, 5.0), + (4, 7, 2.0), + (1, 8, 3.0), + (2, 8, 4.0), + (3, 8, 3.0), + (4, 8, 2.0), + (5, 8, 3.0), + (6, 8, 2.0), + (1, 9, 3.0), + (3, 9, 4.0), + ], + columns=["user", "item", "rating"], +) + + +@fixture(scope="module") def ml_subset(): "Fixture that returns a subset of the MovieLens database." ratings = lktu.ml_test.ratings - icounts = ratings.groupby('item').rating.count() + icounts = ratings.groupby("item").rating.count() top = icounts.nlargest(500) - ratings = ratings.set_index('item') + ratings = ratings.set_index("item") top_rates = ratings.loc[top.index, :] - _log.info('top 500 items yield %d of %d ratings', len(top_rates), len(ratings)) + _log.info("top 500 items yield %d of %d ratings", len(top_rates), len(ratings)) return top_rates.reset_index() def test_ii_dft_config(): algo = knn.ItemItem(30, save_nbrs=500) assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_ii_exp_config(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='explicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="explicit") assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_ii_imp_config(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="implicit") assert not algo.center - assert algo.aggregate == 'sum' + assert algo.aggregate == "sum" assert not algo.use_ratings def test_ii_imp_clone(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="implicit") a2 = clone(algo) assert a2.get_params() == algo.get_params() @@ -98,17 +101,17 @@ def test_ii_train(): # 6 is a neighbor of 7 six, seven = algo.item_index_.get_indexer([6, 7]) - _log.info('six: %d', six) - _log.info('seven: %d', seven) - _log.info('matrix: %s', algo.sim_matrix_) + _log.info("six: %d", six) + _log.info("seven: %d", seven) + _log.info("matrix: %s", algo.sim_matrix_) assert matrix[six, seven] > 0 # and has the correct score - six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating + six_v = simple_ratings[simple_ratings.item == 6].set_index("user").rating six_v = six_v - six_v.mean() - seven_v = simple_ratings[simple_ratings.item == 
7].set_index('user').rating + seven_v = simple_ratings[simple_ratings.item == 7].set_index("user").rating seven_v = seven_v - seven_v.mean() denom = la.norm(six_v.values) * la.norm(seven_v.values) - six_v, seven_v = six_v.align(seven_v, join='inner') + six_v, seven_v = six_v.align(seven_v, join="inner") num = six_v.dot(seven_v) assert matrix[six, seven] == approx(num / denom, 0.01) @@ -133,12 +136,12 @@ def test_ii_train_unbounded(): assert matrix[six, seven] > 0 # and has the correct score - six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating + six_v = simple_ratings[simple_ratings.item == 6].set_index("user").rating six_v = six_v - six_v.mean() - seven_v = simple_ratings[simple_ratings.item == 7].set_index('user').rating + seven_v = simple_ratings[simple_ratings.item == 7].set_index("user").rating seven_v = seven_v - seven_v.mean() denom = la.norm(six_v.values) * la.norm(seven_v.values) - six_v, seven_v = six_v.align(seven_v, join='inner') + six_v, seven_v = six_v.align(seven_v, join="inner") num = six_v.dot(seven_v) assert matrix[six, seven] == approx(num / denom, 0.01) @@ -155,8 +158,8 @@ def test_ii_simple_predict(): def test_ii_simple_implicit_predict(): - algo = knn.ItemItem(30, center=False, aggregate='sum') - algo.fit(simple_ratings.loc[:, ['user', 'item']]) + algo = knn.ItemItem(30, center=False, aggregate="sum") + algo.fit(simple_ratings.loc[:, ["user", "item"]]) res = algo.predict_for_user(3, [6]) assert res is not None @@ -168,9 +171,7 @@ def test_ii_simple_implicit_predict(): @mark.skip("currently broken") def test_ii_warn_duplicates(): - extra = pd.DataFrame.from_records([ - (3, 7, 4.5) - ], columns=['user', 'item', 'rating']) + extra = pd.DataFrame.from_records([(3, 7, 4.5)], columns=["user", "item", "rating"]) ratings = pd.concat([simple_ratings, extra]) algo = knn.ItemItem(5) algo.fit(ratings) @@ -193,7 +194,7 @@ def test_ii_warns_center(): def test_ii_warns_center_with_no_use_ratings(): "Test that item-item warns if you configure to ignore ratings but center." 
with pytest.warns(ConfigWarning): - knn.ItemItem(5, use_ratings=False, aggregate='sum') + knn.ItemItem(5, use_ratings=False, aggregate="sum") def test_ii_warns_wa_with_no_use_ratings(): @@ -216,7 +217,7 @@ def test_ii_train_big(): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) @@ -234,20 +235,20 @@ def test_ii_train_big_unbounded(): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) @lktu.wantjit -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_ii_train_ml100k(tmp_path): "Test an unbounded model on ML-100K" ratings = lktu.ml100k.ratings algo = knn.ItemItem(30) - _log.info('training model') + _log.info("training model") algo.fit(ratings) - _log.info('testing model') + _log.info("testing model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) @@ -257,17 +258,17 @@ def test_ii_train_ml100k(tmp_path): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ratings.groupby('item').rating.mean() + means = ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) # save - fn = tmp_path / 'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(algo, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: restored = pickle.load(modf) assert all(restored.sim_matrix_.values > 0) @@ -290,22 +291,22 @@ def test_ii_train_ml100k(tmp_path): @mark.slow def test_ii_large_models(): "Several tests of large trained I-I models" - _log.info('training limited model') + _log.info("training limited model") MODEL_SIZE = 100 algo_lim = knn.ItemItem(30, save_nbrs=MODEL_SIZE) algo_lim.fit(ml_ratings) - _log.info('training unbounded model') + _log.info("training unbounded model") algo_ub = knn.ItemItem(30) algo_ub.fit(ml_ratings) - _log.info('testing models') + _log.info("testing models") assert all(np.logical_not(np.isnan(algo_lim.sim_matrix_.values))) assert all(algo_lim.sim_matrix_.values > 0) # a little tolerance assert all(algo_lim.sim_matrix_.values < 1 + 1.0e-6) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_lim.item_index_].values == approx(algo_lim.item_means_) assert all(np.logical_not(np.isnan(algo_ub.sim_matrix_.values))) @@ -313,24 +314,26 @@ def test_ii_large_models(): # a little tolerance assert all(algo_ub.sim_matrix_.values < 1 + 1.0e-6) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_ub.item_index_].values == approx(algo_ub.item_means_) - mc_rates = ml_ratings.set_index('item')\ - .join(pd.DataFrame({'item_mean': means}))\ - .assign(rating=lambda df: df.rating - df.item_mean) + mc_rates = ( + ml_ratings.set_index("item") + .join(pd.DataFrame({"item_mean": means})) + .assign(rating=lambda df: df.rating - df.item_mean) + ) mat_lim = algo_lim.sim_matrix_.to_scipy() mat_ub = algo_ub.sim_matrix_.to_scipy() - 
_log.info('checking a sample of neighborhoods') + _log.info("checking a sample of neighborhoods") items = pd.Series(algo_ub.item_index_) items = items[algo_ub.item_counts_ > 0] for i in items.sample(50): ipos = algo_ub.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) assert ipos == algo_lim.item_index_.get_loc(i) - irates = mc_rates.loc[[i], :].set_index('user').rating + irates = mc_rates.loc[[i], :].set_index("user").rating ub_row = mat_ub.getrow(ipos) b_row = mat_lim.getrow(ipos) @@ -345,14 +348,14 @@ def test_ii_large_models(): # spot-check some similarities for n in pd.Series(ub_row.indices).sample(min(10, len(ub_row.indices))): n_id = algo_ub.item_index_[n] - n_rates = mc_rates.loc[n_id, :].set_index('user').rating + n_rates = mc_rates.loc[n_id, :].set_index("user").rating ir, nr = irates.align(n_rates, fill_value=0) cor = ir.corr(nr) assert mat_ub[ipos, n] == approx(cor) # short rows are equal if b_row.nnz < MODEL_SIZE: - _log.debug('short row of length %d', b_row.nnz) + _log.debug("short row of length %d", b_row.nnz) assert b_row.nnz == ub_row.nnz ub_row.sort_indices() b_row.sort_indices() @@ -367,7 +370,7 @@ def test_ii_large_models(): assert len(b_nbrs) <= MODEL_SIZE assert all(b_nbrs.index.isin(ub_nbrs.index)) # the similarities should be equal! - b_match, ub_match = b_nbrs.align(ub_nbrs, join='inner') + b_match, ub_match = b_nbrs.align(ub_nbrs, join="inner") assert all(b_match == b_nbrs) assert b_match.values == approx(ub_match.values) assert b_nbrs.max() == approx(ub_nbrs.max()) @@ -385,19 +388,19 @@ def test_ii_large_models(): def test_ii_save_load(tmp_path, ml_subset): "Save and load a model" original = knn.ItemItem(30, save_nbrs=500) - _log.info('building model') + _log.info("building model") original.fit(ml_subset) - fn = tmp_path / 'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(original, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: algo = pickle.load(modf) - _log.info('checking model') + _log.info("checking model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) # a little tolerance @@ -421,7 +424,7 @@ def test_ii_save_load(tmp_path, ml_subset): assert all(np.diff(r_mat.values[sp:ep]) <= 0) assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep]) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(original.item_means_) matrix = algo.sim_matrix_.to_scipy() @@ -430,7 +433,7 @@ def test_ii_save_load(tmp_path, ml_subset): items = items[algo.item_counts_ > 0] for i in items.sample(50): ipos = algo.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) row = matrix.getrow(ipos) @@ -441,20 +444,20 @@ def test_ii_save_load(tmp_path, ml_subset): def test_ii_implicit_save_load(tmp_path, ml_subset): "Save and load a model" - original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate='sum') - _log.info('building model') - original.fit(ml_subset.loc[:, ['user', 'item']]) + original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate="sum") + _log.info("building model") + original.fit(ml_subset.loc[:, ["user", "item"]]) - fn = tmp_path / 
'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(original, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: algo = pickle.load(modf) - _log.info('checking model') + _log.info("checking model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) # a little tolerance @@ -487,7 +490,7 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): items = items[algo.item_counts_ > 0] for i in items.sample(50): ipos = algo.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) row = matrix.getrow(ipos) @@ -499,8 +502,8 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): @lktu.wantjit @mark.slow def test_ii_old_implicit(): - algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -514,10 +517,10 @@ def test_ii_old_implicit(): @lktu.wantjit @mark.slow def test_ii_no_ratings(): - a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - a1.fit(ml_ratings.loc[:, ['user', 'item']]) + a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + a1.fit(ml_ratings.loc[:, ["user", "item"]]) - algo = knn.ItemItem(20, save_nbrs=100, feedback='implicit') + algo = knn.ItemItem(20, save_nbrs=100, feedback="implicit") algo.fit(ml_ratings) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -533,8 +536,8 @@ def test_ii_no_ratings(): @mark.slow def test_ii_implicit_fast_ident(): - algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -553,7 +556,7 @@ def test_ii_implicit_fast_ident(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_ii_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -567,18 +570,18 @@ def test_ii_batch_accuracy(): algo = basic.Fallback(ii_algo, bias.Bias()) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test, n_jobs=4) - preds = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))) + preds = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + ) mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.70, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.90, abs=0.05) @@ -588,27 +591,27 @@ def test_ii_known_preds(): from lenskit import batch algo = knn.ItemItem(20, min_sim=1.0e-6) - 
_log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) assert algo.center assert algo.item_means_ is not None - _log.info('model means: %s', algo.item_means_) + _log.info("model means: %s", algo.item_means_) dir = Path(__file__).parent - pred_file = dir / 'item-item-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "item-item-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] + pairs = known_preds.loc[:, ["user", "item"]] preds = batch.predict(algo, pairs) - merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds) + merged = pd.merge(known_preds.rename(columns={"prediction": "expected"}), preds) assert len(merged) == len(preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction try: assert not any(merged.prediction.isna() & merged.expected.notna()) except AssertionError as e: bad = merged[merged.prediction.isna() & merged.expected.notna()] - _log.error('erroneously missing or present predictions:\n%s', bad) + _log.error("erroneously missing or present predictions:\n%s", bad) raise e err = merged.error @@ -617,33 +620,33 @@ def test_ii_known_preds(): assert all(err.abs() < 0.03) # FIXME this threshold is too high except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('erroneous predictions:\n%s', bad) + _log.error("erroneous predictions:\n%s", bad) raise e def _train_ii(): algo = knn.ItemItem(20, min_sim=1.0e-6) timer = Stopwatch() - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) - _log.info('trained in %s', timer) + _log.info("trained in %s", timer) shr = persist(algo) return shr.transfer() @lktu.wantjit @mark.slow -@mark.skip('no longer testing II match') -@mark.skipif(csrk.name != 'csr.kernels.mkl', reason='only needed when MKL is available') +@mark.skip("no longer testing II match") +@mark.skipif(csrk.name != "csr.kernels.mkl", reason="only needed when MKL is available") def test_ii_impl_match(): mkl_h = None nba_h = None try: - with lktu.set_env_var('CSR_KERNEL', 'mkl'): + with lktu.set_env_var("CSR_KERNEL", "mkl"): mkl_h = run_sp(_train_ii) mkl = mkl_h.get() - with lktu.set_env_var('CSR_KERNEL', 'numba'): + with lktu.set_env_var("CSR_KERNEL", "numba"): nba_h = run_sp(_train_ii) nba = nba_h.get() @@ -657,8 +660,9 @@ def test_ii_impl_match(): assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0) assert all(np.diff(nba.sim_matrix_.values[sp:ep]) <= 0) assert set(mkl.sim_matrix_.colinds[sp:ep]) == set(nba.sim_matrix_.colinds[sp:ep]) - assert mkl.sim_matrix_.values[sp:ep] == \ - approx(nba.sim_matrix_.values[sp:ep], abs=1.0e-3) + assert mkl.sim_matrix_.values[sp:ep] == approx( + nba.sim_matrix_.values[sp:ep], abs=1.0e-3 + ) finally: mkl = None @@ -671,8 +675,8 @@ def test_ii_impl_match(): @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K not available') -@mark.parametrize('ncpus', [1, 2]) +@mark.skipif(not lktu.ml100k.available, reason="ML100K not available") +@mark.parametrize("ncpus", [1, 2]) def test_ii_batch_recommend(ncpus): import lenskit.crossfold as xf from lenskit import topn @@ -680,11 +684,11 @@ def test_ii_batch_recommend(ncpus): ratings = lktu.ml100k.ratings def eval(train, test): - _log.info('running training') + 
_log.info("running training") algo = knn.ItemItem(30) algo = Recommender.adapt(algo) algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(algo, test.user.unique(), 100, n_jobs=ncpus) return recs @@ -697,21 +701,21 @@ def eval(train, test): test = pd.concat(test_frames) recs = pd.concat(recs) - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, test) dcg = results.ndcg - _log.info('nDCG for %d users is %f', len(dcg), dcg.mean()) + _log.info("nDCG for %d users is %f", len(dcg), dcg.mean()) assert dcg.mean() > 0.03 def _build_predict(ratings, fold): algo = Fallback(knn.ItemItem(20), Bias(5)) - train = ratings[ratings['partition'] != fold] + train = ratings[ratings["partition"] != fold] algo.fit(train) - test = ratings[ratings['partition'] == fold] + test = ratings[ratings["partition"] == fold] preds = batch.predict(algo, test, n_jobs=1) return preds @@ -721,7 +725,7 @@ def _build_predict(ratings, fold): def test_ii_parallel_multi_build(): "Build multiple item-item models in parallel" ratings = lktu.ml_test.ratings - ratings['partition'] = np.random.choice(4, len(ratings), replace=True) + ratings["partition"] = np.random.choice(4, len(ratings), replace=True) with invoker(ratings, _build_predict, 2) as inv: preds = inv.map(range(4)) diff --git a/tests/test_knn_user_user.py b/tests/test_knn_user_user.py index 663677091..7fd2c90a3 100644 --- a/tests/test_knn_user_user.py +++ b/tests/test_knn_user_user.py @@ -23,28 +23,28 @@ def test_uu_dft_config(): algo = knn.UserUser(30) assert algo.nnbrs == 30 assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_uu_exp_config(): - algo = knn.UserUser(30, feedback='explicit') + algo = knn.UserUser(30, feedback="explicit") assert algo.nnbrs == 30 assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_uu_imp_config(): - algo = knn.UserUser(30, feedback='implicit') + algo = knn.UserUser(30, feedback="implicit") assert algo.nnbrs == 30 assert not algo.center - assert algo.aggregate == 'sum' + assert algo.aggregate == "sum" assert not algo.use_ratings def test_uu_imp_clone(): - algo = knn.UserUser(30, feedback='implicit') + algo = knn.UserUser(30, feedback="implicit") a2 = clone(algo) assert a2.get_params() == algo.get_params() @@ -57,22 +57,24 @@ def test_uu_train(): assert ret is algo # it should have computed correct means - umeans = ml_ratings.groupby('user').rating.mean() - mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean') + umeans = ml_ratings.groupby("user").rating.mean() + mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name="mean") umeans, mlmeans = umeans.align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(['user', 'item']).rating + uir = ml_ratings.set_index(["user", "item"]).rating r_items = algo.transpose_matrix_.rowinds() - ui_rbdf = pd.DataFrame({ - 'user': algo.user_index_[algo.transpose_matrix_.colinds], - 'item': algo.item_index_[r_items], - 'nrating': algo.transpose_matrix_.values - }).set_index(['user', 'item']) + ui_rbdf = pd.DataFrame( + { + "user": algo.user_index_[algo.transpose_matrix_.colinds], + "item": algo.item_index_[r_items], + 
"nrating": algo.transpose_matrix_.values, + } + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) - ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean'] - ui_rbdf['orig_rating'] = uir + ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] + ui_rbdf["orig_rating"] = uir assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values) @@ -122,7 +124,7 @@ def test_uu_predict_live_ratings(): no4 = ml_ratings[ml_ratings.user != 4] algo.fit(no4) - ratings = ml_ratings[ml_ratings.user == 4].set_index('item').rating + ratings = ml_ratings[ml_ratings.user == 4].set_index("item").rating preds = algo.predict_for_user(20381, [1016, 2091], ratings) assert len(preds) == 2 @@ -132,37 +134,39 @@ def test_uu_predict_live_ratings(): def test_uu_save_load(tmp_path): orig = knn.UserUser(30) - _log.info('training model') + _log.info("training model") orig.fit(ml_ratings) - fn = tmp_path / 'uu.model' - _log.info('saving to %s', fn) - with fn.open('wb') as f: + fn = tmp_path / "uu.model" + _log.info("saving to %s", fn) + with fn.open("wb") as f: pickle.dump(orig, f) - _log.info('reloading model') - with fn.open('rb') as f: + _log.info("reloading model") + with fn.open("rb") as f: algo = pickle.load(f) - _log.info('checking model') + _log.info("checking model") # it should have computed correct means - umeans = ml_ratings.groupby('user').rating.mean() - mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean') + umeans = ml_ratings.groupby("user").rating.mean() + mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name="mean") umeans, mlmeans = umeans.align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(['user', 'item']).rating + uir = ml_ratings.set_index(["user", "item"]).rating r_items = algo.transpose_matrix_.rowinds() - ui_rbdf = pd.DataFrame({ - 'user': algo.user_index_[algo.transpose_matrix_.colinds], - 'item': algo.item_index_[r_items], - 'nrating': algo.transpose_matrix_.values - }).set_index(['user', 'item']) + ui_rbdf = pd.DataFrame( + { + "user": algo.user_index_[algo.transpose_matrix_.colinds], + "item": algo.item_index_[r_items], + "nrating": algo.transpose_matrix_.values, + } + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) - ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean'] - ui_rbdf['orig_rating'] = uir + ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] + ui_rbdf["orig_rating"] = uir assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values) # running the predictor should work @@ -183,8 +187,8 @@ def test_uu_predict_unknown_empty(): def test_uu_implicit(): "Train and use user-user on an implicit data set." - algo = knn.UserUser(20, feedback='implicit') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.UserUser(20, feedback="implicit") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.user_means_ is None @@ -200,8 +204,8 @@ def test_uu_implicit(): @mark.slow def test_uu_save_load_implicit(tmp_path): "Save and load user-user on an implicit data set." 
- orig = knn.UserUser(20, feedback='implicit') - data = ml_ratings.loc[:, ['user', 'item']] + orig = knn.UserUser(20, feedback="implicit") + data = ml_ratings.loc[:, ["user", "item"]] orig.fit(data) ser = pickle.dumps(orig) @@ -226,25 +230,25 @@ def test_uu_known_preds(): from lenskit import batch algo = knn.UserUser(30, min_sim=1.0e-6) - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) dir = Path(__file__).parent - pred_file = dir / 'user-user-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "user-user-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] - _log.info('generating %d known predictions', len(pairs)) + pairs = known_preds.loc[:, ["user", "item"]] + _log.info("generating %d known predictions", len(pairs)) preds = batch.predict(algo, pairs) - merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds) + merged = pd.merge(known_preds.rename(columns={"prediction": "expected"}), preds) assert len(merged) == len(preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction try: assert not any(merged.prediction.isna() & merged.expected.notna()) except AssertionError as e: bad = merged[merged.prediction.isna() & merged.expected.notna()] - _log.error('%d missing predictions:\n%s', len(bad), bad) + _log.error("%d missing predictions:\n%s", len(bad), bad) raise e err = merged.error @@ -253,22 +257,23 @@ def test_uu_known_preds(): assert all(err.abs() < 0.01) except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('%d erroneous predictions:\n%s', len(bad), bad) + _log.error("%d erroneous predictions:\n%s", len(bad), bad) raise e def __batch_eval(job): from lenskit import batch + algo, train, test = job - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_uu_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -286,30 +291,30 @@ def test_uu_batch_accuracy(): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.71, abs=0.05) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.91, abs=0.055) @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_uu_implicit_batch_accuracy(): from lenskit import batch, topn import lenskit.crossfold as xf ratings = lktu.ml100k.ratings - algo = knn.UserUser(30, center=False, aggregate='sum') + algo = knn.UserUser(30, center=False, aggregate="sum") folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) all_test = pd.concat(f.test for f in folds) rec_lists = [] for train, test in folds: - _log.info('running training') + _log.info("running training") rec_algo = Recommender.adapt(algo) - rec_algo.fit(train.loc[:, ['user', 'item']]) - 
_log.info('testing %d users', test.user.nunique()) + rec_algo.fit(train.loc[:, ["user", "item"]]) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(rec_algo, test.user.unique(), 100, n_jobs=2) rec_lists.append(recs) recs = pd.concat(rec_lists) diff --git a/tests/test_math_solve.py b/tests/test_math_solve.py index 9568c4da2..cac54caa0 100644 --- a/tests/test_math_solve.py +++ b/tests/test_math_solve.py @@ -17,7 +17,7 @@ def square_problem(draw, scale=10): size = draw(st.integers(2, 100)) # Hypothesis doesn't do well at generating problem data, so go with this - seed = draw(st.integers(min_value=0, max_value=2**32-1)) + seed = draw(st.integers(min_value=0, max_value=2**32 - 1)) rng = np.random.RandomState(seed) A = rng.randn(size, size) * scale b = rng.randn(size) * scale diff --git a/tests/test_matrix.py b/tests/test_matrix.py index bef4f71cb..408d7b3b3 100644 --- a/tests/test_matrix.py +++ b/tests/test_matrix.py @@ -17,17 +17,17 @@ def test_sparse_matrix(rng): assert mat.ncols == ratings.item.nunique() # user indicators should correspond to user item counts - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() ucounts = ucounts.loc[uidx].cumsum() assert all(mat.rowptrs[1:] == ucounts.values) # verify rating values - ratings = ratings.set_index(['user', 'item']) + ratings = ratings.set_index(["user", "item"]) for u in rng.choice(uidx, size=50): ui = uidx.get_loc(u) vs = mat.row_vs(ui) vs = pd.Series(vs, iidx[mat.row_cs(ui)]) - rates = ratings.loc[u]['rating'] + rates = ratings.loc[u]["rating"] vs, rates = vs.align(rates) assert not any(vs.isna()) assert not any(rates.isna()) @@ -36,7 +36,7 @@ def test_sparse_matrix(rng): def test_sparse_matrix_implicit(): ratings = ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] mat, uidx, iidx = sparse_ratings(ratings) assert mat.nrows == len(uidx) @@ -47,11 +47,11 @@ def test_sparse_matrix_implicit(): @mark.parametrize( - 'format, sps_fmt_checker', + "format, sps_fmt_checker", [ (True, sps.isspmatrix_csr), - ('csr', sps.isspmatrix_csr), - ('coo', sps.isspmatrix_coo), + ("csr", sps.isspmatrix_csr), + ("coo", sps.isspmatrix_coo), ], ) def test_sparse_matrix_scipy(format, sps_fmt_checker): @@ -64,7 +64,7 @@ def test_sparse_matrix_scipy(format, sps_fmt_checker): assert len(iidx) == ratings.item.nunique() # user indicators should correspond to user item counts - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() ucounts = ucounts.loc[uidx].cumsum() if sps.isspmatrix_coo(mat): mat = mat.tocsr() @@ -73,7 +73,7 @@ def test_sparse_matrix_scipy(format, sps_fmt_checker): def test_sparse_matrix_scipy_implicit(): ratings = ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] mat, uidx, iidx = sparse_ratings(ratings, scipy=True) assert sps.issparse(mat) @@ -86,8 +86,8 @@ def test_sparse_matrix_scipy_implicit(): def test_sparse_matrix_indexes(rng): ratings = ml_test.ratings - uidx = pd.Index(rng.permutation(ratings['user'].unique())) - iidx = pd.Index(rng.permutation(ratings['item'].unique())) + uidx = pd.Index(rng.permutation(ratings["user"].unique())) + iidx = pd.Index(rng.permutation(ratings["item"].unique())) mat, _uidx, _iidx = sparse_ratings(ratings, users=uidx, items=iidx) @@ -97,12 +97,12 @@ def test_sparse_matrix_indexes(rng): assert len(_iidx) == ratings.item.nunique() # verify rating values - ratings = ratings.set_index(['user', 'item']) 
+ ratings = ratings.set_index(["user", "item"]) for u in rng.choice(_uidx, size=50): ui = _uidx.get_loc(u) vs = mat.row_vs(ui) vs = pd.Series(vs, _iidx[mat.row_cs(ui)]) - rates = ratings.loc[u]['rating'] + rates = ratings.loc[u]["rating"] vs, rates = vs.align(rates) assert not any(vs.isna()) assert not any(rates.isna()) diff --git a/tests/test_ml20m.py b/tests/test_ml20m.py index b36df4d50..3b4387eac 100644 --- a/tests/test_ml20m.py +++ b/tests/test_ml20m.py @@ -16,6 +16,7 @@ from lenskit.algorithms.basic import Popular from lenskit.algorithms.als import BiasedMF from lenskit.algorithms import item_knn as knn + try: import lenskit_tf except: @@ -28,7 +29,7 @@ _log = logging.getLogger(__name__) -_ml_path = Path('data/ml-20m') +_ml_path = Path("data/ml-20m") if _ml_path.exists(): _ml_20m = MovieLens(_ml_path) else: @@ -40,36 +41,36 @@ def ml20m(): if _ml_20m: return _ml_20m.ratings else: - pytest.skip('ML-20M not available') + pytest.skip("ML-20M not available") @pytest.mark.slow @pytest.mark.realdata -@pytest.mark.parametrize('n_jobs', [1, 2]) +@pytest.mark.parametrize("n_jobs", [1, 2]) def test_pop_recommend(ml20m, rng, n_jobs): - users = rng.choice(ml20m['user'].unique(), 10000, replace=False) + users = rng.choice(ml20m["user"].unique(), 10000, replace=False) algo = Popular() - _log.info('training %s', algo) + _log.info("training %s", algo) algo.fit(ml20m) - _log.info('recommending with %s', algo) + _log.info("recommending with %s", algo) recs = batch.recommend(algo, users, 10, n_jobs=n_jobs) - assert recs['user'].nunique() == 10000 + assert recs["user"].nunique() == 10000 @pytest.mark.realdata @pytest.mark.slow def test_als_isolate(ml20m, rng): - users = rng.choice(ml20m['user'].unique(), 5000, replace=False) + users = rng.choice(ml20m["user"].unique(), 5000, replace=False) algo = BiasedMF(20, iterations=10) algo = Recommender.adapt(algo) - _log.info('training %s', algo) + _log.info("training %s", algo) ares = batch.train_isolated(algo, ml20m) try: - _log.info('recommending with %s', algo) + _log.info("recommending with %s", algo) recs = batch.recommend(ares, users, 10) - assert recs['user'].nunique() == 5000 - _log.info('predicting with %s', algo) + assert recs["user"].nunique() == 5000 + _log.info("predicting with %s", algo) pairs = ml20m.sample(1000) preds = batch.predict(ares, pairs) assert len(preds) == len(pairs) @@ -80,14 +81,16 @@ def test_als_isolate(ml20m, rng): @pytest.mark.realdata @pytest.mark.slow @pytest.mark.skip -@pytest.mark.skipif(lenskit_tf is None or not lenskit_tf.TF_AVAILABLE, reason='TensorFlow not available') +@pytest.mark.skipif( + lenskit_tf is None or not lenskit_tf.TF_AVAILABLE, reason="TensorFlow not available" +) def test_tf_isvd(ml20m): algo = lenskit_tf.IntegratedBiasMF(20) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2)) @@ -95,5 +98,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.60, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_parallel.py b/tests/test_parallel.py index 654be1588..6bfb3434c 100644 --- a/tests/test_parallel.py +++ 
b/tests/test_parallel.py @@ -19,11 +19,11 @@ def _mul_op(m, v): def _worker_status(blob, *args): - _log.info('in worker %s', mp.current_process().name) + _log.info("in worker %s", mp.current_process().name) return os.getpid(), is_worker(), is_mp_worker() -@mark.parametrize('n_jobs', [None, 1, 2, 8]) +@mark.parametrize("n_jobs", [None, 1, 2, 8]) def test_invoke_matrix(n_jobs): matrix = np.random.randn(100, 100) vectors = [np.random.randn(100) for i in range(100)] @@ -35,51 +35,51 @@ def test_invoke_matrix(n_jobs): def test_mp_is_worker(): - with invoker('foo', _worker_status, 2) as loop: + with invoker("foo", _worker_status, 2) as loop: res = list(loop.map(range(10))) assert all([w for (pid, w, mpw) in res]) assert all([mpw for (pid, w, mpw) in res]) def test_proc_count_default(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count() == mp.cpu_count() // 2 assert proc_count(level=1) == 2 def test_proc_count_no_div(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count(1) == mp.cpu_count() def test_proc_count_env(): - with set_env_var('LK_NUM_PROCS', '17'): + with set_env_var("LK_NUM_PROCS", "17"): assert proc_count() == 17 assert proc_count(level=1) == 1 def test_proc_count_max(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count(max_default=1) == 1 def test_proc_count_nest_env(): - with set_env_var('LK_NUM_PROCS', '7,3'): + with set_env_var("LK_NUM_PROCS", "7,3"): assert proc_count() == 7 assert proc_count(level=1) == 3 assert proc_count(level=2) == 1 def _sp_matmul(a1, a2, *, fail=False): - _log.info('in worker process') + _log.info("in worker process") if fail: - raise RuntimeError('you rang?') + raise RuntimeError("you rang?") else: return a1 @ a2 def _sp_matmul_p(a1, a2, *, method=None, fail=False): - _log.info('in worker process') + _log.info("in worker process") return persist(a1 @ a2, method=method).transfer() @@ -99,10 +99,10 @@ def test_run_sp_fail(): run_sp(_sp_matmul, a1, a2, fail=True) -@pytest.mark.parametrize('method', [None, 'binpickle', 'shm']) +@pytest.mark.parametrize("method", [None, "binpickle", "shm"]) def test_run_sp_persist(method): - if method == 'shm' and not SHM_AVAILABLE: - pytest.skip('SHM backend not available') + if method == "shm" and not SHM_AVAILABLE: + pytest.skip("SHM backend not available") a1 = np.random.randn(100, 100) a2 = np.random.randn(100, 100) @@ -116,7 +116,7 @@ def test_run_sp_persist(method): def test_sp_is_worker(): - pid, w, mpw = run_sp(_worker_status, 'fishtank') + pid, w, mpw = run_sp(_worker_status, "fishtank") assert pid != os.getpid() assert w assert not mpw @@ -131,4 +131,4 @@ def test_sp_random_seed(): seed = run_sp(_get_seed) # we should spawn a seed for the worker assert seed.entropy == init.entropy - assert seed.spawn_key == (init.n_children_spawned - 1, ) + assert seed.spawn_key == (init.n_children_spawned - 1,) diff --git a/tests/test_popular.py b/tests/test_popular.py index b7be88cb0..790bd98f5 100644 --- a/tests/test_popular.py +++ b/tests/test_popular.py @@ -6,15 +6,15 @@ import lenskit.util.test as lktu -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_popular(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = 
lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) assert algo.item_pop_.max() == counts.max() @@ -31,7 +31,7 @@ def test_popular(): def test_popular_excludes_rated(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) recs = algo.recommend(100, 100) @@ -40,16 +40,16 @@ def test_popular_excludes_rated(): # make sure we didn't recommend anything the user likes ratings = lktu.ml_test.ratings - urates = ratings.set_index(['user', 'item']) + urates = ratings.set_index(["user", "item"]) urates = urates.loc[100, :] - match = recs.join(urates, on='item', how='inner') + match = recs.join(urates, on="item", how="inner") assert len(match) == 0 def test_pop_candidates(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() items = lktu.ml_test.ratings.item.unique() assert algo.item_pop_.max() == counts.max() @@ -75,7 +75,7 @@ def test_pop_save_load(): mod = pickle.dumps(original) algo = pickle.loads(mod) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) assert algo.item_pop_.max() == counts.max() @@ -95,7 +95,7 @@ def test_popscore_quantile(rng): assert algo.item_scores_.max() == 1.0 - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.sort_values() winner = counts.index[-1] @@ -103,10 +103,10 @@ def test_popscore_quantile(rng): def test_popscore_rank(rng): - algo = basic.PopScore('rank') + algo = basic.PopScore("rank") algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.sort_values() assert algo.item_scores_.max() == len(counts) @@ -116,10 +116,10 @@ def test_popscore_rank(rng): def test_popscore_counts(rng): - algo = basic.PopScore('count') + algo = basic.PopScore("count") algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() scores, counts = algo.item_scores_.align(counts) assert all(scores == counts) diff --git a/tests/test_predict_metrics.py b/tests/test_predict_metrics.py index f1797e6e7..461f250bc 100644 --- a/tests/test_predict_metrics.py +++ b/tests/test_predict_metrics.py @@ -9,44 +9,44 @@ def test_check_missing_empty(): - pm._check_missing(pd.Series([], dtype='float64'), 'error') + pm._check_missing(pd.Series([], dtype="float64"), "error") # should pass assert True def test_check_missing_has_values(): - pm._check_missing(pd.Series([1, 3, 2]), 'error') + pm._check_missing(pd.Series([1, 3, 2]), "error") # should pass assert True def test_check_missing_nan_raises(): with raises(ValueError): - pm._check_missing(pd.Series([1, np.nan, 3]), 'error') + pm._check_missing(pd.Series([1, np.nan, 3]), "error") def test_check_missing_raises(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='left') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="left") with raises(ValueError): - pm._check_missing(data, 'error') + pm._check_missing(data, "error") def 
test_check_joined_ok(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='inner') - pm._check_missing(ref, 'error') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="inner") + pm._check_missing(ref, "error") # should get here assert True def test_check_missing_ignore(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='left') - pm._check_missing(data, 'ignore') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="left") + pm._check_missing(data, "ignore") # should get here assert True @@ -103,18 +103,19 @@ def test_rmse_series_two(): def test_rmse_series_subset_axis(): - rmse = pm.rmse(pd.Series([1, 3], ['a', 'c']), pd.Series([3, 4, 1], ['a', 'b', 'c'])) + rmse = pm.rmse(pd.Series([1, 3], ["a", "c"]), pd.Series([3, 4, 1], ["a", "b", "c"])) assert rmse == approx(2) def test_rmse_series_missing_value_error(): with raises(ValueError): - pm.rmse(pd.Series([1, 3], ['a', 'd']), pd.Series([3, 4, 1], ['a', 'b', 'c'])) + pm.rmse(pd.Series([1, 3], ["a", "d"]), pd.Series([3, 4, 1], ["a", "b", "c"])) def test_rmse_series_missing_value_ignore(): - rmse = pm.rmse(pd.Series([1, 3], ['a', 'd']), pd.Series([3, 4, 1], ['a', 'b', 'c']), - missing='ignore') + rmse = pm.rmse( + pd.Series([1, 3], ["a", "d"]), pd.Series([3, 4, 1], ["a", "b", "c"]), missing="ignore" + ) assert rmse == approx(2) @@ -159,7 +160,7 @@ def test_mae_series_two(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_batch_rmse(): import lenskit.crossfold as xf import lenskit.batch as batch @@ -171,13 +172,13 @@ def test_batch_rmse(): def eval(train, test): algo.fit(train) preds = batch.predict(algo, test) - return preds.set_index(['user', 'item']) + return preds.set_index(["user", "item"]) - results = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleN(5)))) + results = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleN(5))) + ) - user_rmse = results.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = results.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) # we should have all users users = ratings.user.unique() @@ -224,9 +225,9 @@ def test_user_metric(): preds = batch.predict(algo, test) rmse = pm.user_metric(preds) - u_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + u_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert rmse == approx(u_rmse.mean()) mae = pm.user_metric(preds, metric=pm.mae) - u_mae = preds.groupby('user').apply(lambda df: pm.mae(df.prediction, df.rating)) + u_mae = preds.groupby("user").apply(lambda df: pm.mae(df.prediction, df.rating)) assert mae == approx(u_mae.mean()) diff --git a/tests/test_rerank.py b/tests/test_rerank.py index f496fd9b2..d0f7f1988 100644 --- a/tests/test_rerank.py +++ b/tests/test_rerank.py @@ -9,10 +9,10 @@ def test_plackett_luce_rec(): pop = PopScore() - algo = PlackettLuce(pop, rng_spec='user') + algo = PlackettLuce(pop, rng_spec="user") algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = 
lktu.ml_test.ratings["item"].unique() nitems = len(items) recs1 = algo.recommend(2038, 100) @@ -21,19 +21,19 @@ def test_plackett_luce_rec(): assert len(recs2) == 100 # we don't get exactly the same set of recs - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) def test_plackett_luce_pred(): bias = Bias() - algo = PlackettLuce(bias, rng_spec='user') + algo = PlackettLuce(bias, rng_spec="user") algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = lktu.ml_test.ratings["item"].unique() nitems = len(items) recs1 = algo.recommend(2038, 100) @@ -42,8 +42,8 @@ def test_plackett_luce_pred(): assert len(recs2) == 100 # we don't get exactly the same set of recs - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) diff --git a/tests/test_sharing.py b/tests/test_sharing.py index 3ccc206c2..8e1033029 100644 --- a/tests/test_sharing.py +++ b/tests/test_sharing.py @@ -33,7 +33,7 @@ def test_persist_bpk(): share.close() -@mark.skipif(not lks.SHM_AVAILABLE, reason='shared_memory not available') +@mark.skipif(not lks.SHM_AVAILABLE, reason="shared_memory not available") def test_persist_shm(): matrix = np.random.randn(1000, 100) share = lks.persist_shm(matrix) @@ -62,7 +62,7 @@ def test_persist(): def test_persist_dir(tmp_path): "Test persistence with a configured directory" matrix = np.random.randn(1000, 100) - with lktu.set_env_var('LK_TEMP_DIR', os.fspath(tmp_path)): + with lktu.set_env_var("LK_TEMP_DIR", os.fspath(tmp_path)): share = lks.persist(matrix) assert isinstance(share, lks.BPKPersisted) @@ -79,7 +79,7 @@ def test_persist_method(): "Test persistence with a specified method" matrix = np.random.randn(1000, 100) - share = lks.persist(matrix, method='binpickle') + share = lks.persist(matrix, method="binpickle") assert isinstance(share, lks.BPKPersisted) try: diff --git a/tests/test_svd.py b/tests/test_svd.py index a62310ebb..ee4d7653b 100644 --- a/tests/test_svd.py +++ b/tests/test_svd.py @@ -13,11 +13,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) -need_skl = mark.skipif(not svd.SKL_AVAILABLE, reason='scikit-learn not installed') +need_skl = mark.skipif(not svd.SKL_AVAILABLE, reason="scikit-learn not installed") @need_skl @@ -71,6 +71,7 @@ def test_svd_clone(): assert a2.bias.user_damping == algo.bias.user_damping assert a2.bias.item_damping == algo.bias.item_damping + @need_skl @mark.slow def test_svd_save_load(): @@ -80,7 +81,7 @@ def test_svd_save_load(): original.fit(ratings) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -92,7 +93,7 @@ def test_svd_save_load(): @need_skl @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_svd_batch_accuracy(): from lenskit.algorithms import 
basic from lenskit.algorithms import bias @@ -106,9 +107,9 @@ def test_svd_batch_accuracy(): algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) @@ -116,5 +117,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_topn_analysis.py b/tests/test_topn_analysis.py index cfe2ee703..1253b8b0e 100644 --- a/tests/test_topn_analysis.py +++ b/tests/test_topn_analysis.py @@ -19,18 +19,21 @@ def test_split_keys(): rla = topn.RecListAnalysis() - recs, truth = topn._df_keys(['algorithm', 'user', 'item', 'rank', 'score'], - ['user', 'item', 'rating']) - assert truth == ['user'] - assert recs == ['algorithm', 'user'] + recs, truth = topn._df_keys( + ["algorithm", "user", "item", "rank", "score"], ["user", "item", "rating"] + ) + assert truth == ["user"] + assert recs == ["algorithm", "user"] def test_split_keys_gcol(): - recs, truth = topn._df_keys(['algorithm', 'user', 'item', 'rank', 'score', 'fishtank'], - ['user', 'item', 'rating'], - ['algorithm', 'fishtank', 'user']) - assert truth == ['user'] - assert recs == ['algorithm', 'fishtank', 'user'] + recs, truth = topn._df_keys( + ["algorithm", "user", "item", "rank", "score", "fishtank"], + ["user", "item", "rating"], + ["algorithm", "fishtank", "user"], + ) + assert truth == ["user"] + assert recs == ["algorithm", "fishtank", "user"] def test_run_one(): @@ -38,10 +41,10 @@ def test_run_one(): rla.add_metric(topn.precision) rla.add_metric(topn.recall) - recs = pd.DataFrame({'user': 1, 'item': [2]}) - recs.name = 'recs' - truth = pd.DataFrame({'user': 1, 'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth.name = 'truth' + recs = pd.DataFrame({"user": 1, "item": [2]}) + recs.name = "recs" + truth = pd.DataFrame({"user": 1, "item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth.name = "truth" print(recs) print(truth) @@ -49,13 +52,13 @@ def test_run_one(): res = rla.compute(recs, truth) print(res) - assert res.index.name == 'user' + assert res.index.name == "user" assert res.index.is_unique assert len(res) == 1 assert all(res.index == 1) assert all(res.precision == 1.0) - assert res.recall.values == approx(1/3) + assert res.recall.values == approx(1 / 3) def test_run_two(): @@ -64,17 +67,21 @@ def test_run_two(): rla.add_metric(topn.recall) rla.add_metric(topn.ndcg) - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2] - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) def prog(inner): assert len(inner) == 2 @@ -86,101 +93,110 @@ def prog(inner): assert res.columns.nlevels == 1 assert len(res) == 
2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert all(res.index.levels[0] == 'a') - assert all(res.index.levels[1] == ['a', 'b']) - assert all(res.reset_index().user == ['a', 'b']) + assert res.index.names == ["data", "user"] + assert all(res.index.levels[0] == "a") + assert all(res.index.levels[1] == ["a", "b"]) + assert all(res.reset_index().user == ["a", "b"]) partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3]) assert res.ndcg.values == approx([1.0, partial_ndcg]) - assert res.precision.values == approx([1.0, 1/2]) - assert res.recall.values == approx([1.0, 1/3]) + assert res.precision.values == approx([1.0, 1 / 2]) + assert res.recall.values == approx([1.0, 1 / 3]) def test_inner_format(): rla = topn.RecListAnalysis() - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2] - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) - - def inner(recs, truth, foo='a'): - assert foo == 'b' - assert set(recs.columns) == set(['LKRecID', 'LKTruthID', 'item', 'rank']) - assert truth.index.name == 'item' + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) + + def inner(recs, truth, foo="a"): + assert foo == "b" + assert set(recs.columns) == set(["LKRecID", "LKTruthID", "item", "rank"]) + assert truth.index.name == "item" assert truth.index.is_unique print(truth) - assert all(truth.columns == ['rating']) - return len(recs.join(truth, on='item', how='inner')) - rla.add_metric(inner, name='bob', foo='b') + assert all(truth.columns == ["rating"]) + return len(recs.join(truth, on="item", how="inner")) + + rla.add_metric(inner, name="bob", foo="b") res = rla.compute(recs, truth) print(res) assert len(res) == 2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert all(res.index.levels[0] == 'a') - assert all(res.index.levels[1] == ['a', 'b']) - assert all(res.reset_index().user == ['a', 'b']) - assert all(res['bob'] == [3, 1]) + assert res.index.names == ["data", "user"] + assert all(res.index.levels[0] == "a") + assert all(res.index.levels[1] == ["a", "b"]) + assert all(res.reset_index().user == ["a", "b"]) + assert all(res["bob"] == [3, 1]) def test_spec_group_cols(): - rla = topn.RecListAnalysis(group_cols=['data', 'user']) + rla = topn.RecListAnalysis(group_cols=["data", "user"]) rla.add_metric(topn.precision) rla.add_metric(topn.recall) rla.add_metric(topn.ndcg) - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2], - 'wombat': np.random.randn(5) - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + "wombat": np.random.randn(5), + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) res = rla.compute(recs, truth) print(res) assert len(res) == 2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert 
all(res.index.levels[0] == 'a')
-    assert all(res.index.levels[1] == ['a', 'b'])
-    assert all(res.reset_index().user == ['a', 'b'])
+    assert res.index.names == ["data", "user"]
+    assert all(res.index.levels[0] == "a")
+    assert all(res.index.levels[1] == ["a", "b"])
+    assert all(res.reset_index().user == ["a", "b"])

     partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3])
     assert res.ndcg.values == approx([1.0, partial_ndcg])
-    assert res.precision.values == approx([1.0, 1/2])
-    assert res.recall.values == approx([1.0, 1/3])
+    assert res.precision.values == approx([1.0, 1 / 2])
+    assert res.recall.values == approx([1.0, 1 / 3])


 def test_java_equiv():
     dir = Path(__file__).parent
-    metrics = pd.read_csv(str(dir / 'topn-java-metrics.csv'))
-    recs = pd.read_csv(str(dir / 'topn-java-recs.csv'))
-    truth = pd.read_csv(str(dir / 'topn-java-truth.csv'))
+    metrics = pd.read_csv(str(dir / "topn-java-metrics.csv"))
+    recs = pd.read_csv(str(dir / "topn-java-recs.csv"))
+    truth = pd.read_csv(str(dir / "topn-java-truth.csv"))

     rla = topn.RecListAnalysis()
     rla.add_metric(topn.ndcg)

     res = rla.compute(recs, truth)

     umm = pd.merge(metrics, res.reset_index())
-    umm['err'] = umm['ndcg'] - umm['Java.nDCG']
-    _log.info('merged: \n%s', umm)
-    assert umm['err'].values == approx(0, abs=1.0e-6)
+    umm["err"] = umm["ndcg"] - umm["Java.nDCG"]
+    _log.info("merged: \n%s", umm)
+    assert umm["err"].values == approx(0, abs=1.0e-6)


 @mark.slow
@@ -196,20 +212,20 @@ def test_fill_users():
     train, test = next(splits)
     algo.fit(train)

-    rec_users = test['user'].sample(50).unique()
+    rec_users = test["user"].sample(50).unique()
     assert len(rec_users) < 50
     recs = batch.recommend(algo, rec_users, 25)

     scores = rla.compute(recs, test, include_missing=True)
-    assert len(scores) == test['user'].nunique()
-    assert scores['recall'].notna().sum() == len(rec_users)
-    assert all(scores['ntruth'] == 5)
+    assert len(scores) == test["user"].nunique()
+    assert scores["recall"].notna().sum() == len(rec_users)
+    assert all(scores["ntruth"] == 5)

     mscores = rla.compute(recs, test)
     assert len(mscores) < len(scores)

-    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
-    recall, mrecall = recall.align(mscores['recall'])
+    recall = scores.loc[scores["recall"].notna(), "recall"].copy()
+    recall, mrecall = recall.align(mscores["recall"])
     assert all(recall == mrecall)
@@ -229,65 +245,63 @@ def test_adv_fill_users():
     all_test = {}
     for i, (train, test) in enumerate(splits):
         a_uu.fit(train)
-        rec_users = test['user'].sample(50).unique()
-        all_recs[(i+1, 'UU')] = batch.recommend(a_uu, rec_users, 25)
+        rec_users = test["user"].sample(50).unique()
+        all_recs[(i + 1, "UU")] = batch.recommend(a_uu, rec_users, 25)

         a_ii.fit(train)
-        rec_users = test['user'].sample(50).unique()
-        all_recs[(i+1, 'II')] = batch.recommend(a_ii, rec_users, 25)
-        all_test[i+1] = test
+        rec_users = test["user"].sample(50).unique()
+        all_recs[(i + 1, "II")] = batch.recommend(a_ii, rec_users, 25)
+        all_test[i + 1] = test

-    recs = pd.concat(all_recs, names=['part', 'algo'])
-    recs.reset_index(['part', 'algo'], inplace=True)
+    recs = pd.concat(all_recs, names=["part", "algo"])
+    recs.reset_index(["part", "algo"], inplace=True)
     recs.reset_index(drop=True, inplace=True)

-    test = pd.concat(all_test, names=['part'])
-    test.reset_index(['part'], inplace=True)
+    test = pd.concat(all_test, names=["part"])
+    test.reset_index(["part"], inplace=True)
     test.reset_index(drop=True, inplace=True)

     scores = rla.compute(recs, test, include_missing=True)
     inames = scores.index.names
     scores.sort_index(inplace=True)
     assert len(scores) == 50 * 4
-    assert all(scores['ntruth'] == 5)
-    assert scores['recall'].isna().sum() > 0
-    _log.info('scores:\n%s', scores)
+    assert all(scores["ntruth"] == 5)
+    assert scores["recall"].isna().sum() > 0
+    _log.info("scores:\n%s", scores)

-    ucounts = scores.reset_index().groupby('algo')['user'].agg(['count', 'nunique'])
-    assert all(ucounts['count'] == 100)
-    assert all(ucounts['nunique'] == 100)
+    ucounts = scores.reset_index().groupby("algo")["user"].agg(["count", "nunique"])
+    assert all(ucounts["count"] == 100)
+    assert all(ucounts["nunique"] == 100)

     mscores = rla.compute(recs, test)
     mscores = mscores.reset_index().set_index(inames)
     mscores.sort_index(inplace=True)
     assert len(mscores) < len(scores)
-    _log.info('mscores:\n%s', mscores)
+    _log.info("mscores:\n%s", mscores)

-    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
-    recall, mrecall = recall.align(mscores['recall'])
+    recall = scores.loc[scores["recall"].notna(), "recall"].copy()
+    recall, mrecall = recall.align(mscores["recall"])
     assert all(recall == mrecall)


-@mark.parametrize('drop_rating', [False, True])
+@mark.parametrize("drop_rating", [False, True])
 def test_pr_bulk_match(demo_recs, drop_rating):
     "bulk and normal match"
     train, test, recs = demo_recs
     if drop_rating:
-        test = test[['user', 'item']]
+        test = test[["user", "item"]]

     rla = topn.RecListAnalysis()
     rla.add_metric(precision)
     rla.add_metric(recall)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: precision(*a), name='ind_p')
-    rla.add_metric(lambda *a: recall(*a), name='ind_r')
+    rla.add_metric(lambda *a: precision(*a), name="ind_p")
+    rla.add_metric(lambda *a: recall(*a), name="ind_r")

     res = rla.compute(recs, test)
     print(res)
-    _log.info('precision mismatches:\n%s',
-              res[res.precision != res.ind_p])
-    _log.info('recall mismatches:\n%s',
-              res[res.recall != res.ind_r])
+    _log.info("precision mismatches:\n%s", res[res.precision != res.ind_p])
+    _log.info("recall mismatches:\n%s", res[res.recall != res.ind_r])
     assert res.precision.values == approx(res.ind_p.values)
     assert res.recall.values == approx(res.ind_r.values)
diff --git a/tests/test_topn_hit.py b/tests/test_topn_hit.py
index f907da2f6..b664023f6 100644
--- a/tests/test_topn_hit.py
+++ b/tests/test_topn_hit.py
@@ -12,8 +12,8 @@ def _test_hit(items, rel, **kwargs):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return hit(recs, truth, **kwargs)
@@ -81,7 +81,7 @@ def test_hit_series_array():
     hr = _test_hit(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert hr == 1

-    hr = _test_hit(pd.Series([1, 2, 3]), np.arange(4, 9, 1, 'u4'))
+    hr = _test_hit(pd.Series([1, 2, 3]), np.arange(4, 9, 1, "u4"))
     assert hr == 0
@@ -92,7 +92,7 @@ def test_hit_array():
     hr = _test_hit(np.array([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert hr == 1

-    hr = _test_hit(np.array([1, 2, 3]), np.arange(4, 9, 1, 'u4'))
+    hr = _test_hit(np.array([1, 2, 3]), np.arange(4, 9, 1, "u4"))
     assert hr == 0
@@ -122,19 +122,18 @@ def test_hit_partial_rel():
 def test_hit_bulk_k(demo_recs):
     "bulk and normal match"
     train, test, recs = demo_recs
-    assert test['user'].value_counts().max() > 5
+    assert test["user"].value_counts().max() > 5

     rla = topn.RecListAnalysis()
-    rla.add_metric(hit, name='hk', k=5)
+    rla.add_metric(hit, name="hk", k=5)
     rla.add_metric(hit)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a, **k: hit(*a, **k), name='ind_hk', k=5)
-    rla.add_metric(lambda *a: hit(*a), name='ind_h')
+    rla.add_metric(lambda *a, **k: hit(*a, **k), name="ind_hk", k=5)
+    rla.add_metric(lambda *a: hit(*a), name="ind_h")

     res = rla.compute(recs, test)
     print(res)
-    _log.info('recall mismatches:\n%s',
-              res[res.hit != res.ind_h])
+    _log.info("recall mismatches:\n%s", res[res.hit != res.ind_h])
     assert res.hit.values == approx(res.ind_h.values)
     assert res.hk.values == approx(res.ind_hk.values)
diff --git a/tests/test_topn_mrr.py b/tests/test_topn_mrr.py
index 5fc04d268..bb2b73843 100644
--- a/tests/test_topn_mrr.py
+++ b/tests/test_topn_mrr.py
@@ -11,8 +11,8 @@ def _test_rr(items, rel, **kw):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return recip_rank(recs, truth, **kw)
@@ -53,36 +53,36 @@ def test_mrr_series_idx():
 def test_mrr_array_late():
     "deep -> 0.1"
-    rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10])
+    rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10])
     assert rr == approx(0.1)


 def test_mrr_k_trunc():
-    rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10], k=5)
+    rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10], k=5)
     assert rr == approx(0.0)

-    rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10, 5], k=5)
+    rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10, 5], k=5)
     assert rr == approx(0.2)


 def test_mrr_k_short():
-    rr = _test_rr(np.arange(1, 5, 1, 'u4'), [2], k=10)
+    rr = _test_rr(np.arange(1, 5, 1, "u4"), [2], k=10)
     assert rr == approx(0.5)


-@mark.parametrize('drop_rating', [False, True])
+@mark.parametrize("drop_rating", [False, True])
 def test_mrr_bulk(demo_recs, drop_rating):
     "bulk and normal match"
     train, test, recs = demo_recs
     if drop_rating:
-        test = test[['user', 'item']]
+        test = test[["user", "item"]]

     rla = RecListAnalysis()
     rla.add_metric(recip_rank)
-    rla.add_metric(recip_rank, name='rr_k', k=10)
+    rla.add_metric(recip_rank, name="rr_k", k=10)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: recip_rank(*a), name='ind_rr')
-    rla.add_metric(lambda *a, **k: recip_rank(*a, **k), name='ind_rr_k', k=10)
+    rla.add_metric(lambda *a: recip_rank(*a), name="ind_rr")
+    rla.add_metric(lambda *a, **k: recip_rank(*a, **k), name="ind_rr_k", k=10)

     res = rla.compute(recs, test)
     assert all(res.recip_rank == res.ind_rr)
diff --git a/tests/test_topn_ndcg.py b/tests/test_topn_ndcg.py
index 2fdf9e573..830a4b8b6 100644
--- a/tests/test_topn_ndcg.py
+++ b/tests/test_topn_ndcg.py
@@ -55,71 +55,67 @@ def test_dcg_nan():
 def test_dcg_series():
     "The DCG function should work on a series"
-    assert _dcg(pd.Series([np.e, 0, 0, np.pi])) == \
-        approx((np.e + np.pi / np.log2(4)))
+    assert _dcg(pd.Series([np.e, 0, 0, np.pi])) == approx((np.e + np.pi / np.log2(4)))


 def test_dcg_mult2():
     "multiple elements should score correctly"
     assert _dcg(np.array([np.e, np.pi])) == approx(np.e + np.pi)
-    assert _dcg(np.array([np.e, 0, 0, np.pi])) == \
-        approx((np.e + np.pi / np.log2(4)))
+    assert _dcg(np.array([np.e, 0, 0, np.pi])) == approx((np.e + np.pi / np.log2(4)))


 def test_ndcg_empty():
-    recs = pd.DataFrame({'item': []})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": []})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(0.0)


 def test_ndcg_no_match():
-    recs = pd.DataFrame({'item': [4]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [4]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(0.0)


 def test_ndcg_perfect():
-    recs = pd.DataFrame({'item': [2, 3, 1]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [2, 3, 1]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(1.0)


 def test_ndcg_perfect_k_short():
-    recs = pd.DataFrame({'item': [2, 3, 1]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [2, 3, 1]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth, k=2) == approx(1.0)
     assert ndcg(recs[:2], truth, k=2) == approx(1.0)


 def test_ndcg_wrong():
-    recs = pd.DataFrame({'item': [1, 2]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [1, 2]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(_dcg([3.0, 5.0] / _dcg([5.0, 4.0, 3.0])))


 def test_ndcg_perfect_k():
-    recs = pd.DataFrame({'item': [2, 3]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [2, 3]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth, k=2) == approx(1.0)


 def test_ndcg_bulk_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 30, 2),
-        (1, 1, 72, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 30, 2), (1, 1, 72, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     ndcg = _bulk_ndcg(recs, truth)
     assert len(ndcg) == 1
@@ -128,16 +124,14 @@ def test_ndcg_bulk_not_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 72, 2),
-        (1, 1, 30, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 72, 2), (1, 1, 30, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     ndcg = _bulk_ndcg(recs, truth)
     assert len(ndcg) == 1
@@ -145,23 +139,23 @@
     assert ndcg.iloc[0] == approx(0.8155, abs=0.001)


-@mark.parametrize('drop_rating', [False, True])
+@mark.parametrize("drop_rating", [False, True])
 def test_ndcg_bulk_match(demo_recs, drop_rating):
     "bulk and normal match"
     train, test, recs = demo_recs
     if drop_rating:
-        test = test[['user', 'item']]
+        test = test[["user", "item"]]

     rla = RecListAnalysis()
     rla.add_metric(ndcg)
-    rla.add_metric(ndcg, name='ndcg_k', k=5)
+    rla.add_metric(ndcg, name="ndcg_k", k=5)
     rla.add_metric(dcg)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: ndcg(*a), name='ind_ndcg')
-    rla.add_metric(lambda *a, **k: ndcg(*a, **k), name='ind_ndcg_k', k=5)
+    rla.add_metric(lambda *a: ndcg(*a), name="ind_ndcg")
+    rla.add_metric(lambda *a, **k: ndcg(*a, **k), name="ind_ndcg_k", k=5)

     res = rla.compute(recs, test)
-    res['ind_ideal'] = res['dcg'] / res['ind_ndcg']
+    res["ind_ideal"] = res["dcg"] / res["ind_ndcg"]
     print(res)

     assert res.ndcg.values == approx(res.ind_ndcg.values)
diff --git a/tests/test_topn_precision.py b/tests/test_topn_precision.py
index 024c6e536..62df2bc1a 100644
--- a/tests/test_topn_precision.py
+++ b/tests/test_topn_precision.py
@@ -9,8 +9,8 @@ def _test_prec(items, rel, **k):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return precision(recs, truth, **k)
@@ -76,7 +76,7 @@ def test_precision_series_array():
     prec = _test_prec(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5]))
     assert prec == approx(0.5)

-    prec = _test_prec(pd.Series([1, 2, 3, 4]), np.arange(4, 10, 1, 'u4'))
+    prec = _test_prec(pd.Series([1, 2, 3, 4]), np.arange(4, 10, 1, "u4"))
     assert prec == approx(0.25)
@@ -87,7 +87,7 @@ def test_precision_array():
     prec = _test_prec(np.array([1, 2, 3, 4]), np.array([1, 3, 5]))
     assert prec == approx(0.5)

-    prec = _test_prec(np.array([1, 2, 3, 4]), np.arange(4, 10, 1, 'u4'))
+    prec = _test_prec(np.array([1, 2, 3, 4]), np.arange(4, 10, 1, "u4"))
     assert prec == approx(0.25)
@@ -118,14 +118,14 @@ def test_prec_short_items():
 def test_recall_bulk_k(demo_recs):
     "bulk and normal match"
     train, test, recs = demo_recs
-    assert test['user'].value_counts().max() > 5
+    assert test["user"].value_counts().max() > 5

     rla = topn.RecListAnalysis()
-    rla.add_metric(precision, name='pk', k=5)
+    rla.add_metric(precision, name="pk", k=5)
     rla.add_metric(precision)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a, **k: precision(*a, **k), name='ind_pk', k=5)
-    rla.add_metric(lambda *a: precision(*a), name='ind_p')
+    rla.add_metric(lambda *a, **k: precision(*a, **k), name="ind_pk", k=5)
+    rla.add_metric(lambda *a: precision(*a), name="ind_p")

     res = rla.compute(recs, test)
     assert res.precision.values == approx(res.ind_p.values)
diff --git a/tests/test_topn_rbp.py b/tests/test_topn_rbp.py
index c3496b276..babe76dc3 100644
--- a/tests/test_topn_rbp.py
+++ b/tests/test_topn_rbp.py
@@ -15,82 +15,86 @@ def test_rbp_empty():
-    recs = pd.DataFrame({'item': []})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": []})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert rbp(recs, truth) == approx(0.0)


 def test_rbp_no_match():
-    recs = pd.DataFrame({'item': [4]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [4]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert rbp(recs, truth) == approx(0.0)


 def test_rbp_one_match():
-    recs = pd.DataFrame({'item': [1]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [1]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert rbp(recs, truth) == approx(0.5)


 @given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95))
 def test_rbp_perfect(items, p):
     n = len(items)
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, patience=p) == approx(np.sum(p ** np.arange(n)) * (1 - p))


 @given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95))
 def test_rbp_perfect_norm(items, p):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, patience=p, normalize=True) == approx(1.0)


-@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
-       st.integers(1, 100), st.floats(0.05, 0.95))
+@given(
+    st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
+    st.integers(1, 100),
+    st.floats(0.05, 0.95),
+)
 def test_rbp_perfect_k(items, k, p):
     n = len(items)
     eff_n = min(n, k)
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, k=k, patience=p) == approx(np.sum(p ** np.arange(eff_n)) * (1 - p))


-@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
-       st.integers(1, 100), st.floats(0.05, 0.95))
+@given(
+    st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
+    st.integers(1, 100),
+    st.floats(0.05, 0.95),
+)
 def test_rbp_perfect_k_norm(items, k, p):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, k=k, patience=p, normalize=True) == approx(1.0)


 def test_rbp_missing():
-    recs = pd.DataFrame({'item': [1, 2]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": [1, 2]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item").sort_index()
     # (1 + 0.5) * 0.5
     assert rbp(recs, truth) == approx(0.75)


 def test_rbp_bulk_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 30, 2),
-        (1, 1, 72, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 30, 2), (1, 1, 72, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     rbp = _bulk_rbp(recs, truth)
     assert len(rbp) == 1
@@ -99,16 +103,14 @@ def test_rbp_bulk_not_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 72, 2),
-        (1, 1, 30, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 72, 2), (1, 1, 30, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     rbp = _bulk_rbp(recs, truth)
     assert len(rbp) == 1
@@ -116,27 +118,29 @@ def test_rbp_bulk_not_at_top():
     assert rbp.iloc[0] == approx((1 + 0.25) * 0.5)


-@mark.parametrize('normalize', [False, True])
+@mark.parametrize("normalize", [False, True])
 def test_rbp_bulk_match(demo_recs, normalize):
     "bulk and normal match"
     train, test, recs = demo_recs

     rla = RecListAnalysis()
     rla.add_metric(rbp, normalize=normalize)
-    rla.add_metric(rbp, name='rbp_k', k=5, normalize=normalize)
+    rla.add_metric(rbp, name="rbp_k", k=5, normalize=normalize)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: rbp(*a, normalize=normalize), name='ind_rbp')
-    rla.add_metric(lambda *a, **k: rbp(*a, normalize=normalize, **k), name='ind_rbp_k', k=5)
+    rla.add_metric(lambda *a: rbp(*a, normalize=normalize), name="ind_rbp")
+    rla.add_metric(lambda *a, **k: rbp(*a, normalize=normalize, **k), name="ind_rbp_k", k=5)

     res = rla.compute(recs, test)
-    res['diff'] = np.abs(res.rbp - res.ind_rbp)
-    rl = res.nlargest(5, 'diff')
-    _log.info('res:\n%s', rl)
+    res["diff"] = np.abs(res.rbp - res.ind_rbp)
+    rl = res.nlargest(5, "diff")
+    _log.info("res:\n%s", rl)
     user = rl.index[0]
-    _log.info('user: %s\n%s', user, rl.iloc[0])
-    _log.info('test:\n%s', test[test['user'] == user])
-    urecs = recs[recs['user'] == user].join(test.set_index(['user', 'item'])['rating'], on=['user', 'item'], how='left')
-    _log.info('recs:\n%s', urecs[urecs['rating'].notnull()])
+    _log.info("user: %s\n%s", user, rl.iloc[0])
+    _log.info("test:\n%s", test[test["user"] == user])
+    urecs = recs[recs["user"] == user].join(
+        test.set_index(["user", "item"])["rating"], on=["user", "item"], how="left"
+    )
+    _log.info("recs:\n%s", urecs[urecs["rating"].notnull()])

     assert res.rbp.values == approx(res.ind_rbp.values)
     assert res.rbp_k.values == approx(res.ind_rbp_k.values)
diff --git a/tests/test_topn_recall.py b/tests/test_topn_recall.py
index c11a6ad7f..612acdf08 100644
--- a/tests/test_topn_recall.py
+++ b/tests/test_topn_recall.py
@@ -12,8 +12,8 @@ def _test_recall(items, rel, **kwargs):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return recall(recs, truth, **kwargs)
@@ -84,7 +84,7 @@ def test_recall_series_array():
     prec = _test_recall(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert prec == approx(0.5)

-    prec = _test_recall(pd.Series([1, 2, 3, 4]), np.arange(4, 9, 1, 'u4'))
+    prec = _test_recall(pd.Series([1, 2, 3, 4]), np.arange(4, 9, 1, "u4"))
     assert prec == approx(0.2)
@@ -95,7 +95,7 @@ def test_recall_array():
     prec = _test_recall(np.array([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert prec == approx(0.5)

-    prec = _test_recall(np.array([1, 2, 3, 4]), np.arange(4, 9, 1, 'u4'))
+    prec = _test_recall(np.array([1, 2, 3, 4]), np.arange(4, 9, 1, "u4"))
     assert prec == approx(0.2)
@@ -126,19 +126,18 @@ def test_recall_partial_rel():
 def test_recall_bulk_k(demo_recs):
     "bulk and normal match"
     train, test, recs = demo_recs
-    assert test['user'].value_counts().max() > 5
+    assert test["user"].value_counts().max() > 5

     rla = topn.RecListAnalysis()
-    rla.add_metric(recall, name='rk', k=5)
+    rla.add_metric(recall, name="rk", k=5)
     rla.add_metric(recall)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a, **k: recall(*a, **k), name='ind_rk', k=5)
-    rla.add_metric(lambda *a: recall(*a), name='ind_r')
+    rla.add_metric(lambda *a, **k: recall(*a, **k), name="ind_rk", k=5)
+    rla.add_metric(lambda *a: recall(*a), name="ind_r")

     res = rla.compute(recs, test)
     print(res)
-    _log.info('recall mismatches:\n%s',
-              res[res.recall != res.ind_r])
+    _log.info("recall mismatches:\n%s", res[res.recall != res.ind_r])
     assert res.recall.values == approx(res.ind_r.values)
     assert res.rk.values == approx(res.ind_rk.values)
diff --git a/tests/test_topn_recs.py b/tests/test_topn_recs.py
index 07d1840fa..e570f8c1f 100644
--- a/tests/test_topn_recs.py
+++ b/tests/test_topn_recs.py
@@ -7,9 +7,9 @@ import lenskit.util.test as lktu
 from pytest import approx

-simple_df = pd.DataFrame({'item': [1, 1, 2, 3],
-                          'user': [10, 12, 10, 13],
-                          'rating': [4.0, 3.0, 5.0, 2.0]})
+simple_df = pd.DataFrame(
+    {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]}
+)


 def test_topn_recommend():
@@ -37,14 +37,14 @@ def test_topn_config():
     rec = basic.TopN(pred)

     rs = str(rec)
-    assert rs.startswith('TopN/')
+    assert rs.startswith("TopN/")


 def test_topn_big():
     ratings = lktu.ml_test.ratings
     users = ratings.user.unique()
     items = ratings.item.unique()
-    user_items = ratings.set_index('user').item
+    user_items = ratings.set_index("user").item

     algo = basic.TopN(bias.Bias())
     a2 = algo.fit(ratings)
@@ -55,7 +55,7 @@ def test_topn_big():
         recs = algo.recommend(u, 100)
         assert len(recs) == 100
         rated = user_items.loc[u]
-        assert all(~recs['item'].isin(rated))
+        assert all(~recs["item"].isin(rated))
         unrated = np.setdiff1d(items, rated)
         scores = algo.predictor.predict_for_user(u, unrated)
         top = scores.nlargest(100)
diff --git a/tests/test_topn_utils.py b/tests/test_topn_utils.py
index c97f1f496..25257c5df 100644
--- a/tests/test_topn_utils.py
+++ b/tests/test_topn_utils.py
@@ -6,7 +6,7 @@ def test_cs_rated_items_series():
     "rated_items should de-index series"
-    items = ['a', 'b', 'wombat']
+    items = ["a", "b", "wombat"]
     series = pd.Series(np.random.randn(3), index=items)

     i2 = CandidateSelector.rated_items(series)
@@ -16,7 +16,7 @@ def test_cs_rated_items():
     "rated_items should return list as array"
-    items = ['a', 'b', 'wombat']
+    items = ["a", "b", "wombat"]

     i2 = CandidateSelector.rated_items(items)
     assert isinstance(i2, np.ndarray)
@@ -25,7 +25,7 @@ def test_cs_rated_items_array():
     "rated_items should return array as itself"
-    items = ['a', 'b', 'wombat']
+    items = ["a", "b", "wombat"]
     items = np.array(items)

     i2 = CandidateSelector.rated_items(items)
diff --git a/tests/test_util.py b/tests/test_util.py
index 63fbc783a..6f134b0a2 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -31,14 +31,14 @@ def test_stopwatch_str():
     w = lku.Stopwatch()
     time.sleep(0.5)
     s = str(w)
-    assert s.endswith('ms')
+    assert s.endswith("ms")


 def test_stopwatch_long_str():
     w = lku.Stopwatch()
     time.sleep(1.2)
     s = str(w)
-    assert s.endswith('s')
+    assert s.endswith("s")


 def test_stopwatch_minutes():
@@ -46,7 +46,7 @@ def test_stopwatch_minutes():
     w.stop()
     w.start_time = w.stop_time - 62
     s = str(w)
-    p = re.compile(r'1m2.\d\ds')
+    p = re.compile(r"1m2.\d\ds")
     assert p.match(s)
@@ -55,7 +55,7 @@ def test_stopwatch_hours():
     w.stop()
     w.start_time = w.stop_time - 3663
     s = str(w)
-    p = re.compile(r'1h1m3.\d\ds')
+    p = re.compile(r"1h1m3.\d\ds")
     assert p.match(s)
@@ -64,6 +64,7 @@ def test_last_memo():
     def func(foo):
         history.append(foo)
+
     cache = lku.LastMemo(func)
     cache("foo")
diff --git a/tests/test_util_algos.py b/tests/test_util_algos.py
index 644b0240a..8437ebd35 100644
--- a/tests/test_util_algos.py
+++ b/tests/test_util_algos.py
@@ -5,9 +5,9 @@ import lenskit.util.test as lktu

-simple_df = pd.DataFrame({'item': [1, 1, 2, 3],
-                          'user': [10, 12, 10, 13],
-                          'rating': [4.0, 3.0, 5.0, 2.0]})
+simple_df = pd.DataFrame(
+    {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]}
+)


 def test_memorized():
@@ -26,9 +26,9 @@ def test_memorized():
 def test_memorized_batch():
     algo = basic.Memorized(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 10, 12], 'item': [1, 2, 1]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 10, 12], "item": [1, 2, 1]}))
     assert isinstance(preds, pd.Series)
-    assert preds.name == 'prediction'
+    assert preds.name == "prediction"
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds == [4.0, 5.0, 3.0])
@@ -36,7 +36,7 @@ def test_memorized_batch_ord():
     algo = basic.Memorized(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 12, 10], 'item': [1, 1, 2]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 12, 10], "item": [1, 1, 2]}))
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds == [4.0, 3.0, 5.0])
@@ -44,7 +44,7 @@ def test_memorized_batch_missing():
     algo = basic.Memorized(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 12, 12], 'item': [1, 1, 3]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 12, 12], "item": [1, 1, 3]}))
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds.iloc[:2] == [4.0, 3.0])
     assert np.isnan(preds.iloc[2])
@@ -53,8 +53,9 @@ def test_memorized_batch_keep_index():
     algo = basic.Memorized(simple_df)

-    query = pd.DataFrame({'user': [10, 10, 12], 'item': [1, 2, 1]},
-                         index=np.random.choice(np.arange(10), 3, False))
+    query = pd.DataFrame(
+        {"user": [10, 10, 12], "item": [1, 2, 1]}, index=np.random.choice(np.arange(10), 3, False)
+    )
     preds = algo.predict(query)
     assert all(preds.index == query.index)
     assert all(preds == [4.0, 5.0, 3.0])
@@ -64,7 +65,7 @@ def test_random():
     # test case: no seed
     algo = basic.Random()
     model = algo.fit(lktu.ml_test.ratings)
-    items = lktu.ml_test.ratings['item'].unique()
+    items = lktu.ml_test.ratings["item"].unique()
     nitems = len(items)

     assert model is not None
@@ -74,17 +75,17 @@ def test_random():
     assert len(recs1) == 100
     assert len(recs2) == 100
     # with very high probabilities
-    assert set(recs1['item']) != set(recs2['item'])
+    assert set(recs1["item"]) != set(recs2["item"])

     recs_all = algo.recommend(2038)
     assert len(recs_all) == nitems
-    assert set(items) == set(recs_all['item'])
+    assert set(items) == set(recs_all["item"])


 def test_random_derive_seed():
-    algo = basic.Random(rng_spec='user')
+    algo = basic.Random(rng_spec="user")
     model = algo.fit(lktu.ml_test.ratings)
-    items = lktu.ml_test.ratings['item'].unique()
+    items = lktu.ml_test.ratings["item"].unique()
     nitems = len(items)

     assert model is not None
@@ -94,17 +95,17 @@ def test_random_derive_seed():
     assert len(recs1) == 100
     assert len(recs2) == 100
     # with very high probabilities
-    assert set(recs1['item']) != set(recs2['item'])
+    assert set(recs1["item"]) != set(recs2["item"])

     recs_all = algo.recommend(2038)
     assert len(recs_all) == nitems
-    assert set(items) == set(recs_all['item'])
+    assert set(items) == set(recs_all["item"])


 def test_random_rec_from_candidates():
     algo = basic.Random()
-    items = lktu.ml_test.ratings['item'].unique()
-    users = lktu.ml_test.ratings['user'].unique()
+    items = lktu.ml_test.ratings["item"].unique()
+    users = lktu.ml_test.ratings["user"].unique()
     user1, user2 = np.random.choice(users, size=2, replace=False)

     algo.fit(lktu.ml_test.ratings)
@@ -134,7 +135,7 @@ def test_knownrating_batch_missing():
     algo = basic.KnownRating()
     algo.fit(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 12, 12], 'item': [1, 1, 3]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 12, 12], "item": [1, 1, 3]}))
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds.iloc[:2] == [4.0, 3.0])
     assert np.isnan(preds.iloc[2])
diff --git a/tests/test_util_random.py b/tests/test_util_random.py
index 62b3abe15..d582a2dcb 100644
--- a/tests/test_util_random.py
+++ b/tests/test_util_random.py
@@ -62,7 +62,7 @@ def test_initialize():

 def test_initialize_key():
-    random.init_rng(42, 'wombat')
+    random.init_rng(42, "wombat")
     assert root_seed().entropy == 42
     # assert root_seed().spawn_key == (zlib.crc32(b'wombat'),)
@@ -83,6 +83,6 @@ def test_derive_seed_intkey():

 def test_derive_seed_str():
     random.init_rng(42, propagate=False)
-    s2 = random.derive_seed(b'wombat')
+    s2 = random.derive_seed(b"wombat")
     assert s2.entropy == 42
     # assert s2.spawn_key == (zlib.crc32(b'wombat'),)