diff --git a/tests/test_als_explicit.py b/tests/test_als_explicit.py index 762e4f296..84411526a 100644 --- a/tests/test_als_explicit.py +++ b/tests/test_als_explicit.py @@ -22,11 +22,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) -methods = mark.parametrize('m', ['lu', 'cd']) +methods = mark.parametrize("m", ["lu", "cd"]) @methods @@ -80,7 +80,7 @@ def test_als_predict_basic_for_new_ratings(): assert algo.bias.mean_ == approx(simple_df.rating.mean()) - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values preds = algo.predict_for_user(15, [3], new_ratings) @@ -100,7 +100,7 @@ def test_als_predict_basic_for_new_user_with_new_ratings(): preds = algo.predict_for_user(u, [i]) new_u_id = -1 - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values new_preds = algo.predict_for_user(new_u_id, [i], new_ratings) @@ -127,9 +127,13 @@ def test_als_predict_for_new_users_with_new_ratings(): user_data = ratings[ratings.user == u] - _log.debug("user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :])) + _log.debug( + "user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :]) + ) - new_ratings = pd.Series(user_data.rating.to_numpy(), index=user_data.item) # items as index and ratings as values + new_ratings = pd.Series( + user_data.rating.to_numpy(), index=user_data.item + ) # items as index and ratings as values new_preds = algo.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -186,9 +190,13 @@ def test_als_predict_no_user_features_basic(): user_data = ratings[ratings.user == u] - _log.debug("user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :])) + _log.debug( + "user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :]) + ) - new_ratings = pd.Series(user_data.rating.to_numpy(), index=user_data.item) # items as index and ratings as values + new_ratings = pd.Series( + user_data.rating.to_numpy(), index=user_data.item + ) # items as index and ratings as values new_preds = algo_no_user_features.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -209,8 +217,8 @@ def test_als_train_large(): assert algo.n_items == ratings.item.nunique() assert algo.n_users == ratings.user.nunique() - icounts = ratings.groupby('item').rating.count() - isums = ratings.groupby('item').rating.sum() + icounts = ratings.groupby("item").rating.count() + isums = ratings.groupby("item").rating.sum() is2 = isums - icounts * ratings.rating.mean() imeans = is2 / (icounts + 5) ibias = pd.Series(algo.bias.item_offsets_, index=algo.item_index_) @@ -220,14 +228,14 @@ def test_als_train_large(): # don't use wantjit, use this to do a non-JIT test def test_als_save_load(): - original = als.BiasedMF(5, iterations=5, method='lu') + original = als.BiasedMF(5, iterations=5, method="lu") ratings = lktu.ml_test.ratings original.fit(ratings) assert original.bias.mean_ == approx(ratings.rating.mean()) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized 
to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -239,26 +247,26 @@ def test_als_save_load(): assert np.all(algo.user_index_ == original.user_index_) # make sure it still works - preds = algo.predict_for_user(10, np.arange(0, 50, dtype='i8')) + preds = algo.predict_for_user(10, np.arange(0, 50, dtype="i8")) assert len(preds) == 50 -@mark.skipif(not binpickle, reason='binpickle not available') +@mark.skipif(not binpickle, reason="binpickle not available") def test_als_binpickle(tmp_path): "Test saving ALS with BinPickle" - original = als.BiasedMF(20, iterations=5, method='lu') + original = als.BiasedMF(20, iterations=5, method="lu") ratings = lktu.ml_test.ratings original.fit(ratings) assert original.bias.mean_ == approx(ratings.rating.mean()) - file = tmp_path / 'als.bpk' + file = tmp_path / "als.bpk" binpickle.dump(original, file) with binpickle.BinPickleFile(file) as bpf: # the pickle data should be small - _log.info('serialized to %d pickle bytes', bpf.entries[-1].dec_length) + _log.info("serialized to %d pickle bytes", bpf.entries[-1].dec_length) pickle_dis(bpf._read_buffer(bpf.entries[-1])) assert bpf.entries[-1].dec_length < 2048 @@ -273,27 +281,27 @@ def test_als_binpickle(tmp_path): assert np.all(algo.user_index_ == original.user_index_) # make sure it still works - preds = algo.predict_for_user(10, np.arange(0, 50, dtype='i8')) + preds = algo.predict_for_user(10, np.arange(0, 50, dtype="i8")) assert len(preds) == 50 @lktu.wantjit @mark.slow def test_als_method_match(): - lu = als.BiasedMF(20, iterations=15, reg=(2, 0.001), method='lu', rng_spec=42) - cd = als.BiasedMF(20, iterations=20, reg=(2, 0.001), method='cd', rng_spec=42) + lu = als.BiasedMF(20, iterations=15, reg=(2, 0.001), method="lu", rng_spec=42) + cd = als.BiasedMF(20, iterations=20, reg=(2, 0.001), method="cd", rng_spec=42) ratings = lktu.ml_test.ratings timer = Stopwatch() lu.fit(ratings) timer.stop() - _log.info('fit with LU solver in %s', timer) + _log.info("fit with LU solver in %s", timer) timer = Stopwatch() cd.fit(ratings) timer.stop() - _log.info('fit with CD solver in %s', timer) + _log.info("fit with CD solver in %s", timer) assert lu.bias.mean_ == approx(ratings.rating.mean()) assert cd.bias.mean_ == approx(ratings.rating.mean()) @@ -307,29 +315,31 @@ def test_als_method_match(): cd_preds = cd.predict_for_user(u, items) diff = lu_preds - cd_preds adiff = np.abs(diff) - _log.info('user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f', u, - np.linalg.norm(diff, 2), - np.min(adiff), np.median(adiff), np.max(adiff), np.quantile(adiff, 0.9)) - - preds.append(pd.DataFrame({ - 'user': u, - 'item': items, - 'lu': lu_preds, - 'cd': cd_preds, - 'adiff': adiff - })) + _log.info( + "user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f", + u, + np.linalg.norm(diff, 2), + np.min(adiff), + np.median(adiff), + np.max(adiff), + np.quantile(adiff, 0.9), + ) + + preds.append( + pd.DataFrame({"user": u, "item": items, "lu": lu_preds, "cd": cd_preds, "adiff": adiff}) + ) preds = pd.concat(preds, ignore_index=True) - _log.info('LU preds:\n%s', preds.lu.describe()) - _log.info('CD preds:\n%s', preds.cd.describe()) - _log.info('overall differences:\n%s', preds.adiff.describe()) + _log.info("LU preds:\n%s", preds.lu.describe()) + _log.info("CD preds:\n%s", preds.cd.describe()) + _log.info("overall differences:\n%s", preds.adiff.describe()) # there are differences. 
our check: the 90% are under a quarter star assert np.quantile(adiff, 0.9) <= 0.27 @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_als_batch_accuracy(): from lenskit.algorithms import bias import lenskit.crossfold as xf @@ -337,30 +347,30 @@ def test_als_batch_accuracy(): ratings = lktu.ml100k.ratings - lu_algo = als.BiasedMF(25, iterations=20, damping=5, method='lu') - cd_algo = als.BiasedMF(25, iterations=25, damping=5, method='cd') + lu_algo = als.BiasedMF(25, iterations=20, damping=5, method="lu") + cd_algo = als.BiasedMF(25, iterations=25, damping=5, method="cd") # algo = bias.Fallback(svd_algo, bias.Bias(damping=5)) def eval(train, test): - _log.info('training LU') + _log.info("training LU") lu_algo.fit(train) - _log.info('training CD') + _log.info("training CD") cd_algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return test.assign(lu_pred=lu_algo.predict(test), cd_pred=cd_algo.predict(test)) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) preds = pd.concat(eval(train, test) for (train, test) in folds) - preds['abs_diff'] = np.abs(preds.lu_pred - preds.cd_pred) - _log.info('predictions:\n%s', preds.sort_values('abs_diff', ascending=False)) - _log.info('diff summary:\n%s', preds.abs_diff.describe()) + preds["abs_diff"] = np.abs(preds.lu_pred - preds.cd_pred) + _log.info("predictions:\n%s", preds.sort_values("abs_diff", ascending=False)) + _log.info("diff summary:\n%s", preds.abs_diff.describe()) lu_mae = pm.mae(preds.lu_pred, preds.rating) assert lu_mae == approx(0.73, abs=0.045) cd_mae = pm.mae(preds.cd_pred, preds.rating) assert cd_mae == approx(0.73, abs=0.045) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.lu_pred, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.lu_pred, df.rating)) assert user_rmse.mean() == approx(0.94, abs=0.05) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.cd_pred, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.cd_pred, df.rating)) assert user_rmse.mean() == approx(0.94, abs=0.05) diff --git a/tests/test_als_implicit.py b/tests/test_als_implicit.py index d72ab6757..94006cb69 100644 --- a/tests/test_als_implicit.py +++ b/tests/test_als_implicit.py @@ -20,12 +20,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13]}) +simple_df = pd.DataFrame({"item": [1, 1, 2, 3], "user": [10, 12, 10, 13]}) simple_dfr = simple_df.assign(rating=[4.0, 3.0, 5.0, 2.0]) -methods = mark.parametrize('m', ['lu', 'cg']) +methods = mark.parametrize("m", ["lu", "cg"]) @methods @@ -52,7 +51,7 @@ def test_als_predict_basic(): def test_als_predict_basic_for_new_ratings(): - """ Test ImplicitMF ability to support new ratings """ + """Test ImplicitMF ability to support new ratings""" algo = als.ImplicitMF(20, iterations=10) algo.fit(simple_df) @@ -115,7 +114,7 @@ def test_als_predict_for_new_users_with_new_ratings(): _log.debug("user_features from fit: " + str(algo.user_features_[upos, :])) # get the user's rating series - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() new_preds = algo.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -151,26 +150,28 @@ def test_als_recs_topn_for_new_users_with_new_ratings(rng): 
recs = rec_algo.recommend(u, 10) user_data = ratings[ratings.user == u] upos = algo.user_index_.get_loc(u) - _log.info('user %s: %s ratings', u, len(user_data)) + _log.info("user %s: %s ratings", u, len(user_data)) _log.debug("user_features from fit: " + str(algo.user_features_[upos, :])) # get the user's rating series - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() new_recs = rec_algo.recommend(new_u_id, 10, ratings=new_ratings) # merge new & old recs - all_recs = pd.merge(recs.rename(columns={'score': 'old_score'}), - new_recs.rename(columns={'score': 'new_score'}), - how='outer').fillna(-np.inf) + all_recs = pd.merge( + recs.rename(columns={"score": "old_score"}), + new_recs.rename(columns={"score": "new_score"}), + how="outer", + ).fillna(-np.inf) tau = stats.kendalltau(all_recs.old_score, all_recs.new_score) - _log.info('correlation for user %s: %f', u, tau.correlation) + _log.info("correlation for user %s: %f", u, tau.correlation) correlations.loc[u] = tau.correlation - _log.debug('correlations: %s', correlations) + _log.debug("correlations: %s", correlations) - assert not(any(correlations.isnull())) + assert not (any(correlations.isnull())) assert all(correlations >= 0.5) @@ -206,7 +207,7 @@ def test_als_predict_no_user_features_basic(): preds = algo.predict_for_user(u, items) user_data = ratings[ratings.user == u] - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() algo_no_user_features = als.ImplicitMF(5, iterations=10, method="lu", save_user_features=False) algo_no_user_features.fit(ratings) @@ -236,7 +237,7 @@ def test_als_save_load(tmp_path): ratings = lktu.ml_test.ratings algo.fit(ratings) - fn = tmp_path / 'model.bpk' + fn = tmp_path / "model.bpk" binpickle.dump(algo, fn, codec=None) restored = binpickle.load(fn) @@ -250,7 +251,7 @@ def test_als_save_load(tmp_path): def test_als_train_large_noratings(): algo = als.ImplicitMF(20, iterations=20) ratings = lktu.ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] algo.fit(ratings) assert len(algo.user_index_) == ratings.user.nunique() @@ -274,20 +275,20 @@ def test_als_train_large_ratings(): @lktu.wantjit @mark.slow def test_als_method_match(): - lu = als.ImplicitMF(20, iterations=15, method='lu', rng_spec=42) - cg = als.ImplicitMF(20, iterations=15, method='cg', rng_spec=42) + lu = als.ImplicitMF(20, iterations=15, method="lu", rng_spec=42) + cg = als.ImplicitMF(20, iterations=15, method="cg", rng_spec=42) ratings = lktu.ml_test.ratings timer = Stopwatch() lu.fit(ratings) timer.stop() - _log.info('fit with LU solver in %s', timer) + _log.info("fit with LU solver in %s", timer) timer = Stopwatch() cg.fit(ratings) timer.stop() - _log.info('fit with CG solver in %s', timer) + _log.info("fit with CG solver in %s", timer) preds = [] @@ -298,30 +299,32 @@ def test_als_method_match(): cd_preds = cg.predict_for_user(u, items) diff = lu_preds - cd_preds adiff = np.abs(diff) - _log.info('user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f', u, - np.linalg.norm(diff, 2), - np.min(adiff), np.median(adiff), np.max(adiff), np.quantile(adiff, 0.9)) - - preds.append(pd.DataFrame({ - 'user': u, - 'item': items, - 'lu': lu_preds, - 'cg': cd_preds, - 'adiff': adiff - })) - _log.info('user %s tau: %s', u, stats.kendalltau(lu_preds, cd_preds)) + _log.info( + "user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f", + u, + 
np.linalg.norm(diff, 2), + np.min(adiff), + np.median(adiff), + np.max(adiff), + np.quantile(adiff, 0.9), + ) + + preds.append( + pd.DataFrame({"user": u, "item": items, "lu": lu_preds, "cg": cd_preds, "adiff": adiff}) + ) + _log.info("user %s tau: %s", u, stats.kendalltau(lu_preds, cd_preds)) preds = pd.concat(preds, ignore_index=True) - _log.info('LU preds:\n%s', preds.lu.describe()) - _log.info('CD preds:\n%s', preds.cg.describe()) - _log.info('overall differences:\n%s', preds.adiff.describe()) + _log.info("LU preds:\n%s", preds.lu.describe()) + _log.info("CD preds:\n%s", preds.cg.describe()) + _log.info("overall differences:\n%s", preds.adiff.describe()) # there are differences. our check: the 90% are reasonable assert np.quantile(adiff, 0.9) < 0.5 @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_als_implicit_batch_accuracy(): import lenskit.crossfold as xf from lenskit import batch @@ -330,31 +333,31 @@ def test_als_implicit_batch_accuracy(): ratings = lktu.ml100k.ratings def eval(train, test): - train = train.astype({'rating': np.float_}) - _log.info('training CG') - cg_algo = als.ImplicitMF(25, iterations=20, method='cg') + train = train.astype({"rating": np.float_}) + _log.info("training CG") + cg_algo = als.ImplicitMF(25, iterations=20, method="cg") cg_algo = Recommender.adapt(cg_algo) cg_algo.fit(train) - _log.info('training LU') - lu_algo = als.ImplicitMF(25, iterations=20, method='lu') + _log.info("training LU") + lu_algo = als.ImplicitMF(25, iterations=20, method="lu") lu_algo = Recommender.adapt(lu_algo) lu_algo.fit(train) users = test.user.unique() - _log.info('testing %d users', len(users)) + _log.info("testing %d users", len(users)) cg_recs = batch.recommend(cg_algo, users, 100, n_jobs=2) lu_recs = batch.recommend(lu_algo, users, 100, n_jobs=2) - return pd.concat({'CG': cg_recs, 'LU': lu_recs}, names=['Method']).reset_index('Method') + return pd.concat({"CG": cg_recs, "LU": lu_recs}, names=["Method"]).reset_index("Method") folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) test = pd.concat(te for (tr, te) in folds) recs = pd.concat((eval(train, test) for (train, test) in folds), ignore_index=True) - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, test) - results = results.groupby('Method')['ndcg'].mean() - _log.info('LU nDCG for users is %.4f', results.loc['LU'].mean()) - _log.info('CG nDCG for users is %.4f', results.loc['CG'].mean()) + results = results.groupby("Method")["ndcg"].mean() + _log.info("LU nDCG for users is %.4f", results.loc["LU"].mean()) + _log.info("CG nDCG for users is %.4f", results.loc["CG"].mean()) assert all(results > 0.28) - assert results.loc['LU'] == approx(results.loc['CG'], rel=0.05) + assert results.loc["LU"] == approx(results.loc["CG"], rel=0.05) diff --git a/tests/test_batch_predict.py b/tests/test_batch_predict.py index 6daa308bd..e176a6370 100644 --- a/tests/test_batch_predict.py +++ b/tests/test_batch_predict.py @@ -12,7 +12,7 @@ _log = logging.getLogger(__name__) -MLB = namedtuple('MLB', ['ratings', 'algo']) +MLB = namedtuple("MLB", ["ratings", "algo"]) @pytest.fixture @@ -24,12 +24,12 @@ def mlb(): def test_predict_single(mlb): - tf = pd.DataFrame({'user': [1], 'item': [31]}) + tf = pd.DataFrame({"user": [1], "item": [31]}) res = lkb.predict(mlb.algo, tf) assert len(res) == 1 assert 
all(res.user == 1) - assert set(res.columns) == set(['user', 'item', 'prediction']) + assert set(res.columns) == set(["user", "item", "prediction"]) assert all(res.item == 31) expected = mlb.algo.mean_ + mlb.algo.item_offsets_.loc[31] + mlb.algo.user_offsets_.loc[1] @@ -45,19 +45,19 @@ def test_predict_user(mlb): test_unrated = np.random.choice(unrated, 10, replace=False) test_items = pd.concat([test_rated, pd.Series(test_unrated)]) - tf = pd.DataFrame({'user': uid, 'item': test_items}) + tf = pd.DataFrame({"user": uid, "item": test_items}) res = lkb.predict(mlb.algo, tf) assert len(res) == 15 - assert set(res.columns) == set(['user', 'item', 'prediction']) + assert set(res.columns) == set(["user", "item", "prediction"]) assert all(res.user == uid) assert set(res.item) == set(test_items) # did we get the right predictions? - preds = res.set_index(['user', 'item']) - preds['rating'] = mlb.algo.mean_ - preds['rating'] += mlb.algo.item_offsets_ - preds['rating'] += mlb.algo.user_offsets_.loc[uid] + preds = res.set_index(["user", "item"]) + preds["rating"] = mlb.algo.mean_ + preds["rating"] += mlb.algo.item_offsets_ + preds["rating"] += mlb.algo.user_offsets_.loc[uid] assert preds.prediction.values == pytest.approx(preds.rating.values) @@ -66,17 +66,17 @@ def test_predict_two_users(mlb): tf = None # make sure we get both UIDs while tf is None or len(set(tf.user)) < 2: - tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ('user', 'item')].sample(10) + tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ("user", "item")].sample(10) res = lkb.predict(mlb.algo, tf) assert len(res) == 10 assert set(res.user) == set(uids) - preds = res.set_index(['user', 'item']) - preds['rating'] = mlb.algo.mean_ - preds['rating'] += mlb.algo.item_offsets_ - preds['rating'] += mlb.algo.user_offsets_ + preds = res.set_index(["user", "item"]) + preds["rating"] = mlb.algo.mean_ + preds["rating"] += mlb.algo.item_offsets_ + preds["rating"] += mlb.algo.user_offsets_ assert preds.prediction.values == pytest.approx(preds.rating.values) @@ -85,26 +85,26 @@ def test_predict_include_rating(mlb): tf = None # make sure we get both UIDs while tf is None or len(set(tf.user)) < 2: - tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ('user', 'item', 'rating')].sample(10) + tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ("user", "item", "rating")].sample(10) res = lkb.predict(mlb.algo, tf) assert len(res) == 10 assert set(res.user) == set(uids) - preds = res.set_index(['user', 'item']) - preds['expected'] = mlb.algo.mean_ - preds['expected'] += mlb.algo.item_offsets_ - preds['expected'] += mlb.algo.user_offsets_ + preds = res.set_index(["user", "item"]) + preds["expected"] = mlb.algo.mean_ + preds["expected"] += mlb.algo.item_offsets_ + preds["expected"] += mlb.algo.user_offsets_ assert preds.prediction.values == pytest.approx(preds.expected.values) - urv = mlb.ratings.set_index(['user', 'item']) + urv = mlb.ratings.set_index(["user", "item"]) assert all(preds.rating.values == urv.loc[preds.index, :].rating.values) -@pytest.mark.skipif(not lktu.ml100k.available, reason='ML-100K required') +@pytest.mark.skipif(not lktu.ml100k.available, reason="ML-100K required") @pytest.mark.eval -@pytest.mark.parametrize('ncpus', [None, 1, 2]) +@pytest.mark.parametrize("ncpus", [None, 1, 2]) def test_bias_batch_predict(ncpus): from lenskit.algorithms import bias import lenskit.crossfold as xf @@ -116,19 +116,19 @@ def test_bias_batch_predict(ncpus): algo = bias.Bias(damping=5) def eval(train, test): - _log.info('running training') + 
_log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.predict(algo, test, n_jobs=ncpus) return recs - preds = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))) + preds = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + ) - _log.info('analyzing predictions') + _log.info("analyzing predictions") rmse = pm.rmse(preds.prediction, preds.rating) - _log.info('RMSE is %f', rmse) + _log.info("RMSE is %f", rmse) assert rmse == pytest.approx(0.95, abs=0.1) @@ -144,4 +144,4 @@ def test_batch_predict_preshared(): ares = lkb.train_isolated(algo, train) preds = lkb.predict(ares, test) assert len(preds) == len(test) - assert not any(preds['prediction'].isna()) + assert not any(preds["prediction"].isna()) diff --git a/tests/test_batch_recommend.py b/tests/test_batch_recommend.py index 92ad52e6a..2d8a3d08b 100644 --- a/tests/test_batch_recommend.py +++ b/tests/test_batch_recommend.py @@ -12,7 +12,7 @@ from lenskit import batch, topn import lenskit.crossfold as xf -MLB = namedtuple('MLB', ['ratings', 'algo']) +MLB = namedtuple("MLB", ["ratings", "algo"]) _log = logging.getLogger(__name__) @@ -32,33 +32,32 @@ def __init__(self, ratings): self.isolate = False def evaluate(self, algo, train, test, **kwargs): - _log.info('running training') + _log.info("running training") if self.isolate: algo = batch.train_isolated(algo, train) else: algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(algo, test.user.unique(), 100, **kwargs) return recs def eval_all(self, algo, **kwargs): - return pd.concat(self.evaluate(algo, train, test, **kwargs) - for (train, test) in self.folds) + return pd.concat(self.evaluate(algo, train, test, **kwargs) for (train, test) in self.folds) def check_positive_ndcg(self, recs): - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, self.test) dcg = results.ndcg - _log.info('nDCG for %d users is %f (max=%f)', len(dcg), dcg.mean(), dcg.max()) + _log.info("nDCG for %d users is %f (max=%f)", len(dcg), dcg.mean(), dcg.max()) assert dcg.mean() > 0 @pytest.fixture def ml_folds() -> MLFolds: if not lktu.ml100k.available: - raise pytest.skip('ML-100K not available') + raise pytest.skip("ML-100K not available") ratings = lktu.ml100k.ratings return MLFolds(ratings) @@ -67,9 +66,9 @@ def test_recommend_single(mlb): res = batch.recommend(mlb.algo, [1], None, {1: [31]}) assert len(res) == 1 - assert all(res['user'] == 1) - assert all(res['rank'] == 1) - assert set(res.columns) == set(['user', 'rank', 'item', 'score']) + assert all(res["user"] == 1) + assert all(res["rank"] == 1) + assert set(res.columns) == set(["user", "rank", "item", "score"]) algo = mlb.algo.predictor expected = algo.mean_ + algo.item_offsets_.loc[31] + algo.user_offsets_.loc[1] @@ -87,9 +86,9 @@ def candidates(user): res = batch.recommend(mlb.algo, [5], 10, candidates) assert len(res) == 10 - assert set(res.columns) == set(['user', 'rank', 'item', 'score']) - assert all(res['user'] == uid) - assert all(res['rank'] == np.arange(10) + 1) + assert set(res.columns) == set(["user", "rank", "item", "score"]) + assert all(res["user"] == uid) + assert all(res["rank"] == np.arange(10) + 1) # they should be in decreasing 
order assert all(np.diff(res.score) <= 0) @@ -105,12 +104,12 @@ def candidates(user): assert len(res) == 20 assert set(res.user) == set([5, 10]) - assert all(res.groupby('user').item.count() == 10) - assert all(res.groupby('user')['rank'].max() == 10) + assert all(res.groupby("user").item.count() == 10) + assert all(res.groupby("user")["rank"].max() == 10) assert all(np.diff(res[res.user == 5].score) <= 0) - assert all(np.diff(res[res.user == 5]['rank']) == 1) + assert all(np.diff(res[res.user == 5]["rank"]) == 1) assert all(np.diff(res[res.user == 10].score) <= 0) - assert all(np.diff(res[res.user == 10]['rank']) == 1) + assert all(np.diff(res[res.user == 10]["rank"]) == 1) def test_recommend_no_cands(mlb): @@ -118,19 +117,19 @@ def test_recommend_no_cands(mlb): assert len(res) == 20 assert set(res.user) == set([5, 10]) - assert all(res.groupby('user').item.count() == 10) - assert all(res.groupby('user')['rank'].max() == 10) + assert all(res.groupby("user").item.count() == 10) + assert all(res.groupby("user")["rank"].max() == 10) assert all(np.diff(res[res.user == 5].score) <= 0) - assert all(np.diff(res[res.user == 5]['rank']) == 1) + assert all(np.diff(res[res.user == 5]["rank"]) == 1) assert all(np.diff(res[res.user == 10].score) <= 0) - assert all(np.diff(res[res.user == 10]['rank']) == 1) + assert all(np.diff(res[res.user == 10]["rank"]) == 1) - idx_rates = mlb.ratings.set_index(['user', 'item']) - merged = res.join(idx_rates, on=['user', 'item'], how='inner') + idx_rates = mlb.ratings.set_index(["user", "item"]) + merged = res.join(idx_rates, on=["user", "item"], how="inner") assert len(merged) == 0 -@pytest.mark.parametrize(('ncpus', 'isolate'), [(None, False), (1, False), (2, True)]) +@pytest.mark.parametrize(("ncpus", "isolate"), [(None, False), (1, False), (2, True)]) @pytest.mark.eval def test_bias_batch_recommend(ml_folds: MLFolds, ncpus, isolate): algo = Bias(damping=5) @@ -142,7 +141,7 @@ def test_bias_batch_recommend(ml_folds: MLFolds, ncpus, isolate): ml_folds.check_positive_ndcg(recs) -@pytest.mark.parametrize('ncpus', [None, 1, 2]) +@pytest.mark.parametrize("ncpus", [None, 1, 2]) @pytest.mark.eval def test_pop_batch_recommend(ml_folds: MLFolds, ncpus): algo = Popular() diff --git a/tests/test_batch_train.py b/tests/test_batch_train.py index 1f79cc841..fbd1a4ca5 100644 --- a/tests/test_batch_train.py +++ b/tests/test_batch_train.py @@ -21,7 +21,7 @@ def test_train_isolate(): def test_train_isolate_file(tmp_path): - fn = tmp_path / 'saved.bpk' + fn = tmp_path / "saved.bpk" algo = Bias() algo = Recommender.adapt(algo) diff --git a/tests/test_bias.py b/tests/test_bias.py index db459427d..275c71c26 100644 --- a/tests/test_bias.py +++ b/tests/test_bias.py @@ -14,9 +14,9 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_bias_check_arguments(): @@ -39,12 +39,12 @@ def test_bias_full(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 1.5, -1.5])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == 
set([10, 12, 13]) assert algo.user_offsets_.loc[[10, 12, 13]].values == approx(np.array([0.25, -0.5, 0])) @@ -54,13 +54,13 @@ def test_bias_clone(): algo.fit(simple_df) params = algo.get_params() - assert sorted(params.keys()) == ['damping', 'items', 'users'] + assert sorted(params.keys()) == ["damping", "items", "users"] a2 = lku.clone(algo) assert a2 is not algo - assert getattr(a2, 'mean_', None) is None - assert getattr(a2, 'item_offsets_', None) is None - assert getattr(a2, 'user_offsets_', None) is None + assert getattr(a2, "mean_", None) is None + assert getattr(a2, "item_offsets_", None) is None + assert getattr(a2, "user_offsets_", None) is None def test_bias_global_only(): @@ -77,7 +77,7 @@ def test_bias_no_user(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 1.5, -1.5])) @@ -91,7 +91,7 @@ def test_bias_no_item(): assert algo.item_offsets_ is None assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) assert algo.user_offsets_.loc[[10, 12, 13]].values == approx(np.array([1.0, -0.5, -1.5])) @@ -99,8 +99,8 @@ def test_bias_no_item(): def test_bias_index_props(): algo = Bias() algo.fit(simple_df) - assert all(np.sort(algo.user_index) == np.unique(simple_df['user'])) - assert all(np.sort(algo.item_index) == np.unique(simple_df['item'])) + assert all(np.sort(algo.user_index) == np.unique(simple_df["user"])) + assert all(np.sort(algo.item_index) == np.unique(simple_df["item"])) def test_bias_global_predict(): @@ -140,13 +140,13 @@ def test_bias_new_user_predict(): algo = Bias() algo.fit(simple_df) - ratings = pd.DataFrame({'item': [1, 2, 3], 'rating': [1.5, 2.5, 3.5]}) - ratings = ratings.set_index('item').rating + ratings = pd.DataFrame({"item": [1, 2, 3], "rating": [1.5, 2.5, 3.5]}) + ratings = ratings.set_index("item").rating p = algo.predict_for_user(None, [1, 3], ratings=ratings) offs = ratings - algo.mean_ - algo.item_offsets_ umean = offs.mean() - _log.info('user mean is %f', umean) + _log.info("user mean is %f", umean) assert len(p) == 2 assert p.values == approx((algo.mean_ + algo.item_offsets_ + umean).loc[[1, 3]].values) @@ -180,12 +180,12 @@ def test_bias_train_ml_ratings(): algo.fit(ratings) assert algo.mean_ == approx(ratings.rating.mean()) - imeans_data = ratings.groupby('item').rating.mean() + imeans_data = ratings.groupby("item").rating.mean() imeans_algo = algo.item_offsets_ + algo.mean_ ares, data = imeans_algo.align(imeans_data) assert ares.values == approx(data.values) - urates = ratings.set_index('user').loc[2].set_index('item').rating + urates = ratings.set_index("user").loc[2].set_index("item").rating umean = (urates - imeans_data[urates.index]).mean() p = algo.predict_for_user(2, [10, 11, -1]) assert len(p) == 3 @@ -200,15 +200,15 @@ def test_bias_transform(): normed = algo.fit_transform(ratings) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'], 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"], 1.0e-6) - n2 = 
ratings.join(algo.item_offsets_, on='item') - n2 = n2.join(algo.user_offsets_, on='user') + n2 = ratings.join(algo.item_offsets_, on="item") + n2 = n2.join(algo.user_offsets_, on="user") nr = n2.rating - algo.mean_ - n2.i_off - n2.u_off - assert normed['rating'].values == approx(nr.values) + assert normed["rating"].values == approx(nr.values) def test_bias_transform_indexes(): @@ -217,35 +217,35 @@ def test_bias_transform_indexes(): normed = algo.fit_transform(ratings, indexes=True) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) - assert all(normed['uidx'] == algo.user_offsets_.index.get_indexer(ratings['user'])) - assert all(normed['iidx'] == algo.item_offsets_.index.get_indexer(ratings['item'])) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) + assert all(normed["uidx"] == algo.user_offsets_.index.get_indexer(ratings["user"])) + assert all(normed["iidx"] == algo.item_offsets_.index.get_indexer(ratings["item"])) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'].values, 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"].values, 1.0e-6) -@mark.parametrize(['users', 'items'], [(True, False), (False, True), (False, False)]) +@mark.parametrize(["users", "items"], [(True, False), (False, True), (False, False)]) def test_bias_transform_disable(users, items): algo = Bias(users=users, items=items) ratings = ml_test.ratings normed = algo.fit_transform(ratings) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'], 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"], 1.0e-6) n2 = ratings nr = n2.rating - algo.mean_ if items: - n2 = n2.join(algo.item_offsets_, on='item') + n2 = n2.join(algo.item_offsets_, on="item") nr = nr - n2.i_off if users: - n2 = n2.join(algo.user_offsets_, on='user') + n2 = n2.join(algo.user_offsets_, on="user") nr = nr - n2.u_off - assert normed['rating'].values == approx(nr.values) + assert normed["rating"].values == approx(nr.values) def test_bias_item_damp(): @@ -254,7 +254,7 @@ def test_bias_item_damp(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) @@ -268,10 +268,11 @@ def test_bias_user_damp(): assert algo.item_offsets_ is None assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.2857, -0.08333, -0.25]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.2857, -0.08333, -0.25]), abs=1.0e-4 + ) def test_bias_damped(): @@ -280,15 +281,16 @@ def test_bias_damped(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 
0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) def test_bias_separate_damping(): @@ -297,64 +299,76 @@ def test_bias_separate_damping(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) - assert algo.item_offsets_.loc[1:3].values == \ - approx(np.array([0, 0.136364, -0.13636]), abs=1.0e-4) + assert algo.item_offsets_.loc[1:3].values == approx( + np.array([0, 0.136364, -0.13636]), abs=1.0e-4 + ) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.266234, -0.08333, -0.22727]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.266234, -0.08333, -0.22727]), abs=1.0e-4 + ) + def test_transform_user_with_user_bias(): algo = Bias() algo.fit(simple_df) - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values - ratings_with_bias, user_bias = algo.transform_user(new_ratings) # user: 13 + ratings_with_bias, user_bias = algo.transform_user(new_ratings) # user: 13 result = algo.inverse_transform_user(13, ratings_with_bias, user_bias) assert new_ratings[1] == result[1] assert new_ratings[2] == result[2] + def test_transform_user_without_user_bias(): user = 12 algo = Bias() algo.fit(simple_df) - new_ratings = pd.Series([-0.5, 1.5], index=[2, 3]) # items as index and ratings as values + new_ratings = pd.Series([-0.5, 1.5], index=[2, 3]) # items as index and ratings as values v = algo.inverse_transform_user(user, new_ratings) - assert v[2] == new_ratings[2] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[2] + algo.mean_ - assert v[3] == new_ratings[3] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[3] + algo.mean_ + assert ( + v[2] + == new_ratings[2] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[2] + algo.mean_ + ) + assert ( + v[3] + == new_ratings[3] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[3] + algo.mean_ + ) + def test_bias_save(): original = Bias(damping=5) original.fit(simple_df) assert original.mean_ == approx(3.5) - _log.info('saving baseline model') + _log.info("saving baseline model") mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.mean_ == original.mean_ assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == 
set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) def test_bias_binpickle(tmp_path): @@ -362,20 +376,21 @@ def test_bias_binpickle(tmp_path): original.fit(simple_df) assert original.mean_ == approx(3.5) - _log.info('saving baseline model') - fn = tmp_path / 'bias.bpk' + _log.info("saving baseline model") + fn = tmp_path / "bias.bpk" binpickle.dump(original, fn) algo = binpickle.load(fn) assert algo.mean_ == original.mean_ assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) diff --git a/tests/test_candidate_selector.py b/tests/test_candidate_selector.py index e5eae1615..0d614a0b2 100644 --- a/tests/test_candidate_selector.py +++ b/tests/test_candidate_selector.py @@ -4,9 +4,9 @@ import pandas as pd import numpy as np -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_empty(): @@ -45,7 +45,7 @@ def test_unrated_big(): ratings = lktu.ml_test.ratings users = ratings.user.unique() items = ratings.item.unique() - user_items = ratings.set_index('user').item + user_items = ratings.set_index("user").item sel = basic.UnratedItemCandidateSelector() s2 = sel.fit(ratings) diff --git a/tests/test_crossfold.py b/tests/test_crossfold.py index 04efadb53..aa660f2c3 100644 --- a/tests/test_crossfold.py +++ b/tests/test_crossfold.py @@ -19,8 +19,8 @@ def test_partition_rows(): for s in splits: assert len(s.test) + len(s.train) == len(ratings) assert all(s.test.index.union(s.train.index) == ratings.index) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 # we should partition! 
@@ -28,8 +28,8 @@ def test_partition_rows(): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -46,16 +46,16 @@ def test_sample_rows(): for s in splits: assert len(s.test) == 1000 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 for s1, s2 in it.product(splits, splits): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -69,16 +69,16 @@ def test_sample_rows_more_smaller_parts(): for s in splits: assert len(s.test) == 500 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 for s1, s2 in it.product(splits, splits): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -92,13 +92,15 @@ def test_sample_non_disjoint(): for s in splits: assert len(s.test) == 1000 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 # There are enough splits & items we should pick at least one duplicate - ipairs = ((s1.test.set_index('user', 'item').index, s2.test.set_index('user', 'item').index) - for (s1, s2) in it.product(splits, splits)) + ipairs = ( + (s1.test.set_index("user", "item").index, s2.test.set_index("user", "item").index) + for (s1, s2) in it.product(splits, splits) + ) isizes = [len(i1.intersection(i2)) for (i1, i2) in ipairs] assert any(n > 0 for n in isizes) @@ -113,8 +115,8 @@ def test_sample_oversize(): for s in splits: assert len(s.test) + len(s.train) == len(ratings) assert all(s.test.index.union(s.train.index) == ratings.index) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 @@ -190,7 +192,7 @@ def test_last_frac(): ratings = lktu.ml_test.ratings users = np.random.choice(ratings.user.unique(), 5, replace=False) - samp = xf.LastFrac(0.2, 'timestamp') + samp = xf.LastFrac(0.2, "timestamp") for u in users: udf = ratings[ratings.user == u] tst = samp(udf) @@ -200,7 +202,7 @@ def test_last_frac(): assert len(tst) <= math.ceil(len(udf) * 0.2) assert tst.timestamp.min() >= trn.timestamp.max() - samp = xf.LastFrac(0.5, 'timestamp') + samp = 
xf.LastFrac(0.5, "timestamp") for u in users: udf = ratings[ratings.user == u] tst = samp(udf) @@ -218,14 +220,13 @@ def test_partition_users(): assert len(splits) == 5 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert all(ucounts == 5) assert all(s.test.index.union(s.train.index) == ratings.index) - assert all(s.train['user'].isin(s.train['user'].unique())) + assert all(s.train["user"].isin(s.train["user"].unique())) assert len(s.test) + len(s.train) == len(ratings) - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) @@ -235,9 +236,9 @@ def test_partition_may_skip_train(): ratings = lktu.ml_test.ratings # make a data set where some users only have 1 rating ratings = ratings.sample(frac=0.1) - users = ratings.groupby('user')['rating'].count() + users = ratings.groupby("user")["rating"].count() assert users.min() == 1.0 # we should have some small users! - users.name = 'ur_count' + users.name = "ur_count" splits = xf.partition_users(ratings, 5, xf.SampleN(1)) splits = list(splits) @@ -246,12 +247,12 @@ def test_partition_may_skip_train(): # now we go make sure we're missing some users! And don't have any NaN ratings for train, test in splits: # no null ratings - assert all(train['rating'].notna()) + assert all(train["rating"].notna()) # see if test users with 1 rating are missing from train - test = test.join(users, on='user') - assert all(~(test.loc[test['ur_count'] == 1, 'user'].isin(train['user'].unique()))) + test = test.join(users, on="user") + assert all(~(test.loc[test["ur_count"] == 1, "user"].isin(train["user"].unique()))) # and users with more than one rating are in train - assert all(test.loc[test['ur_count'] > 1, 'user'].isin(train['user'].unique())) + assert all(test.loc[test["ur_count"] > 1, "user"].isin(train["user"].unique())) def test_partition_users_frac(): @@ -259,19 +260,18 @@ def test_partition_users_frac(): splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: - tucs = s.test.groupby('user').item.count() + tucs = s.test.groupby("user").item.count() assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) assert all(s.test.index.union(s.train.index) == ratings.index) assert len(s.test) + len(s.train) == len(ratings) # we have all users - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) @@ -283,7 +283,7 @@ def test_sample_users(): assert len(splits) == 5 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(s.test) == 5 * 100 assert len(ucounts) == 100 assert all(ucounts == 5) @@ -304,11 +304,11 @@ def test_sample_users_frac(): splits = xf.sample_users(ratings, 5, 100, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: - tucs = s.test.groupby('user').item.count() + tucs = 
s.test.groupby("user").item.count() assert len(tucs) == 100 assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) @@ -332,14 +332,13 @@ def test_sample_users_frac_oversize(): assert len(splits) == 20 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(ucounts) < 100 assert all(ucounts == 5) assert all(s.test.index.union(s.train.index) == ratings.index) assert len(s.test) + len(s.train) == len(ratings) - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) for s1, s2 in it.product(splits, splits): @@ -358,7 +357,7 @@ def test_sample_users_frac_oversize_ndj(): assert len(splits) == 20 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(ucounts) == 100 assert len(s.test) == 5 * 100 assert all(ucounts == 5) @@ -369,7 +368,7 @@ def test_sample_users_frac_oversize_ndj(): def test_non_unique_index_partition_users(): """Partitioning users when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.partition_users(ratings, 5, xf.SampleN(5)): pass @@ -378,7 +377,7 @@ def test_non_unique_index_partition_users(): def test_sample_users(): """Sampling users when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.sample_users(ratings, 5, 100, xf.SampleN(5)): pass @@ -387,7 +386,7 @@ def test_sample_users(): def test_sample_rows(): """Sampling ratings when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.sample_rows(ratings, partitions=5, size=1000): pass @@ -396,7 +395,7 @@ def test_sample_rows(): def test_partition_users(): """Partitioning ratings when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.partition_users(ratings, 5, xf.SampleN(5)): pass diff --git a/tests/test_fallback.py b/tests/test_fallback.py index ba958bf43..b2644b866 100644 --- a/tests/test_fallback.py +++ b/tests/test_fallback.py @@ -9,9 +9,9 @@ import lenskit.util.test as lktu from pytest import approx -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_fallback_train_one(): @@ -42,15 +42,15 @@ def test_fallback_list(): assert len(algo.algorithms) == 2 params = algo.get_params() - assert list(params.keys()) == ['algorithms'] - assert len(params['algorithms']) == 2 - assert isinstance(params['algorithms'][0], basic.Memorized) - assert isinstance(params['algorithms'][1], Bias) + assert list(params.keys()) == ["algorithms"] + assert 
len(params["algorithms"]) == 2 + assert isinstance(params["algorithms"][0], basic.Memorized) + assert isinstance(params["algorithms"][1], Bias) def test_fallback_string(): algo = basic.Fallback([basic.Memorized(simple_df), Bias()]) - assert 'Fallback' in str(algo) + assert "Fallback" in str(algo) def test_fallback_clone(): @@ -110,7 +110,7 @@ def test_fallback_save_load(tmp_path): original = basic.Fallback(basic.Memorized(simple_df), Bias()) original.fit(lktu.ml_test.ratings) - fn = tmp_path / 'fb.mod' + fn = tmp_path / "fb.mod" binpickle.dump(original, fn) diff --git a/tests/test_funksvd.py b/tests/test_funksvd.py index 149ed1f2d..ffcc2ac7f 100644 --- a/tests/test_funksvd.py +++ b/tests/test_funksvd.py @@ -13,9 +13,9 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_fsvd_basic_build(): @@ -136,7 +136,7 @@ def test_fsvd_save_load(): assert original.user_features_.shape == (ratings.user.nunique(), 20) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -151,7 +151,7 @@ def test_fsvd_save_load(): @lktu.wantjit @mark.slow def test_fsvd_train_binary(): - ratings = lktu.ml_test.ratings.drop(columns=['rating', 'timestamp']) + ratings = lktu.ml_test.ratings.drop(columns=["rating", "timestamp"]) original = svd.FunkSVD(20, iterations=20, bias=False) original.fit(ratings) @@ -165,19 +165,19 @@ def test_fsvd_train_binary(): @mark.slow def test_fsvd_known_preds(): algo = svd.FunkSVD(15, iterations=125, lrate=0.001) - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) dir = Path(__file__).parent - pred_file = dir / 'funksvd-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "funksvd-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] + pairs = known_preds.loc[:, ["user", "item"]] preds = algo.predict(pairs) - known_preds.rename(columns={'prediction': 'expected'}, inplace=True) + known_preds.rename(columns={"prediction": "expected"}, inplace=True) merged = known_preds.assign(prediction=preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction assert not any(merged.prediction.isna() & merged.expected.notna()) err = merged.error err = err[err.notna()] @@ -185,14 +185,14 @@ def test_fsvd_known_preds(): assert all(err.abs() < 0.01) except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('erroneous predictions:\n%s', bad) + _log.error("erroneous predictions:\n%s", bad) raise e @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_fsvd_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -206,9 +206,9 @@ def test_fsvd_batch_accuracy(): algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + 
_log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) @@ -216,5 +216,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_knn_item_item.py b/tests/test_knn_item_item.py index 0518b1cb2..e4a3f6097 100644 --- a/tests/test_knn_item_item.py +++ b/tests/test_knn_item_item.py @@ -28,59 +28,62 @@ _log = logging.getLogger(__name__) ml_ratings = lktu.ml_test.ratings -simple_ratings = pd.DataFrame.from_records([ - (1, 6, 4.0), - (2, 6, 2.0), - (1, 7, 3.0), - (2, 7, 2.0), - (3, 7, 5.0), - (4, 7, 2.0), - (1, 8, 3.0), - (2, 8, 4.0), - (3, 8, 3.0), - (4, 8, 2.0), - (5, 8, 3.0), - (6, 8, 2.0), - (1, 9, 3.0), - (3, 9, 4.0) -], columns=['user', 'item', 'rating']) - - -@fixture(scope='module') +simple_ratings = pd.DataFrame.from_records( + [ + (1, 6, 4.0), + (2, 6, 2.0), + (1, 7, 3.0), + (2, 7, 2.0), + (3, 7, 5.0), + (4, 7, 2.0), + (1, 8, 3.0), + (2, 8, 4.0), + (3, 8, 3.0), + (4, 8, 2.0), + (5, 8, 3.0), + (6, 8, 2.0), + (1, 9, 3.0), + (3, 9, 4.0), + ], + columns=["user", "item", "rating"], +) + + +@fixture(scope="module") def ml_subset(): "Fixture that returns a subset of the MovieLens database." ratings = lktu.ml_test.ratings - icounts = ratings.groupby('item').rating.count() + icounts = ratings.groupby("item").rating.count() top = icounts.nlargest(500) - ratings = ratings.set_index('item') + ratings = ratings.set_index("item") top_rates = ratings.loc[top.index, :] - _log.info('top 500 items yield %d of %d ratings', len(top_rates), len(ratings)) + _log.info("top 500 items yield %d of %d ratings", len(top_rates), len(ratings)) return top_rates.reset_index() def test_ii_dft_config(): algo = knn.ItemItem(30, save_nbrs=500) assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_ii_exp_config(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='explicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="explicit") assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_ii_imp_config(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="implicit") assert not algo.center - assert algo.aggregate == 'sum' + assert algo.aggregate == "sum" assert not algo.use_ratings def test_ii_imp_clone(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="implicit") a2 = clone(algo) assert a2.get_params() == algo.get_params() @@ -98,17 +101,17 @@ def test_ii_train(): # 6 is a neighbor of 7 six, seven = algo.item_index_.get_indexer([6, 7]) - _log.info('six: %d', six) - _log.info('seven: %d', seven) - _log.info('matrix: %s', algo.sim_matrix_) + _log.info("six: %d", six) + _log.info("seven: %d", seven) + _log.info("matrix: %s", algo.sim_matrix_) assert matrix[six, seven] > 0 # and has the correct score - six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating + six_v = simple_ratings[simple_ratings.item == 6].set_index("user").rating six_v = six_v - six_v.mean() - seven_v = simple_ratings[simple_ratings.item == 
7].set_index('user').rating + seven_v = simple_ratings[simple_ratings.item == 7].set_index("user").rating seven_v = seven_v - seven_v.mean() denom = la.norm(six_v.values) * la.norm(seven_v.values) - six_v, seven_v = six_v.align(seven_v, join='inner') + six_v, seven_v = six_v.align(seven_v, join="inner") num = six_v.dot(seven_v) assert matrix[six, seven] == approx(num / denom, 0.01) @@ -133,12 +136,12 @@ def test_ii_train_unbounded(): assert matrix[six, seven] > 0 # and has the correct score - six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating + six_v = simple_ratings[simple_ratings.item == 6].set_index("user").rating six_v = six_v - six_v.mean() - seven_v = simple_ratings[simple_ratings.item == 7].set_index('user').rating + seven_v = simple_ratings[simple_ratings.item == 7].set_index("user").rating seven_v = seven_v - seven_v.mean() denom = la.norm(six_v.values) * la.norm(seven_v.values) - six_v, seven_v = six_v.align(seven_v, join='inner') + six_v, seven_v = six_v.align(seven_v, join="inner") num = six_v.dot(seven_v) assert matrix[six, seven] == approx(num / denom, 0.01) @@ -155,8 +158,8 @@ def test_ii_simple_predict(): def test_ii_simple_implicit_predict(): - algo = knn.ItemItem(30, center=False, aggregate='sum') - algo.fit(simple_ratings.loc[:, ['user', 'item']]) + algo = knn.ItemItem(30, center=False, aggregate="sum") + algo.fit(simple_ratings.loc[:, ["user", "item"]]) res = algo.predict_for_user(3, [6]) assert res is not None @@ -168,9 +171,7 @@ def test_ii_simple_implicit_predict(): @mark.skip("currently broken") def test_ii_warn_duplicates(): - extra = pd.DataFrame.from_records([ - (3, 7, 4.5) - ], columns=['user', 'item', 'rating']) + extra = pd.DataFrame.from_records([(3, 7, 4.5)], columns=["user", "item", "rating"]) ratings = pd.concat([simple_ratings, extra]) algo = knn.ItemItem(5) algo.fit(ratings) @@ -193,7 +194,7 @@ def test_ii_warns_center(): def test_ii_warns_center_with_no_use_ratings(): "Test that item-item warns if you configure to ignore ratings but center." 
with pytest.warns(ConfigWarning): - knn.ItemItem(5, use_ratings=False, aggregate='sum') + knn.ItemItem(5, use_ratings=False, aggregate="sum") def test_ii_warns_wa_with_no_use_ratings(): @@ -216,7 +217,7 @@ def test_ii_train_big(): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) @@ -234,20 +235,20 @@ def test_ii_train_big_unbounded(): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) @lktu.wantjit -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_ii_train_ml100k(tmp_path): "Test an unbounded model on ML-100K" ratings = lktu.ml100k.ratings algo = knn.ItemItem(30) - _log.info('training model') + _log.info("training model") algo.fit(ratings) - _log.info('testing model') + _log.info("testing model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) @@ -257,17 +258,17 @@ def test_ii_train_ml100k(tmp_path): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ratings.groupby('item').rating.mean() + means = ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) # save - fn = tmp_path / 'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(algo, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: restored = pickle.load(modf) assert all(restored.sim_matrix_.values > 0) @@ -290,22 +291,22 @@ def test_ii_train_ml100k(tmp_path): @mark.slow def test_ii_large_models(): "Several tests of large trained I-I models" - _log.info('training limited model') + _log.info("training limited model") MODEL_SIZE = 100 algo_lim = knn.ItemItem(30, save_nbrs=MODEL_SIZE) algo_lim.fit(ml_ratings) - _log.info('training unbounded model') + _log.info("training unbounded model") algo_ub = knn.ItemItem(30) algo_ub.fit(ml_ratings) - _log.info('testing models') + _log.info("testing models") assert all(np.logical_not(np.isnan(algo_lim.sim_matrix_.values))) assert all(algo_lim.sim_matrix_.values > 0) # a little tolerance assert all(algo_lim.sim_matrix_.values < 1 + 1.0e-6) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_lim.item_index_].values == approx(algo_lim.item_means_) assert all(np.logical_not(np.isnan(algo_ub.sim_matrix_.values))) @@ -313,24 +314,26 @@ def test_ii_large_models(): # a little tolerance assert all(algo_ub.sim_matrix_.values < 1 + 1.0e-6) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_ub.item_index_].values == approx(algo_ub.item_means_) - mc_rates = ml_ratings.set_index('item')\ - .join(pd.DataFrame({'item_mean': means}))\ - .assign(rating=lambda df: df.rating - df.item_mean) + mc_rates = ( + ml_ratings.set_index("item") + .join(pd.DataFrame({"item_mean": means})) + .assign(rating=lambda df: df.rating - df.item_mean) + ) mat_lim = algo_lim.sim_matrix_.to_scipy() mat_ub = algo_ub.sim_matrix_.to_scipy() - 
_log.info('checking a sample of neighborhoods') + _log.info("checking a sample of neighborhoods") items = pd.Series(algo_ub.item_index_) items = items[algo_ub.item_counts_ > 0] for i in items.sample(50): ipos = algo_ub.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) assert ipos == algo_lim.item_index_.get_loc(i) - irates = mc_rates.loc[[i], :].set_index('user').rating + irates = mc_rates.loc[[i], :].set_index("user").rating ub_row = mat_ub.getrow(ipos) b_row = mat_lim.getrow(ipos) @@ -345,14 +348,14 @@ def test_ii_large_models(): # spot-check some similarities for n in pd.Series(ub_row.indices).sample(min(10, len(ub_row.indices))): n_id = algo_ub.item_index_[n] - n_rates = mc_rates.loc[n_id, :].set_index('user').rating + n_rates = mc_rates.loc[n_id, :].set_index("user").rating ir, nr = irates.align(n_rates, fill_value=0) cor = ir.corr(nr) assert mat_ub[ipos, n] == approx(cor) # short rows are equal if b_row.nnz < MODEL_SIZE: - _log.debug('short row of length %d', b_row.nnz) + _log.debug("short row of length %d", b_row.nnz) assert b_row.nnz == ub_row.nnz ub_row.sort_indices() b_row.sort_indices() @@ -367,7 +370,7 @@ def test_ii_large_models(): assert len(b_nbrs) <= MODEL_SIZE assert all(b_nbrs.index.isin(ub_nbrs.index)) # the similarities should be equal! - b_match, ub_match = b_nbrs.align(ub_nbrs, join='inner') + b_match, ub_match = b_nbrs.align(ub_nbrs, join="inner") assert all(b_match == b_nbrs) assert b_match.values == approx(ub_match.values) assert b_nbrs.max() == approx(ub_nbrs.max()) @@ -385,19 +388,19 @@ def test_ii_large_models(): def test_ii_save_load(tmp_path, ml_subset): "Save and load a model" original = knn.ItemItem(30, save_nbrs=500) - _log.info('building model') + _log.info("building model") original.fit(ml_subset) - fn = tmp_path / 'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(original, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: algo = pickle.load(modf) - _log.info('checking model') + _log.info("checking model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) # a little tolerance @@ -421,7 +424,7 @@ def test_ii_save_load(tmp_path, ml_subset): assert all(np.diff(r_mat.values[sp:ep]) <= 0) assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep]) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(original.item_means_) matrix = algo.sim_matrix_.to_scipy() @@ -430,7 +433,7 @@ def test_ii_save_load(tmp_path, ml_subset): items = items[algo.item_counts_ > 0] for i in items.sample(50): ipos = algo.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) row = matrix.getrow(ipos) @@ -441,20 +444,20 @@ def test_ii_save_load(tmp_path, ml_subset): def test_ii_implicit_save_load(tmp_path, ml_subset): "Save and load a model" - original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate='sum') - _log.info('building model') - original.fit(ml_subset.loc[:, ['user', 'item']]) + original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate="sum") + _log.info("building model") + original.fit(ml_subset.loc[:, ["user", "item"]]) - fn = tmp_path / 
'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(original, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: algo = pickle.load(modf) - _log.info('checking model') + _log.info("checking model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) # a little tolerance @@ -487,7 +490,7 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): items = items[algo.item_counts_ > 0] for i in items.sample(50): ipos = algo.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) row = matrix.getrow(ipos) @@ -499,8 +502,8 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): @lktu.wantjit @mark.slow def test_ii_old_implicit(): - algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -514,10 +517,10 @@ def test_ii_old_implicit(): @lktu.wantjit @mark.slow def test_ii_no_ratings(): - a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - a1.fit(ml_ratings.loc[:, ['user', 'item']]) + a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + a1.fit(ml_ratings.loc[:, ["user", "item"]]) - algo = knn.ItemItem(20, save_nbrs=100, feedback='implicit') + algo = knn.ItemItem(20, save_nbrs=100, feedback="implicit") algo.fit(ml_ratings) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -533,8 +536,8 @@ def test_ii_no_ratings(): @mark.slow def test_ii_implicit_fast_ident(): - algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -553,7 +556,7 @@ def test_ii_implicit_fast_ident(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_ii_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -567,18 +570,18 @@ def test_ii_batch_accuracy(): algo = basic.Fallback(ii_algo, bias.Bias()) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test, n_jobs=4) - preds = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))) + preds = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + ) mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.70, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.90, abs=0.05) @@ -588,27 +591,27 @@ def test_ii_known_preds(): from lenskit import batch algo = knn.ItemItem(20, min_sim=1.0e-6) - 
_log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) assert algo.center assert algo.item_means_ is not None - _log.info('model means: %s', algo.item_means_) + _log.info("model means: %s", algo.item_means_) dir = Path(__file__).parent - pred_file = dir / 'item-item-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "item-item-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] + pairs = known_preds.loc[:, ["user", "item"]] preds = batch.predict(algo, pairs) - merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds) + merged = pd.merge(known_preds.rename(columns={"prediction": "expected"}), preds) assert len(merged) == len(preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction try: assert not any(merged.prediction.isna() & merged.expected.notna()) except AssertionError as e: bad = merged[merged.prediction.isna() & merged.expected.notna()] - _log.error('erroneously missing or present predictions:\n%s', bad) + _log.error("erroneously missing or present predictions:\n%s", bad) raise e err = merged.error @@ -617,33 +620,33 @@ def test_ii_known_preds(): assert all(err.abs() < 0.03) # FIXME this threshold is too high except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('erroneous predictions:\n%s', bad) + _log.error("erroneous predictions:\n%s", bad) raise e def _train_ii(): algo = knn.ItemItem(20, min_sim=1.0e-6) timer = Stopwatch() - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) - _log.info('trained in %s', timer) + _log.info("trained in %s", timer) shr = persist(algo) return shr.transfer() @lktu.wantjit @mark.slow -@mark.skip('no longer testing II match') -@mark.skipif(csrk.name != 'csr.kernels.mkl', reason='only needed when MKL is available') +@mark.skip("no longer testing II match") +@mark.skipif(csrk.name != "csr.kernels.mkl", reason="only needed when MKL is available") def test_ii_impl_match(): mkl_h = None nba_h = None try: - with lktu.set_env_var('CSR_KERNEL', 'mkl'): + with lktu.set_env_var("CSR_KERNEL", "mkl"): mkl_h = run_sp(_train_ii) mkl = mkl_h.get() - with lktu.set_env_var('CSR_KERNEL', 'numba'): + with lktu.set_env_var("CSR_KERNEL", "numba"): nba_h = run_sp(_train_ii) nba = nba_h.get() @@ -657,8 +660,9 @@ def test_ii_impl_match(): assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0) assert all(np.diff(nba.sim_matrix_.values[sp:ep]) <= 0) assert set(mkl.sim_matrix_.colinds[sp:ep]) == set(nba.sim_matrix_.colinds[sp:ep]) - assert mkl.sim_matrix_.values[sp:ep] == \ - approx(nba.sim_matrix_.values[sp:ep], abs=1.0e-3) + assert mkl.sim_matrix_.values[sp:ep] == approx( + nba.sim_matrix_.values[sp:ep], abs=1.0e-3 + ) finally: mkl = None @@ -671,8 +675,8 @@ def test_ii_impl_match(): @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K not available') -@mark.parametrize('ncpus', [1, 2]) +@mark.skipif(not lktu.ml100k.available, reason="ML100K not available") +@mark.parametrize("ncpus", [1, 2]) def test_ii_batch_recommend(ncpus): import lenskit.crossfold as xf from lenskit import topn @@ -680,11 +684,11 @@ def test_ii_batch_recommend(ncpus): ratings = lktu.ml100k.ratings def eval(train, test): - _log.info('running training') + 
_log.info("running training") algo = knn.ItemItem(30) algo = Recommender.adapt(algo) algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(algo, test.user.unique(), 100, n_jobs=ncpus) return recs @@ -697,21 +701,21 @@ def eval(train, test): test = pd.concat(test_frames) recs = pd.concat(recs) - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, test) dcg = results.ndcg - _log.info('nDCG for %d users is %f', len(dcg), dcg.mean()) + _log.info("nDCG for %d users is %f", len(dcg), dcg.mean()) assert dcg.mean() > 0.03 def _build_predict(ratings, fold): algo = Fallback(knn.ItemItem(20), Bias(5)) - train = ratings[ratings['partition'] != fold] + train = ratings[ratings["partition"] != fold] algo.fit(train) - test = ratings[ratings['partition'] == fold] + test = ratings[ratings["partition"] == fold] preds = batch.predict(algo, test, n_jobs=1) return preds @@ -721,7 +725,7 @@ def _build_predict(ratings, fold): def test_ii_parallel_multi_build(): "Build multiple item-item models in parallel" ratings = lktu.ml_test.ratings - ratings['partition'] = np.random.choice(4, len(ratings), replace=True) + ratings["partition"] = np.random.choice(4, len(ratings), replace=True) with invoker(ratings, _build_predict, 2) as inv: preds = inv.map(range(4)) diff --git a/tests/test_knn_user_user.py b/tests/test_knn_user_user.py index 663677091..7fd2c90a3 100644 --- a/tests/test_knn_user_user.py +++ b/tests/test_knn_user_user.py @@ -23,28 +23,28 @@ def test_uu_dft_config(): algo = knn.UserUser(30) assert algo.nnbrs == 30 assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_uu_exp_config(): - algo = knn.UserUser(30, feedback='explicit') + algo = knn.UserUser(30, feedback="explicit") assert algo.nnbrs == 30 assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_uu_imp_config(): - algo = knn.UserUser(30, feedback='implicit') + algo = knn.UserUser(30, feedback="implicit") assert algo.nnbrs == 30 assert not algo.center - assert algo.aggregate == 'sum' + assert algo.aggregate == "sum" assert not algo.use_ratings def test_uu_imp_clone(): - algo = knn.UserUser(30, feedback='implicit') + algo = knn.UserUser(30, feedback="implicit") a2 = clone(algo) assert a2.get_params() == algo.get_params() @@ -57,22 +57,24 @@ def test_uu_train(): assert ret is algo # it should have computed correct means - umeans = ml_ratings.groupby('user').rating.mean() - mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean') + umeans = ml_ratings.groupby("user").rating.mean() + mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name="mean") umeans, mlmeans = umeans.align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(['user', 'item']).rating + uir = ml_ratings.set_index(["user", "item"]).rating r_items = algo.transpose_matrix_.rowinds() - ui_rbdf = pd.DataFrame({ - 'user': algo.user_index_[algo.transpose_matrix_.colinds], - 'item': algo.item_index_[r_items], - 'nrating': algo.transpose_matrix_.values - }).set_index(['user', 'item']) + ui_rbdf = pd.DataFrame( + { + "user": algo.user_index_[algo.transpose_matrix_.colinds], + "item": algo.item_index_[r_items], + 
"nrating": algo.transpose_matrix_.values, + } + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) - ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean'] - ui_rbdf['orig_rating'] = uir + ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] + ui_rbdf["orig_rating"] = uir assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values) @@ -122,7 +124,7 @@ def test_uu_predict_live_ratings(): no4 = ml_ratings[ml_ratings.user != 4] algo.fit(no4) - ratings = ml_ratings[ml_ratings.user == 4].set_index('item').rating + ratings = ml_ratings[ml_ratings.user == 4].set_index("item").rating preds = algo.predict_for_user(20381, [1016, 2091], ratings) assert len(preds) == 2 @@ -132,37 +134,39 @@ def test_uu_predict_live_ratings(): def test_uu_save_load(tmp_path): orig = knn.UserUser(30) - _log.info('training model') + _log.info("training model") orig.fit(ml_ratings) - fn = tmp_path / 'uu.model' - _log.info('saving to %s', fn) - with fn.open('wb') as f: + fn = tmp_path / "uu.model" + _log.info("saving to %s", fn) + with fn.open("wb") as f: pickle.dump(orig, f) - _log.info('reloading model') - with fn.open('rb') as f: + _log.info("reloading model") + with fn.open("rb") as f: algo = pickle.load(f) - _log.info('checking model') + _log.info("checking model") # it should have computed correct means - umeans = ml_ratings.groupby('user').rating.mean() - mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean') + umeans = ml_ratings.groupby("user").rating.mean() + mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name="mean") umeans, mlmeans = umeans.align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(['user', 'item']).rating + uir = ml_ratings.set_index(["user", "item"]).rating r_items = algo.transpose_matrix_.rowinds() - ui_rbdf = pd.DataFrame({ - 'user': algo.user_index_[algo.transpose_matrix_.colinds], - 'item': algo.item_index_[r_items], - 'nrating': algo.transpose_matrix_.values - }).set_index(['user', 'item']) + ui_rbdf = pd.DataFrame( + { + "user": algo.user_index_[algo.transpose_matrix_.colinds], + "item": algo.item_index_[r_items], + "nrating": algo.transpose_matrix_.values, + } + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) - ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean'] - ui_rbdf['orig_rating'] = uir + ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] + ui_rbdf["orig_rating"] = uir assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values) # running the predictor should work @@ -183,8 +187,8 @@ def test_uu_predict_unknown_empty(): def test_uu_implicit(): "Train and use user-user on an implicit data set." - algo = knn.UserUser(20, feedback='implicit') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.UserUser(20, feedback="implicit") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.user_means_ is None @@ -200,8 +204,8 @@ def test_uu_implicit(): @mark.slow def test_uu_save_load_implicit(tmp_path): "Save and load user-user on an implicit data set." 
- orig = knn.UserUser(20, feedback='implicit') - data = ml_ratings.loc[:, ['user', 'item']] + orig = knn.UserUser(20, feedback="implicit") + data = ml_ratings.loc[:, ["user", "item"]] orig.fit(data) ser = pickle.dumps(orig) @@ -226,25 +230,25 @@ def test_uu_known_preds(): from lenskit import batch algo = knn.UserUser(30, min_sim=1.0e-6) - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) dir = Path(__file__).parent - pred_file = dir / 'user-user-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "user-user-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] - _log.info('generating %d known predictions', len(pairs)) + pairs = known_preds.loc[:, ["user", "item"]] + _log.info("generating %d known predictions", len(pairs)) preds = batch.predict(algo, pairs) - merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds) + merged = pd.merge(known_preds.rename(columns={"prediction": "expected"}), preds) assert len(merged) == len(preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction try: assert not any(merged.prediction.isna() & merged.expected.notna()) except AssertionError as e: bad = merged[merged.prediction.isna() & merged.expected.notna()] - _log.error('%d missing predictions:\n%s', len(bad), bad) + _log.error("%d missing predictions:\n%s", len(bad), bad) raise e err = merged.error @@ -253,22 +257,23 @@ def test_uu_known_preds(): assert all(err.abs() < 0.01) except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('%d erroneous predictions:\n%s', len(bad), bad) + _log.error("%d erroneous predictions:\n%s", len(bad), bad) raise e def __batch_eval(job): from lenskit import batch + algo, train, test = job - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_uu_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -286,30 +291,30 @@ def test_uu_batch_accuracy(): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.71, abs=0.05) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.91, abs=0.055) @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_uu_implicit_batch_accuracy(): from lenskit import batch, topn import lenskit.crossfold as xf ratings = lktu.ml100k.ratings - algo = knn.UserUser(30, center=False, aggregate='sum') + algo = knn.UserUser(30, center=False, aggregate="sum") folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) all_test = pd.concat(f.test for f in folds) rec_lists = [] for train, test in folds: - _log.info('running training') + _log.info("running training") rec_algo = Recommender.adapt(algo) - rec_algo.fit(train.loc[:, ['user', 'item']]) - 
_log.info('testing %d users', test.user.nunique()) + rec_algo.fit(train.loc[:, ["user", "item"]]) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(rec_algo, test.user.unique(), 100, n_jobs=2) rec_lists.append(recs) recs = pd.concat(rec_lists) diff --git a/tests/test_math_solve.py b/tests/test_math_solve.py index 9568c4da2..cac54caa0 100644 --- a/tests/test_math_solve.py +++ b/tests/test_math_solve.py @@ -17,7 +17,7 @@ def square_problem(draw, scale=10): size = draw(st.integers(2, 100)) # Hypothesis doesn't do well at generating problem data, so go with this - seed = draw(st.integers(min_value=0, max_value=2**32-1)) + seed = draw(st.integers(min_value=0, max_value=2**32 - 1)) rng = np.random.RandomState(seed) A = rng.randn(size, size) * scale b = rng.randn(size) * scale diff --git a/tests/test_matrix.py b/tests/test_matrix.py index bef4f71cb..408d7b3b3 100644 --- a/tests/test_matrix.py +++ b/tests/test_matrix.py @@ -17,17 +17,17 @@ def test_sparse_matrix(rng): assert mat.ncols == ratings.item.nunique() # user indicators should correspond to user item counts - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() ucounts = ucounts.loc[uidx].cumsum() assert all(mat.rowptrs[1:] == ucounts.values) # verify rating values - ratings = ratings.set_index(['user', 'item']) + ratings = ratings.set_index(["user", "item"]) for u in rng.choice(uidx, size=50): ui = uidx.get_loc(u) vs = mat.row_vs(ui) vs = pd.Series(vs, iidx[mat.row_cs(ui)]) - rates = ratings.loc[u]['rating'] + rates = ratings.loc[u]["rating"] vs, rates = vs.align(rates) assert not any(vs.isna()) assert not any(rates.isna()) @@ -36,7 +36,7 @@ def test_sparse_matrix(rng): def test_sparse_matrix_implicit(): ratings = ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] mat, uidx, iidx = sparse_ratings(ratings) assert mat.nrows == len(uidx) @@ -47,11 +47,11 @@ def test_sparse_matrix_implicit(): @mark.parametrize( - 'format, sps_fmt_checker', + "format, sps_fmt_checker", [ (True, sps.isspmatrix_csr), - ('csr', sps.isspmatrix_csr), - ('coo', sps.isspmatrix_coo), + ("csr", sps.isspmatrix_csr), + ("coo", sps.isspmatrix_coo), ], ) def test_sparse_matrix_scipy(format, sps_fmt_checker): @@ -64,7 +64,7 @@ def test_sparse_matrix_scipy(format, sps_fmt_checker): assert len(iidx) == ratings.item.nunique() # user indicators should correspond to user item counts - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() ucounts = ucounts.loc[uidx].cumsum() if sps.isspmatrix_coo(mat): mat = mat.tocsr() @@ -73,7 +73,7 @@ def test_sparse_matrix_scipy(format, sps_fmt_checker): def test_sparse_matrix_scipy_implicit(): ratings = ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] mat, uidx, iidx = sparse_ratings(ratings, scipy=True) assert sps.issparse(mat) @@ -86,8 +86,8 @@ def test_sparse_matrix_scipy_implicit(): def test_sparse_matrix_indexes(rng): ratings = ml_test.ratings - uidx = pd.Index(rng.permutation(ratings['user'].unique())) - iidx = pd.Index(rng.permutation(ratings['item'].unique())) + uidx = pd.Index(rng.permutation(ratings["user"].unique())) + iidx = pd.Index(rng.permutation(ratings["item"].unique())) mat, _uidx, _iidx = sparse_ratings(ratings, users=uidx, items=iidx) @@ -97,12 +97,12 @@ def test_sparse_matrix_indexes(rng): assert len(_iidx) == ratings.item.nunique() # verify rating values - ratings = ratings.set_index(['user', 'item']) 
+ ratings = ratings.set_index(["user", "item"]) for u in rng.choice(_uidx, size=50): ui = _uidx.get_loc(u) vs = mat.row_vs(ui) vs = pd.Series(vs, _iidx[mat.row_cs(ui)]) - rates = ratings.loc[u]['rating'] + rates = ratings.loc[u]["rating"] vs, rates = vs.align(rates) assert not any(vs.isna()) assert not any(rates.isna()) diff --git a/tests/test_ml20m.py b/tests/test_ml20m.py index b36df4d50..3b4387eac 100644 --- a/tests/test_ml20m.py +++ b/tests/test_ml20m.py @@ -16,6 +16,7 @@ from lenskit.algorithms.basic import Popular from lenskit.algorithms.als import BiasedMF from lenskit.algorithms import item_knn as knn + try: import lenskit_tf except: @@ -28,7 +29,7 @@ _log = logging.getLogger(__name__) -_ml_path = Path('data/ml-20m') +_ml_path = Path("data/ml-20m") if _ml_path.exists(): _ml_20m = MovieLens(_ml_path) else: @@ -40,36 +41,36 @@ def ml20m(): if _ml_20m: return _ml_20m.ratings else: - pytest.skip('ML-20M not available') + pytest.skip("ML-20M not available") @pytest.mark.slow @pytest.mark.realdata -@pytest.mark.parametrize('n_jobs', [1, 2]) +@pytest.mark.parametrize("n_jobs", [1, 2]) def test_pop_recommend(ml20m, rng, n_jobs): - users = rng.choice(ml20m['user'].unique(), 10000, replace=False) + users = rng.choice(ml20m["user"].unique(), 10000, replace=False) algo = Popular() - _log.info('training %s', algo) + _log.info("training %s", algo) algo.fit(ml20m) - _log.info('recommending with %s', algo) + _log.info("recommending with %s", algo) recs = batch.recommend(algo, users, 10, n_jobs=n_jobs) - assert recs['user'].nunique() == 10000 + assert recs["user"].nunique() == 10000 @pytest.mark.realdata @pytest.mark.slow def test_als_isolate(ml20m, rng): - users = rng.choice(ml20m['user'].unique(), 5000, replace=False) + users = rng.choice(ml20m["user"].unique(), 5000, replace=False) algo = BiasedMF(20, iterations=10) algo = Recommender.adapt(algo) - _log.info('training %s', algo) + _log.info("training %s", algo) ares = batch.train_isolated(algo, ml20m) try: - _log.info('recommending with %s', algo) + _log.info("recommending with %s", algo) recs = batch.recommend(ares, users, 10) - assert recs['user'].nunique() == 5000 - _log.info('predicting with %s', algo) + assert recs["user"].nunique() == 5000 + _log.info("predicting with %s", algo) pairs = ml20m.sample(1000) preds = batch.predict(ares, pairs) assert len(preds) == len(pairs) @@ -80,14 +81,16 @@ def test_als_isolate(ml20m, rng): @pytest.mark.realdata @pytest.mark.slow @pytest.mark.skip -@pytest.mark.skipif(lenskit_tf is None or not lenskit_tf.TF_AVAILABLE, reason='TensorFlow not available') +@pytest.mark.skipif( + lenskit_tf is None or not lenskit_tf.TF_AVAILABLE, reason="TensorFlow not available" +) def test_tf_isvd(ml20m): algo = lenskit_tf.IntegratedBiasMF(20) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2)) @@ -95,5 +98,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.60, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_parallel.py b/tests/test_parallel.py index 654be1588..6bfb3434c 100644 --- a/tests/test_parallel.py +++ 
b/tests/test_parallel.py @@ -19,11 +19,11 @@ def _mul_op(m, v): def _worker_status(blob, *args): - _log.info('in worker %s', mp.current_process().name) + _log.info("in worker %s", mp.current_process().name) return os.getpid(), is_worker(), is_mp_worker() -@mark.parametrize('n_jobs', [None, 1, 2, 8]) +@mark.parametrize("n_jobs", [None, 1, 2, 8]) def test_invoke_matrix(n_jobs): matrix = np.random.randn(100, 100) vectors = [np.random.randn(100) for i in range(100)] @@ -35,51 +35,51 @@ def test_invoke_matrix(n_jobs): def test_mp_is_worker(): - with invoker('foo', _worker_status, 2) as loop: + with invoker("foo", _worker_status, 2) as loop: res = list(loop.map(range(10))) assert all([w for (pid, w, mpw) in res]) assert all([mpw for (pid, w, mpw) in res]) def test_proc_count_default(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count() == mp.cpu_count() // 2 assert proc_count(level=1) == 2 def test_proc_count_no_div(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count(1) == mp.cpu_count() def test_proc_count_env(): - with set_env_var('LK_NUM_PROCS', '17'): + with set_env_var("LK_NUM_PROCS", "17"): assert proc_count() == 17 assert proc_count(level=1) == 1 def test_proc_count_max(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count(max_default=1) == 1 def test_proc_count_nest_env(): - with set_env_var('LK_NUM_PROCS', '7,3'): + with set_env_var("LK_NUM_PROCS", "7,3"): assert proc_count() == 7 assert proc_count(level=1) == 3 assert proc_count(level=2) == 1 def _sp_matmul(a1, a2, *, fail=False): - _log.info('in worker process') + _log.info("in worker process") if fail: - raise RuntimeError('you rang?') + raise RuntimeError("you rang?") else: return a1 @ a2 def _sp_matmul_p(a1, a2, *, method=None, fail=False): - _log.info('in worker process') + _log.info("in worker process") return persist(a1 @ a2, method=method).transfer() @@ -99,10 +99,10 @@ def test_run_sp_fail(): run_sp(_sp_matmul, a1, a2, fail=True) -@pytest.mark.parametrize('method', [None, 'binpickle', 'shm']) +@pytest.mark.parametrize("method", [None, "binpickle", "shm"]) def test_run_sp_persist(method): - if method == 'shm' and not SHM_AVAILABLE: - pytest.skip('SHM backend not available') + if method == "shm" and not SHM_AVAILABLE: + pytest.skip("SHM backend not available") a1 = np.random.randn(100, 100) a2 = np.random.randn(100, 100) @@ -116,7 +116,7 @@ def test_run_sp_persist(method): def test_sp_is_worker(): - pid, w, mpw = run_sp(_worker_status, 'fishtank') + pid, w, mpw = run_sp(_worker_status, "fishtank") assert pid != os.getpid() assert w assert not mpw @@ -131,4 +131,4 @@ def test_sp_random_seed(): seed = run_sp(_get_seed) # we should spawn a seed for the worker assert seed.entropy == init.entropy - assert seed.spawn_key == (init.n_children_spawned - 1, ) + assert seed.spawn_key == (init.n_children_spawned - 1,) diff --git a/tests/test_popular.py b/tests/test_popular.py index b7be88cb0..790bd98f5 100644 --- a/tests/test_popular.py +++ b/tests/test_popular.py @@ -6,15 +6,15 @@ import lenskit.util.test as lktu -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_popular(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = 
lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) assert algo.item_pop_.max() == counts.max() @@ -31,7 +31,7 @@ def test_popular(): def test_popular_excludes_rated(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) recs = algo.recommend(100, 100) @@ -40,16 +40,16 @@ def test_popular_excludes_rated(): # make sure we didn't recommend anything the user likes ratings = lktu.ml_test.ratings - urates = ratings.set_index(['user', 'item']) + urates = ratings.set_index(["user", "item"]) urates = urates.loc[100, :] - match = recs.join(urates, on='item', how='inner') + match = recs.join(urates, on="item", how="inner") assert len(match) == 0 def test_pop_candidates(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() items = lktu.ml_test.ratings.item.unique() assert algo.item_pop_.max() == counts.max() @@ -75,7 +75,7 @@ def test_pop_save_load(): mod = pickle.dumps(original) algo = pickle.loads(mod) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) assert algo.item_pop_.max() == counts.max() @@ -95,7 +95,7 @@ def test_popscore_quantile(rng): assert algo.item_scores_.max() == 1.0 - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.sort_values() winner = counts.index[-1] @@ -103,10 +103,10 @@ def test_popscore_quantile(rng): def test_popscore_rank(rng): - algo = basic.PopScore('rank') + algo = basic.PopScore("rank") algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.sort_values() assert algo.item_scores_.max() == len(counts) @@ -116,10 +116,10 @@ def test_popscore_rank(rng): def test_popscore_counts(rng): - algo = basic.PopScore('count') + algo = basic.PopScore("count") algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() scores, counts = algo.item_scores_.align(counts) assert all(scores == counts) diff --git a/tests/test_predict_metrics.py b/tests/test_predict_metrics.py index f1797e6e7..461f250bc 100644 --- a/tests/test_predict_metrics.py +++ b/tests/test_predict_metrics.py @@ -9,44 +9,44 @@ def test_check_missing_empty(): - pm._check_missing(pd.Series([], dtype='float64'), 'error') + pm._check_missing(pd.Series([], dtype="float64"), "error") # should pass assert True def test_check_missing_has_values(): - pm._check_missing(pd.Series([1, 3, 2]), 'error') + pm._check_missing(pd.Series([1, 3, 2]), "error") # should pass assert True def test_check_missing_nan_raises(): with raises(ValueError): - pm._check_missing(pd.Series([1, np.nan, 3]), 'error') + pm._check_missing(pd.Series([1, np.nan, 3]), "error") def test_check_missing_raises(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='left') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="left") with raises(ValueError): - pm._check_missing(data, 'error') + pm._check_missing(data, "error") def 
test_check_joined_ok(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='inner') - pm._check_missing(ref, 'error') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="inner") + pm._check_missing(ref, "error") # should get here assert True def test_check_missing_ignore(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='left') - pm._check_missing(data, 'ignore') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="left") + pm._check_missing(data, "ignore") # should get here assert True @@ -103,18 +103,19 @@ def test_rmse_series_two(): def test_rmse_series_subset_axis(): - rmse = pm.rmse(pd.Series([1, 3], ['a', 'c']), pd.Series([3, 4, 1], ['a', 'b', 'c'])) + rmse = pm.rmse(pd.Series([1, 3], ["a", "c"]), pd.Series([3, 4, 1], ["a", "b", "c"])) assert rmse == approx(2) def test_rmse_series_missing_value_error(): with raises(ValueError): - pm.rmse(pd.Series([1, 3], ['a', 'd']), pd.Series([3, 4, 1], ['a', 'b', 'c'])) + pm.rmse(pd.Series([1, 3], ["a", "d"]), pd.Series([3, 4, 1], ["a", "b", "c"])) def test_rmse_series_missing_value_ignore(): - rmse = pm.rmse(pd.Series([1, 3], ['a', 'd']), pd.Series([3, 4, 1], ['a', 'b', 'c']), - missing='ignore') + rmse = pm.rmse( + pd.Series([1, 3], ["a", "d"]), pd.Series([3, 4, 1], ["a", "b", "c"]), missing="ignore" + ) assert rmse == approx(2) @@ -159,7 +160,7 @@ def test_mae_series_two(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_batch_rmse(): import lenskit.crossfold as xf import lenskit.batch as batch @@ -171,13 +172,13 @@ def test_batch_rmse(): def eval(train, test): algo.fit(train) preds = batch.predict(algo, test) - return preds.set_index(['user', 'item']) + return preds.set_index(["user", "item"]) - results = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleN(5)))) + results = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleN(5))) + ) - user_rmse = results.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = results.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) # we should have all users users = ratings.user.unique() @@ -224,9 +225,9 @@ def test_user_metric(): preds = batch.predict(algo, test) rmse = pm.user_metric(preds) - u_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + u_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert rmse == approx(u_rmse.mean()) mae = pm.user_metric(preds, metric=pm.mae) - u_mae = preds.groupby('user').apply(lambda df: pm.mae(df.prediction, df.rating)) + u_mae = preds.groupby("user").apply(lambda df: pm.mae(df.prediction, df.rating)) assert mae == approx(u_mae.mean()) diff --git a/tests/test_rerank.py b/tests/test_rerank.py index f496fd9b2..d0f7f1988 100644 --- a/tests/test_rerank.py +++ b/tests/test_rerank.py @@ -9,10 +9,10 @@ def test_plackett_luce_rec(): pop = PopScore() - algo = PlackettLuce(pop, rng_spec='user') + algo = PlackettLuce(pop, rng_spec="user") algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = 
lktu.ml_test.ratings["item"].unique() nitems = len(items) recs1 = algo.recommend(2038, 100) @@ -21,19 +21,19 @@ def test_plackett_luce_rec(): assert len(recs2) == 100 # we don't get exactly the same set of recs - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) def test_plackett_luce_pred(): bias = Bias() - algo = PlackettLuce(bias, rng_spec='user') + algo = PlackettLuce(bias, rng_spec="user") algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = lktu.ml_test.ratings["item"].unique() nitems = len(items) recs1 = algo.recommend(2038, 100) @@ -42,8 +42,8 @@ def test_plackett_luce_pred(): assert len(recs2) == 100 # we don't get exactly the same set of recs - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) diff --git a/tests/test_sharing.py b/tests/test_sharing.py index 3ccc206c2..8e1033029 100644 --- a/tests/test_sharing.py +++ b/tests/test_sharing.py @@ -33,7 +33,7 @@ def test_persist_bpk(): share.close() -@mark.skipif(not lks.SHM_AVAILABLE, reason='shared_memory not available') +@mark.skipif(not lks.SHM_AVAILABLE, reason="shared_memory not available") def test_persist_shm(): matrix = np.random.randn(1000, 100) share = lks.persist_shm(matrix) @@ -62,7 +62,7 @@ def test_persist(): def test_persist_dir(tmp_path): "Test persistence with a configured directory" matrix = np.random.randn(1000, 100) - with lktu.set_env_var('LK_TEMP_DIR', os.fspath(tmp_path)): + with lktu.set_env_var("LK_TEMP_DIR", os.fspath(tmp_path)): share = lks.persist(matrix) assert isinstance(share, lks.BPKPersisted) @@ -79,7 +79,7 @@ def test_persist_method(): "Test persistence with a specified method" matrix = np.random.randn(1000, 100) - share = lks.persist(matrix, method='binpickle') + share = lks.persist(matrix, method="binpickle") assert isinstance(share, lks.BPKPersisted) try: diff --git a/tests/test_svd.py b/tests/test_svd.py index a62310ebb..ee4d7653b 100644 --- a/tests/test_svd.py +++ b/tests/test_svd.py @@ -13,11 +13,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) -need_skl = mark.skipif(not svd.SKL_AVAILABLE, reason='scikit-learn not installed') +need_skl = mark.skipif(not svd.SKL_AVAILABLE, reason="scikit-learn not installed") @need_skl @@ -71,6 +71,7 @@ def test_svd_clone(): assert a2.bias.user_damping == algo.bias.user_damping assert a2.bias.item_damping == algo.bias.item_damping + @need_skl @mark.slow def test_svd_save_load(): @@ -80,7 +81,7 @@ def test_svd_save_load(): original.fit(ratings) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -92,7 +93,7 @@ def test_svd_save_load(): @need_skl @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_svd_batch_accuracy(): from lenskit.algorithms import 
basic from lenskit.algorithms import bias @@ -106,9 +107,9 @@ def test_svd_batch_accuracy(): algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) @@ -116,5 +117,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_topn_analysis.py b/tests/test_topn_analysis.py index cfe2ee703..1253b8b0e 100644 --- a/tests/test_topn_analysis.py +++ b/tests/test_topn_analysis.py @@ -19,18 +19,21 @@ def test_split_keys(): rla = topn.RecListAnalysis() - recs, truth = topn._df_keys(['algorithm', 'user', 'item', 'rank', 'score'], - ['user', 'item', 'rating']) - assert truth == ['user'] - assert recs == ['algorithm', 'user'] + recs, truth = topn._df_keys( + ["algorithm", "user", "item", "rank", "score"], ["user", "item", "rating"] + ) + assert truth == ["user"] + assert recs == ["algorithm", "user"] def test_split_keys_gcol(): - recs, truth = topn._df_keys(['algorithm', 'user', 'item', 'rank', 'score', 'fishtank'], - ['user', 'item', 'rating'], - ['algorithm', 'fishtank', 'user']) - assert truth == ['user'] - assert recs == ['algorithm', 'fishtank', 'user'] + recs, truth = topn._df_keys( + ["algorithm", "user", "item", "rank", "score", "fishtank"], + ["user", "item", "rating"], + ["algorithm", "fishtank", "user"], + ) + assert truth == ["user"] + assert recs == ["algorithm", "fishtank", "user"] def test_run_one(): @@ -38,10 +41,10 @@ def test_run_one(): rla.add_metric(topn.precision) rla.add_metric(topn.recall) - recs = pd.DataFrame({'user': 1, 'item': [2]}) - recs.name = 'recs' - truth = pd.DataFrame({'user': 1, 'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth.name = 'truth' + recs = pd.DataFrame({"user": 1, "item": [2]}) + recs.name = "recs" + truth = pd.DataFrame({"user": 1, "item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth.name = "truth" print(recs) print(truth) @@ -49,13 +52,13 @@ def test_run_one(): res = rla.compute(recs, truth) print(res) - assert res.index.name == 'user' + assert res.index.name == "user" assert res.index.is_unique assert len(res) == 1 assert all(res.index == 1) assert all(res.precision == 1.0) - assert res.recall.values == approx(1/3) + assert res.recall.values == approx(1 / 3) def test_run_two(): @@ -64,17 +67,21 @@ def test_run_two(): rla.add_metric(topn.recall) rla.add_metric(topn.ndcg) - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2] - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) def prog(inner): assert len(inner) == 2 @@ -86,101 +93,110 @@ def prog(inner): assert res.columns.nlevels == 1 assert len(res) == 
2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert all(res.index.levels[0] == 'a') - assert all(res.index.levels[1] == ['a', 'b']) - assert all(res.reset_index().user == ['a', 'b']) + assert res.index.names == ["data", "user"] + assert all(res.index.levels[0] == "a") + assert all(res.index.levels[1] == ["a", "b"]) + assert all(res.reset_index().user == ["a", "b"]) partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3]) assert res.ndcg.values == approx([1.0, partial_ndcg]) - assert res.precision.values == approx([1.0, 1/2]) - assert res.recall.values == approx([1.0, 1/3]) + assert res.precision.values == approx([1.0, 1 / 2]) + assert res.recall.values == approx([1.0, 1 / 3]) def test_inner_format(): rla = topn.RecListAnalysis() - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2] - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) - - def inner(recs, truth, foo='a'): - assert foo == 'b' - assert set(recs.columns) == set(['LKRecID', 'LKTruthID', 'item', 'rank']) - assert truth.index.name == 'item' + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) + + def inner(recs, truth, foo="a"): + assert foo == "b" + assert set(recs.columns) == set(["LKRecID", "LKTruthID", "item", "rank"]) + assert truth.index.name == "item" assert truth.index.is_unique print(truth) - assert all(truth.columns == ['rating']) - return len(recs.join(truth, on='item', how='inner')) - rla.add_metric(inner, name='bob', foo='b') + assert all(truth.columns == ["rating"]) + return len(recs.join(truth, on="item", how="inner")) + + rla.add_metric(inner, name="bob", foo="b") res = rla.compute(recs, truth) print(res) assert len(res) == 2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert all(res.index.levels[0] == 'a') - assert all(res.index.levels[1] == ['a', 'b']) - assert all(res.reset_index().user == ['a', 'b']) - assert all(res['bob'] == [3, 1]) + assert res.index.names == ["data", "user"] + assert all(res.index.levels[0] == "a") + assert all(res.index.levels[1] == ["a", "b"]) + assert all(res.reset_index().user == ["a", "b"]) + assert all(res["bob"] == [3, 1]) def test_spec_group_cols(): - rla = topn.RecListAnalysis(group_cols=['data', 'user']) + rla = topn.RecListAnalysis(group_cols=["data", "user"]) rla.add_metric(topn.precision) rla.add_metric(topn.recall) rla.add_metric(topn.ndcg) - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2], - 'wombat': np.random.randn(5) - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + "wombat": np.random.randn(5), + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) res = rla.compute(recs, truth) print(res) assert len(res) == 2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert 
all(res.index.levels[0] == 'a')
-    assert all(res.index.levels[1] == ['a', 'b'])
-    assert all(res.reset_index().user == ['a', 'b'])
+    assert res.index.names == ["data", "user"]
+    assert all(res.index.levels[0] == "a")
+    assert all(res.index.levels[1] == ["a", "b"])
+    assert all(res.reset_index().user == ["a", "b"])

     partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3])
     assert res.ndcg.values == approx([1.0, partial_ndcg])
-    assert res.precision.values == approx([1.0, 1/2])
-    assert res.recall.values == approx([1.0, 1/3])
+    assert res.precision.values == approx([1.0, 1 / 2])
+    assert res.recall.values == approx([1.0, 1 / 3])


 def test_java_equiv():
     dir = Path(__file__).parent
-    metrics = pd.read_csv(str(dir / 'topn-java-metrics.csv'))
-    recs = pd.read_csv(str(dir / 'topn-java-recs.csv'))
-    truth = pd.read_csv(str(dir / 'topn-java-truth.csv'))
+    metrics = pd.read_csv(str(dir / "topn-java-metrics.csv"))
+    recs = pd.read_csv(str(dir / "topn-java-recs.csv"))
+    truth = pd.read_csv(str(dir / "topn-java-truth.csv"))

     rla = topn.RecListAnalysis()
     rla.add_metric(topn.ndcg)

     res = rla.compute(recs, truth)

     umm = pd.merge(metrics, res.reset_index())
-    umm['err'] = umm['ndcg'] - umm['Java.nDCG']
-    _log.info('merged: \n%s', umm)
-    assert umm['err'].values == approx(0, abs=1.0e-6)
+    umm["err"] = umm["ndcg"] - umm["Java.nDCG"]
+    _log.info("merged: \n%s", umm)
+    assert umm["err"].values == approx(0, abs=1.0e-6)


 @mark.slow
@@ -196,20 +212,20 @@ def test_fill_users():
     train, test = next(splits)
     algo.fit(train)

-    rec_users = test['user'].sample(50).unique()
+    rec_users = test["user"].sample(50).unique()
     assert len(rec_users) < 50
     recs = batch.recommend(algo, rec_users, 25)

     scores = rla.compute(recs, test, include_missing=True)
-    assert len(scores) == test['user'].nunique()
-    assert scores['recall'].notna().sum() == len(rec_users)
-    assert all(scores['ntruth'] == 5)
+    assert len(scores) == test["user"].nunique()
+    assert scores["recall"].notna().sum() == len(rec_users)
+    assert all(scores["ntruth"] == 5)

     mscores = rla.compute(recs, test)
     assert len(mscores) < len(scores)

-    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
-    recall, mrecall = recall.align(mscores['recall'])
+    recall = scores.loc[scores["recall"].notna(), "recall"].copy()
+    recall, mrecall = recall.align(mscores["recall"])
     assert all(recall == mrecall)
@@ -229,65 +245,63 @@ def test_adv_fill_users():
     all_test = {}
     for i, (train, test) in enumerate(splits):
         a_uu.fit(train)
-        rec_users = test['user'].sample(50).unique()
-        all_recs[(i+1, 'UU')] = batch.recommend(a_uu, rec_users, 25)
+        rec_users = test["user"].sample(50).unique()
+        all_recs[(i + 1, "UU")] = batch.recommend(a_uu, rec_users, 25)

         a_ii.fit(train)
-        rec_users = test['user'].sample(50).unique()
-        all_recs[(i+1, 'II')] = batch.recommend(a_ii, rec_users, 25)
-        all_test[i+1] = test
+        rec_users = test["user"].sample(50).unique()
+        all_recs[(i + 1, "II")] = batch.recommend(a_ii, rec_users, 25)
+        all_test[i + 1] = test

-    recs = pd.concat(all_recs, names=['part', 'algo'])
-    recs.reset_index(['part', 'algo'], inplace=True)
+    recs = pd.concat(all_recs, names=["part", "algo"])
+    recs.reset_index(["part", "algo"], inplace=True)
     recs.reset_index(drop=True, inplace=True)

-    test = pd.concat(all_test, names=['part'])
-    test.reset_index(['part'], inplace=True)
+    test = pd.concat(all_test, names=["part"])
+    test.reset_index(["part"], inplace=True)
     test.reset_index(drop=True, inplace=True)

     scores = rla.compute(recs, test, include_missing=True)
     inames = scores.index.names
     scores.sort_index(inplace=True)
     assert len(scores) == 50 * 4
-    assert all(scores['ntruth'] == 5)
-    assert scores['recall'].isna().sum() > 0
-    _log.info('scores:\n%s', scores)
+    assert all(scores["ntruth"] == 5)
+    assert scores["recall"].isna().sum() > 0
+    _log.info("scores:\n%s", scores)

-    ucounts = scores.reset_index().groupby('algo')['user'].agg(['count', 'nunique'])
-    assert all(ucounts['count'] == 100)
-    assert all(ucounts['nunique'] == 100)
+    ucounts = scores.reset_index().groupby("algo")["user"].agg(["count", "nunique"])
+    assert all(ucounts["count"] == 100)
+    assert all(ucounts["nunique"] == 100)

     mscores = rla.compute(recs, test)
     mscores = mscores.reset_index().set_index(inames)
     mscores.sort_index(inplace=True)
     assert len(mscores) < len(scores)
-    _log.info('mscores:\n%s', mscores)
+    _log.info("mscores:\n%s", mscores)

-    recall = scores.loc[scores['recall'].notna(), 'recall'].copy()
-    recall, mrecall = recall.align(mscores['recall'])
+    recall = scores.loc[scores["recall"].notna(), "recall"].copy()
+    recall, mrecall = recall.align(mscores["recall"])
     assert all(recall == mrecall)


-@mark.parametrize('drop_rating', [False, True])
+@mark.parametrize("drop_rating", [False, True])
 def test_pr_bulk_match(demo_recs, drop_rating):
     "bulk and normal match"
     train, test, recs = demo_recs
     if drop_rating:
-        test = test[['user', 'item']]
+        test = test[["user", "item"]]

     rla = topn.RecListAnalysis()
     rla.add_metric(precision)
     rla.add_metric(recall)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: precision(*a), name='ind_p')
-    rla.add_metric(lambda *a: recall(*a), name='ind_r')
+    rla.add_metric(lambda *a: precision(*a), name="ind_p")
+    rla.add_metric(lambda *a: recall(*a), name="ind_r")

     res = rla.compute(recs, test)
     print(res)
-    _log.info('precision mismatches:\n%s',
-              res[res.precision != res.ind_p])
-    _log.info('recall mismatches:\n%s',
-              res[res.recall != res.ind_r])
+    _log.info("precision mismatches:\n%s", res[res.precision != res.ind_p])
+    _log.info("recall mismatches:\n%s", res[res.recall != res.ind_r])
     assert res.precision.values == approx(res.ind_p.values)
     assert res.recall.values == approx(res.ind_r.values)
diff --git a/tests/test_topn_hit.py b/tests/test_topn_hit.py
index f907da2f6..b664023f6 100644
--- a/tests/test_topn_hit.py
+++ b/tests/test_topn_hit.py
@@ -12,8 +12,8 @@ def _test_hit(items, rel, **kwargs):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return hit(recs, truth, **kwargs)
@@ -81,7 +81,7 @@ def test_hit_series_array():
     hr = _test_hit(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert hr == 1

-    hr = _test_hit(pd.Series([1, 2, 3]), np.arange(4, 9, 1, 'u4'))
+    hr = _test_hit(pd.Series([1, 2, 3]), np.arange(4, 9, 1, "u4"))
     assert hr == 0
@@ -92,7 +92,7 @@ def test_hit_array():
     hr = _test_hit(np.array([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert hr == 1

-    hr = _test_hit(np.array([1, 2, 3]), np.arange(4, 9, 1, 'u4'))
+    hr = _test_hit(np.array([1, 2, 3]), np.arange(4, 9, 1, "u4"))
     assert hr == 0
@@ -122,19 +122,18 @@ def test_hit_partial_rel():
 def test_hit_bulk_k(demo_recs):
     "bulk and normal match"
     train, test, recs = demo_recs
-    assert test['user'].value_counts().max() > 5
+    assert test["user"].value_counts().max() > 5

     rla = topn.RecListAnalysis()
-    rla.add_metric(hit, name='hk', k=5)
+    rla.add_metric(hit, name="hk", k=5)
     rla.add_metric(hit)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a, **k: hit(*a, **k), name='ind_hk', k=5)
-    rla.add_metric(lambda *a: hit(*a), name='ind_h')
+    rla.add_metric(lambda *a, **k: hit(*a, **k), name="ind_hk", k=5)
+    rla.add_metric(lambda *a: hit(*a), name="ind_h")

     res = rla.compute(recs, test)
     print(res)
-    _log.info('recall mismatches:\n%s',
-              res[res.hit != res.ind_h])
+    _log.info("recall mismatches:\n%s", res[res.hit != res.ind_h])
     assert res.hit.values == approx(res.ind_h.values)
     assert res.hk.values == approx(res.ind_hk.values)
diff --git a/tests/test_topn_mrr.py b/tests/test_topn_mrr.py
index 5fc04d268..bb2b73843 100644
--- a/tests/test_topn_mrr.py
+++ b/tests/test_topn_mrr.py
@@ -11,8 +11,8 @@ def _test_rr(items, rel, **kw):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return recip_rank(recs, truth, **kw)
@@ -53,36 +53,36 @@ def test_mrr_series_idx():
 def test_mrr_array_late():
     "deep -> 0.1"
-    rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10])
+    rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10])
     assert rr == approx(0.1)


 def test_mrr_k_trunc():
-    rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10], k=5)
+    rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10], k=5)
     assert rr == approx(0.0)

-    rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10, 5], k=5)
+    rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10, 5], k=5)
     assert rr == approx(0.2)


 def test_mrr_k_short():
-    rr = _test_rr(np.arange(1, 5, 1, 'u4'), [2], k=10)
+    rr = _test_rr(np.arange(1, 5, 1, "u4"), [2], k=10)
     assert rr == approx(0.5)


-@mark.parametrize('drop_rating', [False, True])
+@mark.parametrize("drop_rating", [False, True])
 def test_mrr_bulk(demo_recs, drop_rating):
     "bulk and normal match"
     train, test, recs = demo_recs
     if drop_rating:
-        test = test[['user', 'item']]
+        test = test[["user", "item"]]

     rla = RecListAnalysis()
     rla.add_metric(recip_rank)
-    rla.add_metric(recip_rank, name='rr_k', k=10)
+    rla.add_metric(recip_rank, name="rr_k", k=10)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: recip_rank(*a), name='ind_rr')
-    rla.add_metric(lambda *a, **k: recip_rank(*a, **k), name='ind_rr_k', k=10)
+    rla.add_metric(lambda *a: recip_rank(*a), name="ind_rr")
+    rla.add_metric(lambda *a, **k: recip_rank(*a, **k), name="ind_rr_k", k=10)

     res = rla.compute(recs, test)
     assert all(res.recip_rank == res.ind_rr)
diff --git a/tests/test_topn_ndcg.py b/tests/test_topn_ndcg.py
index 2fdf9e573..830a4b8b6 100644
--- a/tests/test_topn_ndcg.py
+++ b/tests/test_topn_ndcg.py
@@ -55,71 +55,67 @@ def test_dcg_nan():
 def test_dcg_series():
     "The DCG function should work on a series"
-    assert _dcg(pd.Series([np.e, 0, 0, np.pi])) == \
-        approx((np.e + np.pi / np.log2(4)))
+    assert _dcg(pd.Series([np.e, 0, 0, np.pi])) == approx((np.e + np.pi / np.log2(4)))


 def test_dcg_mult2():
     "multiple elements should score correctly"
     assert _dcg(np.array([np.e, np.pi])) == approx(np.e + np.pi)
-    assert _dcg(np.array([np.e, 0, 0, np.pi])) == \
-        approx((np.e + np.pi / np.log2(4)))
+    assert _dcg(np.array([np.e, 0, 0, np.pi])) == approx((np.e + np.pi / np.log2(4)))


 def test_ndcg_empty():
-    recs = pd.DataFrame({'item': []})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": []})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(0.0)


 def test_ndcg_no_match():
-    recs = pd.DataFrame({'item': [4]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [4]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(0.0)


 def test_ndcg_perfect():
-    recs = pd.DataFrame({'item': [2, 3, 1]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [2, 3, 1]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(1.0)


 def test_ndcg_perfect_k_short():
-    recs = pd.DataFrame({'item': [2, 3, 1]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [2, 3, 1]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth, k=2) == approx(1.0)
     assert ndcg(recs[:2], truth, k=2) == approx(1.0)


 def test_ndcg_wrong():
-    recs = pd.DataFrame({'item': [1, 2]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [1, 2]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth) == approx(_dcg([3.0, 5.0] / _dcg([5.0, 4.0, 3.0])))


 def test_ndcg_perfect_k():
-    recs = pd.DataFrame({'item': [2, 3]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [2, 3]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert ndcg(recs, truth, k=2) == approx(1.0)


 def test_ndcg_bulk_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 30, 2),
-        (1, 1, 72, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 30, 2), (1, 1, 72, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     ndcg = _bulk_ndcg(recs, truth)
     assert len(ndcg) == 1
@@ -128,16 +124,14 @@ def test_ndcg_bulk_not_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 72, 2),
-        (1, 1, 30, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 72, 2), (1, 1, 30, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     ndcg = _bulk_ndcg(recs, truth)
     assert len(ndcg) == 1
@@ -145,23 +139,23 @@
     assert ndcg.iloc[0] == approx(0.8155, abs=0.001)


-@mark.parametrize('drop_rating', [False, True])
+@mark.parametrize("drop_rating", [False, True])
 def test_ndcg_bulk_match(demo_recs, drop_rating):
     "bulk and normal match"
     train, test, recs = demo_recs
     if drop_rating:
-        test = test[['user', 'item']]
+        test = test[["user", "item"]]

     rla = RecListAnalysis()
     rla.add_metric(ndcg)
-    rla.add_metric(ndcg, name='ndcg_k', k=5)
+    rla.add_metric(ndcg, name="ndcg_k", k=5)
     rla.add_metric(dcg)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: ndcg(*a), name='ind_ndcg')
-    rla.add_metric(lambda *a, **k: ndcg(*a, **k), name='ind_ndcg_k', k=5)
+    rla.add_metric(lambda *a: ndcg(*a), name="ind_ndcg")
+    rla.add_metric(lambda *a, **k: ndcg(*a, **k), name="ind_ndcg_k", k=5)

     res = rla.compute(recs, test)
-    res['ind_ideal'] = res['dcg'] / res['ind_ndcg']
+    res["ind_ideal"] = res["dcg"] / res["ind_ndcg"]
     print(res)

     assert res.ndcg.values == approx(res.ind_ndcg.values)
diff --git a/tests/test_topn_precision.py b/tests/test_topn_precision.py
index 024c6e536..62df2bc1a 100644
--- a/tests/test_topn_precision.py
+++ b/tests/test_topn_precision.py
@@ -9,8 +9,8 @@ def _test_prec(items, rel, **k):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return precision(recs, truth, **k)
@@ -76,7 +76,7 @@ def test_precision_series_array():
     prec = _test_prec(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5]))
     assert prec == approx(0.5)

-    prec = _test_prec(pd.Series([1, 2, 3, 4]), np.arange(4, 10, 1, 'u4'))
+    prec = _test_prec(pd.Series([1, 2, 3, 4]), np.arange(4, 10, 1, "u4"))
     assert prec == approx(0.25)
@@ -87,7 +87,7 @@ def test_precision_array():
     prec = _test_prec(np.array([1, 2, 3, 4]), np.array([1, 3, 5]))
     assert prec == approx(0.5)

-    prec = _test_prec(np.array([1, 2, 3, 4]), np.arange(4, 10, 1, 'u4'))
+    prec = _test_prec(np.array([1, 2, 3, 4]), np.arange(4, 10, 1, "u4"))
     assert prec == approx(0.25)
@@ -118,14 +118,14 @@ def test_prec_short_items():
 def test_recall_bulk_k(demo_recs):
     "bulk and normal match"
     train, test, recs = demo_recs
-    assert test['user'].value_counts().max() > 5
+    assert test["user"].value_counts().max() > 5

     rla = topn.RecListAnalysis()
-    rla.add_metric(precision, name='pk', k=5)
+    rla.add_metric(precision, name="pk", k=5)
     rla.add_metric(precision)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a, **k: precision(*a, **k), name='ind_pk', k=5)
-    rla.add_metric(lambda *a: precision(*a), name='ind_p')
+    rla.add_metric(lambda *a, **k: precision(*a, **k), name="ind_pk", k=5)
+    rla.add_metric(lambda *a: precision(*a), name="ind_p")

     res = rla.compute(recs, test)
     assert res.precision.values == approx(res.ind_p.values)
diff --git a/tests/test_topn_rbp.py b/tests/test_topn_rbp.py
index c3496b276..babe76dc3 100644
--- a/tests/test_topn_rbp.py
+++ b/tests/test_topn_rbp.py
@@ -15,82 +15,86 @@ def test_rbp_empty():
-    recs = pd.DataFrame({'item': []})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": []})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert rbp(recs, truth) == approx(0.0)


 def test_rbp_no_match():
-    recs = pd.DataFrame({'item': [4]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [4]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert rbp(recs, truth) == approx(0.0)


 def test_rbp_one_match():
-    recs = pd.DataFrame({'item': [1]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item')
+    recs = pd.DataFrame({"item": [1]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item")
     assert rbp(recs, truth) == approx(0.5)


 @given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95))
 def test_rbp_perfect(items, p):
     n = len(items)
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, patience=p) == approx(np.sum(p ** np.arange(n)) * (1 - p))


 @given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95))
 def test_rbp_perfect_norm(items, p):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, patience=p, normalize=True) == approx(1.0)


-@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
-       st.integers(1, 100), st.floats(0.05, 0.95))
+@given(
+    st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
+    st.integers(1, 100),
+    st.floats(0.05, 0.95),
+)
 def test_rbp_perfect_k(items, k, p):
     n = len(items)
     eff_n = min(n, k)
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, k=k, patience=p) == approx(np.sum(p ** np.arange(eff_n)) * (1 - p))


-@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
-       st.integers(1, 100), st.floats(0.05, 0.95))
+@given(
+    st.lists(st.integers(1), min_size=1, max_size=100, unique=True),
+    st.integers(1, 100),
+    st.floats(0.05, 0.95),
+)
 def test_rbp_perfect_k_norm(items, k, p):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': items, 'rating': 1})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": items, "rating": 1})
+    truth = truth.set_index("item").sort_index()
     assert rbp(recs, truth, k=k, patience=p, normalize=True) == approx(1.0)


 def test_rbp_missing():
-    recs = pd.DataFrame({'item': [1, 2]})
-    truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]})
-    truth = truth.set_index('item').sort_index()
+    recs = pd.DataFrame({"item": [1, 2]})
+    truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]})
+    truth = truth.set_index("item").sort_index()
     # (1 + 0.5) * 0.5
     assert rbp(recs, truth) == approx(0.75)


 def test_rbp_bulk_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 30, 2),
-        (1, 1, 72, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 30, 2), (1, 1, 72, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     rbp = _bulk_rbp(recs, truth)
     assert len(rbp) == 1
@@ -99,16 +103,14 @@ def test_rbp_bulk_not_at_top():
-    truth = pd.DataFrame.from_records([
-        (1, 50, 3.5),
-        (1, 30, 3.5)
-    ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item'])
+    truth = pd.DataFrame.from_records(
+        [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"]
+    ).set_index(["LKTruthID", "item"])

-    recs = pd.DataFrame.from_records([
-        (1, 1, 50, 1),
-        (1, 1, 72, 2),
-        (1, 1, 30, 3)
-    ], columns=['LKRecID', 'LKTruthID', 'item', 'rank'])
+    recs = pd.DataFrame.from_records(
+        [(1, 1, 50, 1), (1, 1, 72, 2), (1, 1, 30, 3)],
+        columns=["LKRecID", "LKTruthID", "item", "rank"],
+    )

     rbp = _bulk_rbp(recs, truth)
     assert len(rbp) == 1
@@ -116,27 +118,29 @@ def test_rbp_bulk_not_at_top():
     assert rbp.iloc[0] == approx((1 + 0.25) * 0.5)


-@mark.parametrize('normalize', [False, True])
+@mark.parametrize("normalize", [False, True])
 def test_rbp_bulk_match(demo_recs, normalize):
     "bulk and normal match"
     train, test, recs = demo_recs

     rla = RecListAnalysis()
     rla.add_metric(rbp, normalize=normalize)
-    rla.add_metric(rbp, name='rbp_k', k=5, normalize=normalize)
+    rla.add_metric(rbp, name="rbp_k", k=5, normalize=normalize)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a: rbp(*a, normalize=normalize), name='ind_rbp')
-    rla.add_metric(lambda *a, **k: rbp(*a, normalize=normalize, **k), name='ind_rbp_k', k=5)
+    rla.add_metric(lambda *a: rbp(*a, normalize=normalize), name="ind_rbp")
+    rla.add_metric(lambda *a, **k: rbp(*a, normalize=normalize, **k), name="ind_rbp_k", k=5)

     res = rla.compute(recs, test)
-    res['diff'] = np.abs(res.rbp - res.ind_rbp)
-    rl = res.nlargest(5, 'diff')
-    _log.info('res:\n%s', rl)
+    res["diff"] = np.abs(res.rbp - res.ind_rbp)
+    rl = res.nlargest(5, "diff")
+    _log.info("res:\n%s", rl)
     user = rl.index[0]
-    _log.info('user: %s\n%s', user, rl.iloc[0])
-    _log.info('test:\n%s', test[test['user'] == user])
-    urecs = recs[recs['user'] == user].join(test.set_index(['user', 'item'])['rating'], on=['user', 'item'], how='left')
-    _log.info('recs:\n%s', urecs[urecs['rating'].notnull()])
+    _log.info("user: %s\n%s", user, rl.iloc[0])
+    _log.info("test:\n%s", test[test["user"] == user])
+    urecs = recs[recs["user"] == user].join(
+        test.set_index(["user", "item"])["rating"], on=["user", "item"], how="left"
+    )
+    _log.info("recs:\n%s", urecs[urecs["rating"].notnull()])

     assert res.rbp.values == approx(res.ind_rbp.values)
     assert res.rbp_k.values == approx(res.ind_rbp_k.values)
diff --git a/tests/test_topn_recall.py b/tests/test_topn_recall.py
index c11a6ad7f..612acdf08 100644
--- a/tests/test_topn_recall.py
+++ b/tests/test_topn_recall.py
@@ -12,8 +12,8 @@ def _test_recall(items, rel, **kwargs):
-    recs = pd.DataFrame({'item': items})
-    truth = pd.DataFrame({'item': rel}).set_index('item')
+    recs = pd.DataFrame({"item": items})
+    truth = pd.DataFrame({"item": rel}).set_index("item")
     return recall(recs, truth, **kwargs)
@@ -84,7 +84,7 @@ def test_recall_series_array():
     prec = _test_recall(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert prec == approx(0.5)

-    prec = _test_recall(pd.Series([1, 2, 3, 4]), np.arange(4, 9, 1, 'u4'))
+    prec = _test_recall(pd.Series([1, 2, 3, 4]), np.arange(4, 9, 1, "u4"))
     assert prec == approx(0.2)
@@ -95,7 +95,7 @@ def test_recall_array():
     prec = _test_recall(np.array([1, 2, 3, 4]), np.array([1, 3, 5, 7]))
     assert prec == approx(0.5)

-    prec = _test_recall(np.array([1, 2, 3, 4]), np.arange(4, 9, 1, 'u4'))
+    prec = _test_recall(np.array([1, 2, 3, 4]), np.arange(4, 9, 1, "u4"))
     assert prec == approx(0.2)
@@ -126,19 +126,18 @@ def test_recall_partial_rel():
 def test_recall_bulk_k(demo_recs):
     "bulk and normal match"
     train, test, recs = demo_recs
-    assert test['user'].value_counts().max() > 5
+    assert test["user"].value_counts().max() > 5

     rla = topn.RecListAnalysis()
-    rla.add_metric(recall, name='rk', k=5)
+    rla.add_metric(recall, name="rk", k=5)
     rla.add_metric(recall)
     # metric without the bulk capabilities
-    rla.add_metric(lambda *a, **k: recall(*a, **k), name='ind_rk', k=5)
-    rla.add_metric(lambda *a: recall(*a), name='ind_r')
+    rla.add_metric(lambda *a, **k: recall(*a, **k), name="ind_rk", k=5)
+    rla.add_metric(lambda *a: recall(*a), name="ind_r")

     res = rla.compute(recs, test)
     print(res)
-    _log.info('recall mismatches:\n%s',
-              res[res.recall != res.ind_r])
+    _log.info("recall mismatches:\n%s", res[res.recall != res.ind_r])
     assert res.recall.values == approx(res.ind_r.values)
     assert res.rk.values == approx(res.ind_rk.values)
diff --git a/tests/test_topn_recs.py b/tests/test_topn_recs.py
index 07d1840fa..e570f8c1f 100644
--- a/tests/test_topn_recs.py
+++ b/tests/test_topn_recs.py
@@ -7,9 +7,9 @@ import lenskit.util.test as lktu
 from pytest import approx

-simple_df = pd.DataFrame({'item': [1, 1, 2, 3],
-                          'user': [10, 12, 10, 13],
-                          'rating': [4.0, 3.0, 5.0, 2.0]})
+simple_df = pd.DataFrame(
+    {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]}
+)


 def test_topn_recommend():
@@ -37,14 +37,14 @@ def test_topn_config():
     rec = basic.TopN(pred)

     rs = str(rec)
-    assert rs.startswith('TopN/')
+    assert rs.startswith("TopN/")


 def test_topn_big():
     ratings = lktu.ml_test.ratings
     users = ratings.user.unique()
     items = ratings.item.unique()
-    user_items = ratings.set_index('user').item
+    user_items = ratings.set_index("user").item

     algo = basic.TopN(bias.Bias())
     a2 = algo.fit(ratings)
@@ -55,7 +55,7 @@ def test_topn_big():
         recs = algo.recommend(u, 100)
         assert len(recs) == 100
         rated = user_items.loc[u]
-        assert all(~recs['item'].isin(rated))
+        assert all(~recs["item"].isin(rated))
         unrated = np.setdiff1d(items, rated)
         scores = algo.predictor.predict_for_user(u, unrated)
         top = scores.nlargest(100)
diff --git a/tests/test_topn_utils.py b/tests/test_topn_utils.py
index c97f1f496..25257c5df 100644
--- a/tests/test_topn_utils.py
+++ b/tests/test_topn_utils.py
@@ -6,7 +6,7 @@ def test_cs_rated_items_series():
     "rated_items should de-index series"
-    items = ['a', 'b', 'wombat']
+    items = ["a", "b", "wombat"]
     series = pd.Series(np.random.randn(3), index=items)

     i2 = CandidateSelector.rated_items(series)
@@ -16,7 +16,7 @@ def test_cs_rated_items():
     "rated_items should return list as array"
-    items = ['a', 'b', 'wombat']
+    items = ["a", "b", "wombat"]

     i2 = CandidateSelector.rated_items(items)
     assert isinstance(i2, np.ndarray)
@@ -25,7 +25,7 @@ def test_cs_rated_items_array():
     "rated_items should return array as itself"
-    items = ['a', 'b', 'wombat']
+    items = ["a", "b", "wombat"]
     items = np.array(items)

     i2 = CandidateSelector.rated_items(items)
diff --git a/tests/test_util.py b/tests/test_util.py
index 63fbc783a..6f134b0a2 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -31,14 +31,14 @@ def test_stopwatch_str():
     w = lku.Stopwatch()
     time.sleep(0.5)
     s = str(w)
-    assert s.endswith('ms')
+    assert s.endswith("ms")


 def test_stopwatch_long_str():
     w = lku.Stopwatch()
     time.sleep(1.2)
     s = str(w)
-    assert s.endswith('s')
+    assert s.endswith("s")


 def test_stopwatch_minutes():
@@ -46,7 +46,7 @@ def test_stopwatch_minutes():
     w.stop()
     w.start_time = w.stop_time - 62
     s = str(w)
-    p = re.compile(r'1m2.\d\ds')
+    p = re.compile(r"1m2.\d\ds")
     assert p.match(s)
@@ -55,7 +55,7 @@ def test_stopwatch_hours():
     w.stop()
     w.start_time = w.stop_time - 3663
     s = str(w)
-    p = re.compile(r'1h1m3.\d\ds')
+    p = re.compile(r"1h1m3.\d\ds")
     assert p.match(s)
@@ -64,6 +64,7 @@ def test_last_memo():
     def func(foo):
         history.append(foo)
+
     cache = lku.LastMemo(func)
     cache("foo")
diff --git a/tests/test_util_algos.py b/tests/test_util_algos.py
index 644b0240a..8437ebd35 100644
--- a/tests/test_util_algos.py
+++ b/tests/test_util_algos.py
@@ -5,9 +5,9 @@ import lenskit.util.test as lktu

-simple_df = pd.DataFrame({'item': [1, 1, 2, 3],
-                          'user': [10, 12, 10, 13],
-                          'rating': [4.0, 3.0, 5.0, 2.0]})
+simple_df = pd.DataFrame(
+    {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]}
+)


 def test_memorized():
@@ -26,9 +26,9 @@ def test_memorized():
 def test_memorized_batch():
     algo = basic.Memorized(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 10, 12], 'item': [1, 2, 1]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 10, 12], "item": [1, 2, 1]}))
     assert isinstance(preds, pd.Series)
-    assert preds.name == 'prediction'
+    assert preds.name == "prediction"
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds == [4.0, 5.0, 3.0])
@@ -36,7 +36,7 @@ def test_memorized_batch_ord():
     algo = basic.Memorized(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 12, 10], 'item': [1, 1, 2]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 12, 10], "item": [1, 1, 2]}))
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds == [4.0, 3.0, 5.0])
@@ -44,7 +44,7 @@ def test_memorized_batch_missing():
     algo = basic.Memorized(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 12, 12], 'item': [1, 1, 3]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 12, 12], "item": [1, 1, 3]}))
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds.iloc[:2] == [4.0, 3.0])
     assert np.isnan(preds.iloc[2])
@@ -53,8 +53,9 @@ def test_memorized_batch_keep_index():
     algo = basic.Memorized(simple_df)

-    query = pd.DataFrame({'user': [10, 10, 12], 'item': [1, 2, 1]},
-                         index=np.random.choice(np.arange(10), 3, False))
+    query = pd.DataFrame(
+        {"user": [10, 10, 12], "item": [1, 2, 1]}, index=np.random.choice(np.arange(10), 3, False)
+    )
     preds = algo.predict(query)
     assert all(preds.index == query.index)
     assert all(preds == [4.0, 5.0, 3.0])
@@ -64,7 +65,7 @@ def test_random():
     # test case: no seed
     algo = basic.Random()
     model = algo.fit(lktu.ml_test.ratings)
-    items = lktu.ml_test.ratings['item'].unique()
+    items = lktu.ml_test.ratings["item"].unique()
     nitems = len(items)

     assert model is not None
@@ -74,17 +75,17 @@ def test_random():
     assert len(recs1) == 100
     assert len(recs2) == 100
     # with very high probabilities
-    assert set(recs1['item']) != set(recs2['item'])
+    assert set(recs1["item"]) != set(recs2["item"])

     recs_all = algo.recommend(2038)
     assert len(recs_all) == nitems
-    assert set(items) == set(recs_all['item'])
+    assert set(items) == set(recs_all["item"])


 def test_random_derive_seed():
-    algo = basic.Random(rng_spec='user')
+    algo = basic.Random(rng_spec="user")
     model = algo.fit(lktu.ml_test.ratings)
-    items = lktu.ml_test.ratings['item'].unique()
+    items = lktu.ml_test.ratings["item"].unique()
     nitems = len(items)

     assert model is not None
@@ -94,17 +95,17 @@ def test_random_derive_seed():
     assert len(recs1) == 100
     assert len(recs2) == 100
     # with very high probabilities
-    assert set(recs1['item']) != set(recs2['item'])
+    assert set(recs1["item"]) != set(recs2["item"])

     recs_all = algo.recommend(2038)
     assert len(recs_all) == nitems
-    assert set(items) == set(recs_all['item'])
+    assert set(items) == set(recs_all["item"])


 def test_random_rec_from_candidates():
     algo = basic.Random()
-    items = lktu.ml_test.ratings['item'].unique()
-    users = lktu.ml_test.ratings['user'].unique()
+    items = lktu.ml_test.ratings["item"].unique()
+    users = lktu.ml_test.ratings["user"].unique()
     user1, user2 = np.random.choice(users, size=2, replace=False)

     algo.fit(lktu.ml_test.ratings)
@@ -134,7 +135,7 @@ def test_knownrating_batch_missing():
     algo = basic.KnownRating()
     algo.fit(simple_df)

-    preds = algo.predict(pd.DataFrame({'user': [10, 12, 12], 'item': [1, 1, 3]}))
+    preds = algo.predict(pd.DataFrame({"user": [10, 12, 12], "item": [1, 1, 3]}))
     assert set(preds.index) == set([0, 1, 2])
     assert all(preds.iloc[:2] == [4.0, 3.0])
     assert np.isnan(preds.iloc[2])
diff --git a/tests/test_util_random.py b/tests/test_util_random.py
index 62b3abe15..d582a2dcb 100644
--- a/tests/test_util_random.py
+++ b/tests/test_util_random.py
@@ -62,7 +62,7 @@ def test_initialize():

 def test_initialize_key():
-    random.init_rng(42, 'wombat')
+    random.init_rng(42, "wombat")
     assert root_seed().entropy == 42
     # assert root_seed().spawn_key == (zlib.crc32(b'wombat'),)
@@ -83,6 +83,6 @@ def test_derive_seed_intkey():

 def test_derive_seed_str():
     random.init_rng(42, propagate=False)
-    s2 = random.derive_seed(b'wombat')
+    s2 = random.derive_seed(b"wombat")
     assert s2.entropy == 42
     # assert s2.spawn_key == (zlib.crc32(b'wombat'),)