From 380b40f2fba95179214377166bfa3adb728ba051 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 7 Dec 2024 14:48:00 -0500
Subject: [PATCH 1/2] fix key errors when adding ILCs

---
 lenskit/lenskit/data/collection.py    | 4 ++--
 lenskit/tests/data/test_collection.py | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/lenskit/lenskit/data/collection.py b/lenskit/lenskit/data/collection.py
index 6da11a94e..332d1d222 100644
--- a/lenskit/lenskit/data/collection.py
+++ b/lenskit/lenskit/data/collection.py
@@ -220,8 +220,8 @@ def add_from(self, other: ItemListCollection, **fields: ID):
         """
         for key, list in other:
             if fields:
-                fields = key._asdict() | fields
-                key = self._key_class(**fields)
+                cf = key._asdict() | fields
+                key = self._key_class(**cf)
             self._add(key, list)
 
     def _add(self, key: K, list: ItemList):
diff --git a/lenskit/tests/data/test_collection.py b/lenskit/tests/data/test_collection.py
index cbab50f9b..7d9893202 100644
--- a/lenskit/tests/data/test_collection.py
+++ b/lenskit/tests/data/test_collection.py
@@ -117,13 +117,16 @@ def test_lookup_projected():
 
 def test_add_from():
     ilc = ItemListCollection(["model", "user_id"])
-    ilc1 = ItemListCollection.from_dict({72: ItemList(["a", "b"])}, key="user_id")
+    ilc1 = ItemListCollection.from_dict({72: ItemList(["a", "b"]), 48: ItemList()}, key="user_id")
     ilc.add_from(ilc1, model="foo")
-    assert len(ilc) == 1
+    assert len(ilc) == 2
 
     il = ilc.lookup(("foo", 72))
     assert il is not None
     assert il.ids().tolist() == ["a", "b"]
+    il = ilc.lookup(("foo", 48))
+    assert il is not None
+    assert len(il) == 0
 
 
 def test_from_df(rng, ml_ratings: pd.DataFrame):

From 94d3a52da659d90253a57bff832d7ef13b2b5705 Mon Sep 17 00:00:00 2001
From: Michael Ekstrand
Date: Sat, 7 Dec 2024 14:59:57 -0500
Subject: [PATCH 2/2] support grouped summaries in eval report

---
 lenskit/lenskit/metrics/bulk.py         | 17 +++++++++++++----
 lenskit/tests/eval/test_bulk_metrics.py | 23 +++++++++++++++++++++++
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/lenskit/lenskit/metrics/bulk.py b/lenskit/lenskit/metrics/bulk.py
index 2d2dba534..a981a8d14 100644
--- a/lenskit/lenskit/metrics/bulk.py
+++ b/lenskit/lenskit/metrics/bulk.py
@@ -76,7 +76,7 @@ def global_metrics(self) -> pd.Series:
         """
         return self._global_metrics
 
-    def list_metrics(self, *, fill_missing=True) -> pd.DataFrame:
+    def list_metrics(self, fill_missing=True) -> pd.DataFrame:
         """
         Get the per-list scores of the results. This is a data frame with one
         row per list (with the list key on the index), and one metric per
@@ -91,21 +91,30 @@
         """
         return self._list_metrics.fillna(self._defaults)
 
-    def list_summary(self) -> pd.DataFrame:
+    def list_summary(self, *keys: str) -> pd.DataFrame:
         """
         Summary statistics for the per-list metrics. Each metric is on its own
         row, with columns reporting the following:
 
         * ``mean``: the mean metric value.
         * ``median``: the median metric value.
         * ``std``: the standard deviation of the metric value.
 
         Additional columns are added based on other options. Missing metric
         values are filled with their defaults before computing statistics.
+
+        Args:
+            keys:
+                Identifiers for different conditions that should be reported
+                separately (grouping keys for the final result).
""" scores = self.list_metrics(fill_missing=True) - df = scores.agg(["mean", "median", "std"]).T - df.index.name = "metric" + if keys: + df = scores.groupby(list(keys)).agg(["mean", "median", "std"]).stack(level=0) + assert isinstance(df, pd.DataFrame) + else: + df = scores.agg(["mean", "median", "std"]).T + df.index.name = "metric" return df def merge_from(self, other: RunAnalysisResult): diff --git a/lenskit/tests/eval/test_bulk_metrics.py b/lenskit/tests/eval/test_bulk_metrics.py index 204d6dcd3..a94b6c893 100644 --- a/lenskit/tests/eval/test_bulk_metrics.py +++ b/lenskit/tests/eval/test_bulk_metrics.py @@ -43,3 +43,26 @@ def test_recs(demo_recs): print(stats) for m in bms.metrics: assert stats.loc[m.label, "mean"] == approx(scores[m.label].mean()) + + +def test_recs_multi(demo_recs): + split, recs = demo_recs + + il2 = ItemListCollection(["rep", "user_id"]) + il2.add_from(recs, rep=1) + il2.add_from(recs, rep=2) + + bms = RunAnalysis() + bms.add_metric(ListLength()) + bms.add_metric(Precision()) + bms.add_metric(NDCG()) + bms.add_metric(RBP) + bms.add_metric(RecipRank) + + metrics = bms.compute(il2, split.test) + scores = metrics.list_metrics() + stats = metrics.list_summary("rep") + print(stats) + for m in bms.metrics: + assert stats.loc[(1, m.label), "mean"] == approx(scores.loc[1, m.label].mean()) + assert stats.loc[(2, m.label), "mean"] == approx(scores.loc[2, m.label].mean())