From 547b1355d3747b267db3e21aefd143382f49f4ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Jul 2018 10:27:51 +0100 Subject: [PATCH 1/4] Fix perf regression in PR #3530 The get_entities_changed function was changed to return all changed entities since the given stream position, rather than only those changed from a given list of entities. This resulted in the function incorrectly returning large numbers of entities that, for example, caused large increases in database usage. --- synapse/util/caches/stream_change_cache.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index 258655349b62..c1e76b1a0e15 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -74,12 +74,17 @@ def get_entities_changed(self, entities, stream_pos): assert type(stream_pos) is int if stream_pos >= self._earliest_known_stream_pos: - result = { + changed_entities = { self._cache[k] for k in self._cache.islice( start=self._cache.bisect_right(stream_pos), ) } + result = { + e for e in entities + if e in changed_entities + } + self.metrics.inc_hits() else: result = set(entities) From 9952d18e4dc06f89914dc3f2bb07a5ca7e4e0b4f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Jul 2018 10:31:51 +0100 Subject: [PATCH 2/4] Newsfile --- changelog.d/3544.misc | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 changelog.d/3544.misc diff --git a/changelog.d/3544.misc b/changelog.d/3544.misc new file mode 100644 index 000000000000..e69de29bb2d1 From 850238b4ef1573a4162048d5ae285bb3fdccf5bb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Jul 2018 10:59:02 +0100 Subject: [PATCH 3/4] Add unit test --- tests/util/test_stream_change_cache.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/util/test_stream_change_cache.py b/tests/util/test_stream_change_cache.py index fc45baaaa04c..65b0f2e6fbc8 100644 --- a/tests/util/test_stream_change_cache.py +++ b/tests/util/test_stream_change_cache.py @@ -178,6 +178,22 @@ def test_get_entities_changed(self): ), ) + # Query a subset of the entries mid-way through the stream. We should + # only get back the subset. + self.assertEqual( + cache.get_entities_changed( + [ + "bar@baz.net", + ], + stream_pos=2, + ), + set( + [ + "bar@baz.net", + ] + ), + ) + def test_max_pos(self): """ StreamChangeCache.get_max_pos_of_last_change will return the most From b2aa05a8d6b1b2fb9e1efcc6fb03f1b49bc1be1d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Jul 2018 11:07:04 +0100 Subject: [PATCH 4/4] Use efficient .intersection --- synapse/util/caches/stream_change_cache.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/util/caches/stream_change_cache.py b/synapse/util/caches/stream_change_cache.py index c1e76b1a0e15..f2bde74dc58a 100644 --- a/synapse/util/caches/stream_change_cache.py +++ b/synapse/util/caches/stream_change_cache.py @@ -80,10 +80,7 @@ def get_entities_changed(self, entities, stream_pos): ) } - result = { - e for e in entities - if e in changed_entities - } + result = changed_entities.intersection(entities) self.metrics.inc_hits() else: