From fc3571ec0748c31250d099c1a7aac2e01d8e7cfe Mon Sep 17 00:00:00 2001 From: Nick Barrett Date: Tue, 2 Aug 2022 14:54:35 +0100 Subject: [PATCH 01/11] Add local variants of get event from cache methods --- .../storage/databases/main/events_worker.py | 30 +++++++++++++++++-- synapse/util/caches/lrucache.py | 8 +++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 29c99c635735..d5d855941194 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -754,7 +754,31 @@ def _invalidate_local_get_event_cache(self, event_id: str) -> None: async def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True ) -> Dict[str, EventCacheEntry]: - """Fetch events from the caches. + """Fetch events from the caches, both in memory and any external. + + May return rejected events. + + Args: + events: list of event_ids to fetch + update_metrics: Whether to update the cache hit ratio metrics + """ + event_map = self._get_events_from_local_cache(events, update_metrics=update_metrics) + + missing_event_ids = {e for e in events if e not in event_map} + + for event_id in missing_event_ids: + ret = await self._get_event_cache.get( + (event_id,), None, update_metrics=update_metrics + ) + if ret: + event_map[event_id] = ret + + return event_map + + def _get_events_from_local_cache( + self, events: Iterable[str], update_metrics: bool = True + ) -> Dict[str, EventCacheEntry]: + """Fetch events from the local, in memory, caches. May return rejected events. @@ -766,7 +790,7 @@ async def _get_events_from_cache( for event_id in events: # First check if it's in the event cache - ret = await self._get_event_cache.get( + ret = self._get_event_cache.get_local( (event_id,), None, update_metrics=update_metrics ) if ret: @@ -788,7 +812,7 @@ async def _get_events_from_cache( # We add the entry back into the cache as we want to keep # recently queried events in the cache. - await self._get_event_cache.set((event_id,), cache_entry) + self._get_event_cache.set_local((event_id,), cache_entry) return event_map diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index b3bdedb04cad..1b023a4f3ce8 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -834,9 +834,17 @@ async def get( ) -> Optional[VT]: return self._lru_cache.get(key, update_metrics=update_metrics) + def get_local( + self, key: KT, default: Optional[T] = None, update_metrics: bool = True + ) -> Optional[VT]: + return self._lru_cache.get(key, update_metrics=update_metrics) + async def set(self, key: KT, value: VT) -> None: self._lru_cache.set(key, value) + def set_local(self, key: KT, value: VT) -> None: + self._lru_cache.set(key, value) + async def invalidate(self, key: KT) -> None: # This method should invalidate any external cache and then invalidate the LruCache. return self._lru_cache.invalidate(key) From af8630eae423e3ac2bb160c80e4c0966a015c899 Mon Sep 17 00:00:00 2001 From: Nick Barrett Date: Tue, 2 Aug 2022 14:57:09 +0100 Subject: [PATCH 02/11] Check external event cache within deferred Still maintains local in memory lookup optimisation, but does any external lookup as part of the deferred that prevents duplicate lookups for the same event at once. This makes the assumption that fetching from an external cache is a non-zero load operation. --- .../storage/databases/main/events_worker.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index d5d855941194..f7aa75f985c0 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -600,7 +600,10 @@ async def _get_events_from_cache_or_db( Returns: map from event id to result """ - event_entry_map = await self._get_events_from_cache( + # Shortcut: check if we have any events in the *in memory* cache - this function + # may be called repeatedly for the same event so at this point we cannot reach + # out to any external cache for performance reasons. + event_entry_map = self._get_events_from_local_cache( event_ids, ) @@ -632,7 +635,7 @@ async def _get_events_from_cache_or_db( if missing_events_ids: - async def get_missing_events_from_db() -> Dict[str, EventCacheEntry]: + async def get_missing_events_from_cache_or_db() -> Dict[str, EventCacheEntry]: """Fetches the events in `missing_event_ids` from the database. Also creates entries in `self._current_event_fetches` to allow @@ -657,10 +660,18 @@ async def get_missing_events_from_db() -> Dict[str, EventCacheEntry]: # the events have been redacted, and if so pulling the redaction event # out of the database to check it. # + missing_events = {} try: - missing_events = await self._get_events_from_db( + # First fetch from the cache - including any external caches + cache_missing_events = await self._get_events_from_cache( missing_events_ids, ) + missing_events.update(cache_missing_events) + # Now actually fetch any remaining events from the DB + db_missing_events = await self._get_events_from_db( + missing_events_ids - set(cache_missing_events.keys()), + ) + missing_events.update(db_missing_events) except Exception as e: with PreserveLoggingContext(): fetching_deferred.errback(e) @@ -679,7 +690,7 @@ async def get_missing_events_from_db() -> Dict[str, EventCacheEntry]: # cancellations, since multiple `_get_events_from_cache_or_db` calls can # reuse the same fetch. missing_events: Dict[str, EventCacheEntry] = await delay_cancellation( - get_missing_events_from_db() + get_missing_events_from_cache_or_db() ) event_entry_map.update(missing_events) From 030888bbfb7a8b86b06910a79be719ae2a0a9fed Mon Sep 17 00:00:00 2001 From: Nick Barrett Date: Tue, 2 Aug 2022 14:57:49 +0100 Subject: [PATCH 03/11] Only check local event cache for roommember lookup optimisation --- synapse/storage/databases/main/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index e2cccc688c9f..93ff4816c804 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -896,7 +896,7 @@ async def _get_joined_users_from_context( # We don't update the event cache hit ratio as it completely throws off # the hit ratio counts. After all, we don't populate the cache if we # miss it here - event_map = await self._get_events_from_cache( + event_map = self._get_events_from_local_cache( member_event_ids, update_metrics=False ) From 67544d1f64061d6590f7303134deb46f2f125c83 Mon Sep 17 00:00:00 2001 From: Nick Barrett Date: Tue, 2 Aug 2022 15:07:22 +0100 Subject: [PATCH 04/11] Linting --- synapse/storage/databases/main/events_worker.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index f7aa75f985c0..e084a8ecee9c 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -635,7 +635,9 @@ async def _get_events_from_cache_or_db( if missing_events_ids: - async def get_missing_events_from_cache_or_db() -> Dict[str, EventCacheEntry]: + async def get_missing_events_from_cache_or_db() -> Dict[ + str, EventCacheEntry + ]: """Fetches the events in `missing_event_ids` from the database. Also creates entries in `self._current_event_fetches` to allow @@ -773,7 +775,9 @@ async def _get_events_from_cache( events: list of event_ids to fetch update_metrics: Whether to update the cache hit ratio metrics """ - event_map = self._get_events_from_local_cache(events, update_metrics=update_metrics) + event_map = self._get_events_from_local_cache( + events, update_metrics=update_metrics + ) missing_event_ids = {e for e in events if e not in event_map} From c228290db83d367086ca9319c252c809e3925a6f Mon Sep 17 00:00:00 2001 From: Nick Barrett Date: Tue, 2 Aug 2022 15:50:05 +0100 Subject: [PATCH 05/11] Add changelog file --- changelog.d/13435.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13435.misc diff --git a/changelog.d/13435.misc b/changelog.d/13435.misc new file mode 100644 index 000000000000..0edde490bf8f --- /dev/null +++ b/changelog.d/13435.misc @@ -0,0 +1 @@ +Prevent unnecessary lookups to any external get event cache. Contributed by Nick @ Beeper (@fizzadar). From 7382e0dd1f66864801a2e6cdf590f4c7adbb119f Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 3 Aug 2022 19:31:38 +0100 Subject: [PATCH 06/11] Remove unnecessary dict copying Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/databases/main/events_worker.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index e084a8ecee9c..1caa8e1db27b 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -665,13 +665,12 @@ async def get_missing_events_from_cache_or_db() -> Dict[ missing_events = {} try: # First fetch from the cache - including any external caches - cache_missing_events = await self._get_events_from_cache( + missing_events = await self._get_events_from_cache( missing_events_ids, ) - missing_events.update(cache_missing_events) # Now actually fetch any remaining events from the DB db_missing_events = await self._get_events_from_db( - missing_events_ids - set(cache_missing_events.keys()), + missing_events_ids - missing_events.keys(), ) missing_events.update(db_missing_events) except Exception as e: From 012af6c29b9cd0e53dd2c73e3b14180bb9cc3630 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 3 Aug 2022 19:34:39 +0100 Subject: [PATCH 07/11] Changelog cleanup Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- changelog.d/13435.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/13435.misc b/changelog.d/13435.misc index 0edde490bf8f..c01b9136c809 100644 --- a/changelog.d/13435.misc +++ b/changelog.d/13435.misc @@ -1 +1 @@ -Prevent unnecessary lookups to any external get event cache. Contributed by Nick @ Beeper (@fizzadar). +Prevent unnecessary lookups to any external `get_event` cache. Contributed by Nick @ Beeper (@fizzadar). From d15b7c36997d851eb8e14073429b2343962d5d4a Mon Sep 17 00:00:00 2001 From: Nick Barrett Date: Wed, 3 Aug 2022 19:36:30 +0100 Subject: [PATCH 08/11] Use explicit get events from external cache call This avoid duplicating lookups, and thus metrics, on the in memory event cache. --- .../storage/databases/main/events_worker.py | 29 +++++++++++++++---- synapse/util/caches/lrucache.py | 6 ++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 1caa8e1db27b..3f82450ba380 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -602,7 +602,8 @@ async def _get_events_from_cache_or_db( """ # Shortcut: check if we have any events in the *in memory* cache - this function # may be called repeatedly for the same event so at this point we cannot reach - # out to any external cache for performance reasons. + # out to any external cache for performance reasons, this is done later on in + # the `get_missing_events_from_cache_or_db` function below. event_entry_map = self._get_events_from_local_cache( event_ids, ) @@ -664,8 +665,8 @@ async def get_missing_events_from_cache_or_db() -> Dict[ # missing_events = {} try: - # First fetch from the cache - including any external caches - missing_events = await self._get_events_from_cache( + # Try to fetch from any external cache, in-memory cache checked above + missing_events = await self._get_events_from_external_cache( missing_events_ids, ) # Now actually fetch any remaining events from the DB @@ -779,9 +780,27 @@ async def _get_events_from_cache( ) missing_event_ids = {e for e in events if e not in event_map} + event_map.update(self._get_events_from_external_cache( + events=missing_event_ids, update_metrics=update_metrics, + )) - for event_id in missing_event_ids: - ret = await self._get_event_cache.get( + return event_map + + async def _get_events_from_external_cache( + self, events: Iterable[str], update_metrics: bool = True + ) -> Dict[str, EventCacheEntry]: + """Fetch events from any configured external cache. + + May return rejected events. + + Args: + events: list of event_ids to fetch + update_metrics: Whether to update the cache hit ratio metrics + """ + event_map = {} + + for event_id in events: + ret = await self._get_event_cache.get_external( (event_id,), None, update_metrics=update_metrics ) if ret: diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 1b023a4f3ce8..80f861774e05 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -834,6 +834,12 @@ async def get( ) -> Optional[VT]: return self._lru_cache.get(key, update_metrics=update_metrics) + async def get_external( + self, key: KT, default: Optional[T] = None, update_metrics: bool = True, + ) -> Optional[VT]: + # This method should fetch from any configured external cache, in this case noop. + return None + def get_local( self, key: KT, default: Optional[T] = None, update_metrics: bool = True ) -> Optional[VT]: From f1887cbec3589115f1b42870748d5101a20a4c51 Mon Sep 17 00:00:00 2001 From: Nick Barrett Date: Wed, 3 Aug 2022 19:43:49 +0100 Subject: [PATCH 09/11] Linting --- synapse/storage/databases/main/events_worker.py | 9 ++++++--- synapse/util/caches/lrucache.py | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 3f82450ba380..283a1310c89a 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -780,9 +780,12 @@ async def _get_events_from_cache( ) missing_event_ids = {e for e in events if e not in event_map} - event_map.update(self._get_events_from_external_cache( - events=missing_event_ids, update_metrics=update_metrics, - )) + event_map.update( + await self._get_events_from_external_cache( + events=missing_event_ids, + update_metrics=update_metrics, + ) + ) return event_map diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 80f861774e05..aa93109d1380 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -835,7 +835,10 @@ async def get( return self._lru_cache.get(key, update_metrics=update_metrics) async def get_external( - self, key: KT, default: Optional[T] = None, update_metrics: bool = True, + self, + key: KT, + default: Optional[T] = None, + update_metrics: bool = True, ) -> Optional[VT]: # This method should fetch from any configured external cache, in this case noop. return None From e8ce42ed17ecaf037ecf2e237a93612f4e12050c Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 4 Aug 2022 13:17:16 +0100 Subject: [PATCH 10/11] Fixup comments Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/databases/main/events_worker.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 283a1310c89a..fbcf7c62e5a5 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -602,8 +602,8 @@ async def _get_events_from_cache_or_db( """ # Shortcut: check if we have any events in the *in memory* cache - this function # may be called repeatedly for the same event so at this point we cannot reach - # out to any external cache for performance reasons, this is done later on in - # the `get_missing_events_from_cache_or_db` function below. + # out to any external cache for performance reasons. The external cache is + # checked later on in the `get_missing_events_from_cache_or_db` function below. event_entry_map = self._get_events_from_local_cache( event_ids, ) @@ -665,7 +665,8 @@ async def get_missing_events_from_cache_or_db() -> Dict[ # missing_events = {} try: - # Try to fetch from any external cache, in-memory cache checked above + # Try to fetch from any external cache. We already checked the + # in-memory cache above. missing_events = await self._get_events_from_external_cache( missing_events_ids, ) From ca0ca9387779dad8c2d230b106ee536118f61ee0 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Thu, 4 Aug 2022 13:17:30 +0100 Subject: [PATCH 11/11] Don't use unncessary set object Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> --- synapse/storage/databases/main/events_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index fbcf7c62e5a5..e9ff6cfb3455 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -780,7 +780,7 @@ async def _get_events_from_cache( events, update_metrics=update_metrics ) - missing_event_ids = {e for e in events if e not in event_map} + missing_event_ids = (e for e in events if e not in event_map) event_map.update( await self._get_events_from_external_cache( events=missing_event_ids,