From a30c01be75356c67526d96ed6521d6c313404807 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 13:41:36 +0100 Subject: [PATCH 1/8] Fix incorrect juggling of logging contexts in `_PerHostRatelimiter` Introduced in #13499. Signed-off-by: Sean Quah --- changelog.d/13554.misc | 1 + synapse/util/ratelimitutils.py | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 changelog.d/13554.misc diff --git a/changelog.d/13554.misc b/changelog.d/13554.misc new file mode 100644 index 000000000000..99dbcebec8a1 --- /dev/null +++ b/changelog.d/13554.misc @@ -0,0 +1 @@ +Instrument `FederationStateIdsServlet` (`/state_ids`) for understandable traces in Jaeger. diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 434b02b97b9d..7e9593d206ac 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -222,20 +222,29 @@ def on_both(r: object) -> object: # Ensure that we've properly cleaned up. self.sleeping_requests.discard(request_id) self.ready_request_queue.pop(request_id, None) - wait_span_scope.__exit__(None, None, None) - wait_timer_cm.__exit__(None, None, None) return r + ret_defer.addCallbacks(on_start, on_err) + ret_defer.addBoth(on_both) + # Tracing wait_span_scope = start_active_span("ratelimit wait") wait_span_scope.__enter__() + # Metrics wait_timer_cm = queue_wait_timer.time() wait_timer_cm.__enter__() - ret_defer.addCallbacks(on_start, on_err) - ret_defer.addBoth(on_both) - return make_deferred_yieldable(ret_defer) + # The current logcontext is now one created by `wait_span_scope`. + # We must only `__exit__` it once it has been restored by `make_deferred_yieldable`. + def on_after_logcontext_restored(r: object) -> object: + wait_timer_cm.__exit__(None, None, None) + wait_span_scope.__exit__(None, None, None) + return r + + d = make_deferred_yieldable(ret_defer) + d.addBoth(on_after_logcontext_restored) + return d def _on_exit(self, request_id: object) -> None: logger.debug("Ratelimit(%s) [%s]: Processed req", self.host, id(request_id)) From b0e018c29082daf8df0ec4300e129e9e8f38ac1a Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 15:07:17 +0100 Subject: [PATCH 2/8] fixup: apply Rich's suggestion --- synapse/util/ratelimitutils.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 7e9593d206ac..ab16b4f42c78 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -227,24 +227,12 @@ def on_both(r: object) -> object: ret_defer.addCallbacks(on_start, on_err) ret_defer.addBoth(on_both) - # Tracing - wait_span_scope = start_active_span("ratelimit wait") - wait_span_scope.__enter__() - - # Metrics - wait_timer_cm = queue_wait_timer.time() - wait_timer_cm.__enter__() - - # The current logcontext is now one created by `wait_span_scope`. - # We must only `__exit__` it once it has been restored by `make_deferred_yieldable`. - def on_after_logcontext_restored(r: object) -> object: - wait_timer_cm.__exit__(None, None, None) - wait_span_scope.__exit__(None, None, None) - return r + async def do_tracing() -> None: + with start_active_span("ratelimit wait"): + with queue_wait_timer.time(): + await make_deferred_yieldable(ret_defer) - d = make_deferred_yieldable(ret_defer) - d.addBoth(on_after_logcontext_restored) - return d + return defer.ensureDeferred(do_tracing()) def _on_exit(self, request_id: object) -> None: logger.debug("Ratelimit(%s) [%s]: Processed req", self.host, id(request_id)) From 00d06445e3a908410b66c4ef60239d783e2d7fbf Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 15:16:19 +0100 Subject: [PATCH 3/8] fixup: refactor a little --- synapse/util/ratelimitutils.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index ab16b4f42c78..047cba8e1f85 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -134,12 +134,19 @@ def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]": request_id = object() ret = self._on_enter(request_id) + ret = defer.ensureDeferred(self._trace_wait(ret)) try: yield ret finally: self._on_exit(request_id) def _on_enter(self, request_id: object) -> "defer.Deferred[None]": + """ + Returns: + A `Deferred` that resolves once the request is allowed by the rate limiter. + The returned `Deferred` does not follow the Synapse logging context rules + and must be wrapped with `make_deferred_yieldable` before awaiting it. + """ time_now = self.clock.time_msec() # remove any entries from request_times which aren't within the window @@ -226,13 +233,18 @@ def on_both(r: object) -> object: ret_defer.addCallbacks(on_start, on_err) ret_defer.addBoth(on_both) + return ret_defer - async def do_tracing() -> None: - with start_active_span("ratelimit wait"): - with queue_wait_timer.time(): - await make_deferred_yieldable(ret_defer) + async def _trace_wait(self, d: "defer.Deferred[None]") -> None: + """Waits for a given `Deferred` to resolve, while timing it for metrics. - return defer.ensureDeferred(do_tracing()) + Args: + d: The `Deferred` to wait for. Must not follow the Synapse logging context + rules. + """ + with start_active_span("ratelimit wait"): + with queue_wait_timer.time(): + await make_deferred_yieldable(d) def _on_exit(self, request_id: object) -> None: logger.debug("Ratelimit(%s) [%s]: Processed req", self.host, id(request_id)) From 92456469763b4455fe92d1c11ae18f6bf37bf4ce Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 15:20:36 +0100 Subject: [PATCH 4/8] fixup: small tidup, suggested by clokep --- synapse/util/ratelimitutils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 047cba8e1f85..c2eb0c283bb8 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -242,9 +242,8 @@ async def _trace_wait(self, d: "defer.Deferred[None]") -> None: d: The `Deferred` to wait for. Must not follow the Synapse logging context rules. """ - with start_active_span("ratelimit wait"): - with queue_wait_timer.time(): - await make_deferred_yieldable(d) + with start_active_span("ratelimit wait"), queue_wait_timer.time(): + await make_deferred_yieldable(d) def _on_exit(self, request_id: object) -> None: logger.debug("Ratelimit(%s) [%s]: Processed req", self.host, id(request_id)) From b999888faf85d068c43ed180c8baae2c5c446def Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 15:34:23 +0100 Subject: [PATCH 5/8] fixup: call `_on_enter` instead of accepting a deferred --- synapse/util/ratelimitutils.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index c2eb0c283bb8..85e86b1b06a5 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -133,20 +133,17 @@ def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]": self.host = host request_id = object() - ret = self._on_enter(request_id) - ret = defer.ensureDeferred(self._trace_wait(ret)) + ret = defer.ensureDeferred(self._on_enter_with_tracing(request_id)) try: yield ret finally: self._on_exit(request_id) + async def _on_enter_with_tracing(self, request_id: object) -> None: + with start_active_span("ratelimit wait"), queue_wait_timer.time(): + await self._on_enter(request_id) + def _on_enter(self, request_id: object) -> "defer.Deferred[None]": - """ - Returns: - A `Deferred` that resolves once the request is allowed by the rate limiter. - The returned `Deferred` does not follow the Synapse logging context rules - and must be wrapped with `make_deferred_yieldable` before awaiting it. - """ time_now = self.clock.time_msec() # remove any entries from request_times which aren't within the window @@ -233,17 +230,7 @@ def on_both(r: object) -> object: ret_defer.addCallbacks(on_start, on_err) ret_defer.addBoth(on_both) - return ret_defer - - async def _trace_wait(self, d: "defer.Deferred[None]") -> None: - """Waits for a given `Deferred` to resolve, while timing it for metrics. - - Args: - d: The `Deferred` to wait for. Must not follow the Synapse logging context - rules. - """ - with start_active_span("ratelimit wait"), queue_wait_timer.time(): - await make_deferred_yieldable(d) + return make_deferred_yieldable(ret_defer) def _on_exit(self, request_id: object) -> None: logger.debug("Ratelimit(%s) [%s]: Processed req", self.host, id(request_id)) From b7b50a717ed3beef5ad77630247ef475318eafa8 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 15:57:02 +0100 Subject: [PATCH 6/8] fixup: avoid tracing when rate limit is exceeded --- synapse/util/ratelimitutils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 85e86b1b06a5..1166ffd021e8 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -140,8 +140,11 @@ def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]": self._on_exit(request_id) async def _on_enter_with_tracing(self, request_id: object) -> None: + # We may raise a `LimitExceededError`, in which case we avoid tracing. + d = self._on_enter(request_id) + with start_active_span("ratelimit wait"), queue_wait_timer.time(): - await self._on_enter(request_id) + await d def _on_enter(self, request_id: object) -> "defer.Deferred[None]": time_now = self.clock.time_msec() From 0bdce68c7426d775bbfe7d75ea86e2def71569d5 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 16:01:30 +0100 Subject: [PATCH 7/8] Revert "fixup: avoid tracing when rate limit is exceeded" This reverts commit b7b50a717ed3beef5ad77630247ef475318eafa8. --- synapse/util/ratelimitutils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 1166ffd021e8..85e86b1b06a5 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -140,11 +140,8 @@ def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]": self._on_exit(request_id) async def _on_enter_with_tracing(self, request_id: object) -> None: - # We may raise a `LimitExceededError`, in which case we avoid tracing. - d = self._on_enter(request_id) - with start_active_span("ratelimit wait"), queue_wait_timer.time(): - await d + await self._on_enter(request_id) def _on_enter(self, request_id: object) -> "defer.Deferred[None]": time_now = self.clock.time_msec() From 4a398f9de733fa73dc67618d9e99e58f8e9b9a9d Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Thu, 18 Aug 2022 16:24:47 +0100 Subject: [PATCH 8/8] fixup: add comment --- synapse/util/ratelimitutils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 2d1f9cc1d906..f678b52cb4f5 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -154,6 +154,8 @@ def ratelimit(self, host: str) -> "Iterator[defer.Deferred[None]]": self.host = host request_id = object() + # Ideally we'd use `Deferred.fromCoroutine()` here, to save on redundant + # type-checking, but we'd need Twisted >= 21.2. ret = defer.ensureDeferred(self._on_enter_with_tracing(request_id)) try: yield ret