From b8dbf37d9ba0fee6e392c9d0e10b76586b0aad34 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Mon, 10 Jun 2024 13:26:55 -0500 Subject: [PATCH 01/12] added throttling --- snuba/query/allocation_policies/concurrent_rate_limit.py | 4 ---- snuba/query/allocation_policies/per_referrer.py | 8 +++++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/snuba/query/allocation_policies/concurrent_rate_limit.py b/snuba/query/allocation_policies/concurrent_rate_limit.py index afd93aff3a..08bf8ef423 100644 --- a/snuba/query/allocation_policies/concurrent_rate_limit.py +++ b/snuba/query/allocation_policies/concurrent_rate_limit.py @@ -67,10 +67,6 @@ def _is_within_rate_limit( rate_limit_shard_factor = self.get_config_value("rate_limit_shard_factor") assert isinstance(rate_history_s, (int, float)) assert isinstance(rate_limit_shard_factor, int) - assert ( - rate_limit_params.concurrent_limit is not None - ), "concurrent_limit must be set" - assert rate_limit_shard_factor > 0 rate_limit_stats = rate_limit_start_request( diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index 0262fd8014..92163539cc 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -100,6 +100,12 @@ def _get_quota_allowance( query_id, rate_limit_params, ) + assert ( + rate_limit_params.concurrent_limit is not None + ), "concurrent_limit must be set" + num_threads = self._get_max_threads(referrer) + if rate_limit_stats.concurrent >= rate_limit_params.concurrent_limit // 2: + num_threads = self._get_max_threads(referrer) // 2 self.metrics.timing( "concurrent_queries_referrer", rate_limit_stats.concurrent, @@ -112,7 +118,7 @@ def _get_quota_allowance( } return QuotaAllowance( can_run=can_run, - max_threads=self._get_max_threads(referrer), + max_threads=num_threads, explanation=decision_explanation, ) From b725d677df280a1cb181523969dd51a4b4a2e18a Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Mon, 10 Jun 2024 13:34:27 -0500 Subject: [PATCH 02/12] assertion --- snuba/query/allocation_policies/concurrent_rate_limit.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/snuba/query/allocation_policies/concurrent_rate_limit.py b/snuba/query/allocation_policies/concurrent_rate_limit.py index 08bf8ef423..afd93aff3a 100644 --- a/snuba/query/allocation_policies/concurrent_rate_limit.py +++ b/snuba/query/allocation_policies/concurrent_rate_limit.py @@ -67,6 +67,10 @@ def _is_within_rate_limit( rate_limit_shard_factor = self.get_config_value("rate_limit_shard_factor") assert isinstance(rate_history_s, (int, float)) assert isinstance(rate_limit_shard_factor, int) + assert ( + rate_limit_params.concurrent_limit is not None + ), "concurrent_limit must be set" + assert rate_limit_shard_factor > 0 rate_limit_stats = rate_limit_start_request( From 6a1f959b420b31622ddfdfb84f4f75f05d32c8b4 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Mon, 10 Jun 2024 14:17:25 -0500 Subject: [PATCH 03/12] added test --- .../query/allocation_policies/per_referrer.py | 2 +- .../allocation_policies/test_per_referrer.py | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index 92163539cc..b694d1be03 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -104,7 +104,7 @@ def _get_quota_allowance( rate_limit_params.concurrent_limit is not None ), "concurrent_limit must be set" num_threads = self._get_max_threads(referrer) - if rate_limit_stats.concurrent >= rate_limit_params.concurrent_limit // 2: + if rate_limit_stats.concurrent > rate_limit_params.concurrent_limit // 2: num_threads = self._get_max_threads(referrer) // 2 self.metrics.timing( "concurrent_queries_referrer", diff --git a/tests/query/allocation_policies/test_per_referrer.py b/tests/query/allocation_policies/test_per_referrer.py index 1600cc1463..a0847499aa 100644 --- a/tests/query/allocation_policies/test_per_referrer.py +++ b/tests/query/allocation_policies/test_per_referrer.py @@ -51,6 +51,32 @@ def test_policy_pass_basic(self): tenant_ids={"referrer": "statistical_detectors"}, query_id="4" ).can_run + @pytest.mark.redis_db + def test_throttle(self) -> None: + policy = ReferrerGuardRailPolicy.from_kwargs( + **{ + "storage_key": "generic_metrics_distributions", + "required_tenant_types": ["referrer"], + } + ) + + policy.set_config_value("default_concurrent_request_per_referrer", 4) + first_quota_allowance = policy.get_quota_allowance( + tenant_ids={"referrer": "statistical_detectors"}, query_id="1" + ) + assert first_quota_allowance.max_threads == policy.max_threads + + second_quota_allowance = policy.get_quota_allowance( + tenant_ids={"referrer": "statistical_detectors"}, query_id="2" + ) + assert second_quota_allowance.max_threads == policy.max_threads + + third_quota_allowance = policy.get_quota_allowance( + tenant_ids={"referrer": "statistical_detectors"}, query_id="3" + ) + assert third_quota_allowance.max_threads == policy.max_threads // 2 + assert third_quota_allowance.can_run + @pytest.mark.redis_db def test_override(self): policy = ReferrerGuardRailPolicy.from_kwargs( From 49bbcf94ce5dd561ea735565e726b0ecb3532b86 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Mon, 10 Jun 2024 15:13:06 -0500 Subject: [PATCH 04/12] made throttling configurable --- .../query/allocation_policies/per_referrer.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index b694d1be03..d75e426460 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -63,6 +63,21 @@ def _additional_config_definitions(self) -> list[AllocationPolicyConfig]: value_type=int, default=-1, ), + AllocationPolicyConfig( + name="throttle_threshold", + description="The threshold at which we will decrease the number of threads (THROTTLED_THREADS) used to execute queries", + param_types={"referrer": str}, + value_type=int, + default=self.get_config_value("default_concurrent_request_per_referrer") + // 2, + ), + AllocationPolicyConfig( + name="throttled_threads", + description="The throttled number of threads Clickhouse will use for the query.", + param_types={"referrer": str}, + value_type=int, + default=_DEFAULT_MAX_THREADS // 2, + ), ] def _get_max_threads(self, referrer: str) -> int: @@ -104,8 +119,8 @@ def _get_quota_allowance( rate_limit_params.concurrent_limit is not None ), "concurrent_limit must be set" num_threads = self._get_max_threads(referrer) - if rate_limit_stats.concurrent > rate_limit_params.concurrent_limit // 2: - num_threads = self._get_max_threads(referrer) // 2 + if rate_limit_stats.concurrent > self.get_config_value("throttle_threshold"): + num_threads = self.get_config_value("throttled_threads") self.metrics.timing( "concurrent_queries_referrer", rate_limit_stats.concurrent, From 1c129116a905e36463cb9406ad40ea08c3408ecb Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Mon, 10 Jun 2024 16:10:27 -0500 Subject: [PATCH 05/12] fix --- snuba/query/allocation_policies/per_referrer.py | 10 ++++++---- tests/query/allocation_policies/test_per_referrer.py | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index d75e426460..4b40f21a0f 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -66,15 +66,12 @@ def _additional_config_definitions(self) -> list[AllocationPolicyConfig]: AllocationPolicyConfig( name="throttle_threshold", description="The threshold at which we will decrease the number of threads (THROTTLED_THREADS) used to execute queries", - param_types={"referrer": str}, value_type=int, - default=self.get_config_value("default_concurrent_request_per_referrer") - // 2, + default=50, ), AllocationPolicyConfig( name="throttled_threads", description="The throttled number of threads Clickhouse will use for the query.", - param_types={"referrer": str}, value_type=int, default=_DEFAULT_MAX_THREADS // 2, ), @@ -119,6 +116,11 @@ def _get_quota_allowance( rate_limit_params.concurrent_limit is not None ), "concurrent_limit must be set" num_threads = self._get_max_threads(referrer) + print("rate_limit_stats.concurrent", rate_limit_stats.concurrent) + print( + "self.get_config_value('throttle_threshold')", + self.get_config_value("throttle_threshold"), + ) if rate_limit_stats.concurrent > self.get_config_value("throttle_threshold"): num_threads = self.get_config_value("throttled_threads") self.metrics.timing( diff --git a/tests/query/allocation_policies/test_per_referrer.py b/tests/query/allocation_policies/test_per_referrer.py index a0847499aa..9ee6603007 100644 --- a/tests/query/allocation_policies/test_per_referrer.py +++ b/tests/query/allocation_policies/test_per_referrer.py @@ -61,6 +61,7 @@ def test_throttle(self) -> None: ) policy.set_config_value("default_concurrent_request_per_referrer", 4) + policy.set_config_value("throttle_threshold", 2) first_quota_allowance = policy.get_quota_allowance( tenant_ids={"referrer": "statistical_detectors"}, query_id="1" ) From ff424777a1c66eb229a6844b085175b4720c7128 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Mon, 10 Jun 2024 16:11:30 -0500 Subject: [PATCH 06/12] take out print statements --- snuba/query/allocation_policies/per_referrer.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index 4b40f21a0f..d4fd76b7da 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -116,11 +116,6 @@ def _get_quota_allowance( rate_limit_params.concurrent_limit is not None ), "concurrent_limit must be set" num_threads = self._get_max_threads(referrer) - print("rate_limit_stats.concurrent", rate_limit_stats.concurrent) - print( - "self.get_config_value('throttle_threshold')", - self.get_config_value("throttle_threshold"), - ) if rate_limit_stats.concurrent > self.get_config_value("throttle_threshold"): num_threads = self.get_config_value("throttled_threads") self.metrics.timing( From 80407172680a33d8e7ccd0c9f74cd3f037bd15e5 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Tue, 11 Jun 2024 10:51:57 -0700 Subject: [PATCH 07/12] comments --- .../query/allocation_policies/per_referrer.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index d4fd76b7da..7f9315698a 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -19,6 +19,11 @@ logger = logging.getLogger("snuba.query.allocation_policy_per_referrer") _DEFAULT_MAX_THREADS = 10 +_DEFAULT_CONCURRENT_REQUEST_PER_REFERRER = 100 +_REFERRER_CONCURRENT_OVERRIDE = -1 +_REFERRER_MAX_THREADS_OVERRIDE = -1 +_REQUESTS_THROTTLE_DIVIDER = 0.5 +_THREADS_THROTTLE_DIVIDER = 0.5 class ReferrerGuardRailPolicy(BaseConcurrentRateLimitAllocationPolicy): @@ -47,33 +52,33 @@ def _additional_config_definitions(self) -> list[AllocationPolicyConfig]: """, value_type=int, param_types={}, - default=100, + default=_DEFAULT_CONCURRENT_REQUEST_PER_REFERRER, ), AllocationPolicyConfig( name="referrer_concurrent_override", description="""override the concurrent limit for a referrer""", value_type=int, param_types={"referrer": str}, - default=-1, + default=_REFERRER_CONCURRENT_OVERRIDE, ), AllocationPolicyConfig( name="referrer_max_threads_override", description="""override the max_threads for a referrer, applies to every query made by that referrer""", param_types={"referrer": str}, value_type=int, - default=-1, + default=_REFERRER_MAX_THREADS_OVERRIDE, ), AllocationPolicyConfig( - name="throttle_threshold", + name="requests_throttle_divider", description="The threshold at which we will decrease the number of threads (THROTTLED_THREADS) used to execute queries", value_type=int, - default=50, + default=_REQUESTS_THROTTLE_DIVIDER, ), AllocationPolicyConfig( - name="throttled_threads", + name="threads_throttle_divider", description="The throttled number of threads Clickhouse will use for the query.", value_type=int, - default=_DEFAULT_MAX_THREADS // 2, + default=_THREADS_THROTTLE_DIVIDER, ), ] @@ -116,8 +121,11 @@ def _get_quota_allowance( rate_limit_params.concurrent_limit is not None ), "concurrent_limit must be set" num_threads = self._get_max_threads(referrer) - if rate_limit_stats.concurrent > self.get_config_value("throttle_threshold"): - num_threads = self.get_config_value("throttled_threads") + requests_throttle_threshold = self.get_config_value( + "requests_throttle_divider" + ) * self.get_config_value("default_concurrent_request_per_referrer") + if rate_limit_stats.concurrent > requests_throttle_threshold: + num_threads *= self.get_config_value("threads_throttle_divider") self.metrics.timing( "concurrent_queries_referrer", rate_limit_stats.concurrent, From 8c475a86cf778502892291dd8e63bcf9a846b419 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Tue, 11 Jun 2024 10:52:38 -0700 Subject: [PATCH 08/12] changed default for roll out --- snuba/query/allocation_policies/per_referrer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index 7f9315698a..413c3fd0b1 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -22,8 +22,8 @@ _DEFAULT_CONCURRENT_REQUEST_PER_REFERRER = 100 _REFERRER_CONCURRENT_OVERRIDE = -1 _REFERRER_MAX_THREADS_OVERRIDE = -1 -_REQUESTS_THROTTLE_DIVIDER = 0.5 -_THREADS_THROTTLE_DIVIDER = 0.5 +_REQUESTS_THROTTLE_DIVIDER = 1 +_THREADS_THROTTLE_DIVIDER = 1 class ReferrerGuardRailPolicy(BaseConcurrentRateLimitAllocationPolicy): From d1f8162e4d2a281e8aa176da7868c042ed3ffe20 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Tue, 11 Jun 2024 11:19:30 -0700 Subject: [PATCH 09/12] test --- snuba/query/allocation_policies/per_referrer.py | 3 +++ tests/query/allocation_policies/test_per_referrer.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index 413c3fd0b1..96a42452ff 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -126,6 +126,9 @@ def _get_quota_allowance( ) * self.get_config_value("default_concurrent_request_per_referrer") if rate_limit_stats.concurrent > requests_throttle_threshold: num_threads *= self.get_config_value("threads_throttle_divider") + self.metrics.increment( + "concurrent_queries_throttled", tags={"referrer": referrer} + ) self.metrics.timing( "concurrent_queries_referrer", rate_limit_stats.concurrent, diff --git a/tests/query/allocation_policies/test_per_referrer.py b/tests/query/allocation_policies/test_per_referrer.py index 9ee6603007..b1b86a2473 100644 --- a/tests/query/allocation_policies/test_per_referrer.py +++ b/tests/query/allocation_policies/test_per_referrer.py @@ -61,7 +61,8 @@ def test_throttle(self) -> None: ) policy.set_config_value("default_concurrent_request_per_referrer", 4) - policy.set_config_value("throttle_threshold", 2) + policy.set_config_value("requests_throttle_divider", 0.5) + policy.set_config_value("threads_throttle_divider", 0.5) first_quota_allowance = policy.get_quota_allowance( tenant_ids={"referrer": "statistical_detectors"}, query_id="1" ) From 28efeb1636893df53c5f580db9e1bb73ebb45e07 Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Tue, 11 Jun 2024 11:54:24 -0700 Subject: [PATCH 10/12] test --- snuba/query/allocation_policies/per_referrer.py | 6 +++--- tests/query/allocation_policies/test_per_referrer.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index 96a42452ff..928461ffcd 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -122,10 +122,10 @@ def _get_quota_allowance( ), "concurrent_limit must be set" num_threads = self._get_max_threads(referrer) requests_throttle_threshold = self.get_config_value( - "requests_throttle_divider" - ) * self.get_config_value("default_concurrent_request_per_referrer") + "default_concurrent_request_per_referrer" + ) // self.get_config_value("requests_throttle_divider") if rate_limit_stats.concurrent > requests_throttle_threshold: - num_threads *= self.get_config_value("threads_throttle_divider") + num_threads //= self.get_config_value("threads_throttle_divider") self.metrics.increment( "concurrent_queries_throttled", tags={"referrer": referrer} ) diff --git a/tests/query/allocation_policies/test_per_referrer.py b/tests/query/allocation_policies/test_per_referrer.py index b1b86a2473..680ed56229 100644 --- a/tests/query/allocation_policies/test_per_referrer.py +++ b/tests/query/allocation_policies/test_per_referrer.py @@ -61,8 +61,8 @@ def test_throttle(self) -> None: ) policy.set_config_value("default_concurrent_request_per_referrer", 4) - policy.set_config_value("requests_throttle_divider", 0.5) - policy.set_config_value("threads_throttle_divider", 0.5) + policy.set_config_value("requests_throttle_divider", 2) + policy.set_config_value("threads_throttle_divider", 2) first_quota_allowance = policy.get_quota_allowance( tenant_ids={"referrer": "statistical_detectors"}, query_id="1" ) From fe799c04e57c88bc10026d24a7a904591f113b2b Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Tue, 11 Jun 2024 15:13:24 -0700 Subject: [PATCH 11/12] division --- snuba/query/allocation_policies/per_referrer.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index 928461ffcd..e779810ec4 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -33,7 +33,9 @@ class ReferrerGuardRailPolicy(BaseConcurrentRateLimitAllocationPolicy): This concern is orthogonal to customer rate limits in its purpose. This rate limiter being tripped is a problem caused by sentry developers, not customer abuse. It either means that a feature was release that queries this referrer too much or that an appropriate rate limit was not set somewhere upstream. It affects customers randomly and basically - acts as a load shedder. + acts as a load shedder. As a referrer approaches the rate limiter's threshold for rejecting queries, that referrer's + queries will get throttled. The threshold for throttling and the (reduced) number of threads are configurable via + _REQUESTS_THROTTLE_DIVIDER and _THREADS_THROTTLE_DIVIDER For example, a product team may push out a feature that sends 20 snuba queries every 5 seconds on the UI. In that case, that feature should break but others should continue to be served. @@ -121,11 +123,15 @@ def _get_quota_allowance( rate_limit_params.concurrent_limit is not None ), "concurrent_limit must be set" num_threads = self._get_max_threads(referrer) - requests_throttle_threshold = self.get_config_value( - "default_concurrent_request_per_referrer" - ) // self.get_config_value("requests_throttle_divider") + requests_throttle_threshold = max( + 1, + self.get_config_value("default_concurrent_request_per_referrer") + // self.get_config_value("requests_throttle_divider"), + ) if rate_limit_stats.concurrent > requests_throttle_threshold: - num_threads //= self.get_config_value("threads_throttle_divider") + num_threads = max( + 1, num_threads // self.get_config_value("threads_throttle_divider") + ) self.metrics.increment( "concurrent_queries_throttled", tags={"referrer": referrer} ) From ddb8981c33cd2cfbe64367d97515a153a77aa56d Mon Sep 17 00:00:00 2001 From: Rachel Chen Date: Tue, 11 Jun 2024 15:29:22 -0700 Subject: [PATCH 12/12] descriptions --- snuba/query/allocation_policies/per_referrer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snuba/query/allocation_policies/per_referrer.py b/snuba/query/allocation_policies/per_referrer.py index e779810ec4..f8503b34c0 100644 --- a/snuba/query/allocation_policies/per_referrer.py +++ b/snuba/query/allocation_policies/per_referrer.py @@ -72,13 +72,13 @@ def _additional_config_definitions(self) -> list[AllocationPolicyConfig]: ), AllocationPolicyConfig( name="requests_throttle_divider", - description="The threshold at which we will decrease the number of threads (THROTTLED_THREADS) used to execute queries", + description="default_concurrent_request_per_referrer divided by this value will be the threshold at which we will decrease the number of threads (THROTTLED_THREADS) used to execute queries", value_type=int, default=_REQUESTS_THROTTLE_DIVIDER, ), AllocationPolicyConfig( name="threads_throttle_divider", - description="The throttled number of threads Clickhouse will use for the query.", + description="max threads divided by this number is the number of threads we use to execute queries for a throttled referrer", value_type=int, default=_THREADS_THROTTLE_DIVIDER, ),