Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CapMan visibility] add throttling to ReferrerGuardRail policy #6014

Merged
merged 12 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 40 additions & 5 deletions snuba/query/allocation_policies/per_referrer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
logger = logging.getLogger("snuba.query.allocation_policy_per_referrer")

# Default number of ClickHouse threads a query run under this policy may use.
_DEFAULT_MAX_THREADS = 10
# Default concurrent-query limit applied per referrer.
_DEFAULT_CONCURRENT_REQUEST_PER_REFERRER = 100
# Defaults for the per-referrer overrides; -1 appears to be the sentinel for
# "no override set" — confirm against the base policy's handling.
_REFERRER_CONCURRENT_OVERRIDE = -1
_REFERRER_MAX_THREADS_OVERRIDE = -1
# Throttling threshold = concurrent limit // requests_throttle_divider, so a
# divider of 1 makes the threshold equal to the limit (throttling effectively off).
_REQUESTS_THROTTLE_DIVIDER = 1
# When throttled, threads granted = max_threads // threads_throttle_divider; a
# divider of 1 leaves the thread count unchanged.
_THREADS_THROTTLE_DIVIDER = 1


class ReferrerGuardRailPolicy(BaseConcurrentRateLimitAllocationPolicy):
xurui-c marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -28,7 +33,9 @@ class ReferrerGuardRailPolicy(BaseConcurrentRateLimitAllocationPolicy):
This concern is orthogonal to customer rate limits in its purpose. This rate limiter being tripped is a problem
caused by sentry developers, not customer abuse. It either means that a feature was released that queries this referrer
too much or that an appropriate rate limit was not set somewhere upstream. It affects customers randomly and basically
acts as a load shedder.
acts as a load shedder. As a referrer approaches the rate limiter's threshold for rejecting queries, that referrer's
queries will get throttled. The threshold for throttling and the (reduced) number of threads are configurable via
_REQUESTS_THROTTLE_DIVIDER and _THREADS_THROTTLE_DIVIDER.

For example, a product team may push out a feature that sends 20 snuba queries every 5 seconds on the UI.
In that case, that feature should break but others should continue to be served.
Expand All @@ -47,21 +54,33 @@ def _additional_config_definitions(self) -> list[AllocationPolicyConfig]:
""",
value_type=int,
param_types={},
default=100,
default=_DEFAULT_CONCURRENT_REQUEST_PER_REFERRER,
),
AllocationPolicyConfig(
name="referrer_concurrent_override",
description="""override the concurrent limit for a referrer""",
value_type=int,
param_types={"referrer": str},
default=-1,
default=_REFERRER_CONCURRENT_OVERRIDE,
),
AllocationPolicyConfig(
name="referrer_max_threads_override",
description="""override the max_threads for a referrer, applies to every query made by that referrer""",
param_types={"referrer": str},
value_type=int,
default=-1,
default=_REFERRER_MAX_THREADS_OVERRIDE,
),
AllocationPolicyConfig(
name="requests_throttle_divider",
description="The threshold at which we will decrease the number of threads (THROTTLED_THREADS) used to execute queries",
value_type=int,
xurui-c marked this conversation as resolved.
Show resolved Hide resolved
default=_REQUESTS_THROTTLE_DIVIDER,
),
AllocationPolicyConfig(
name="threads_throttle_divider",
description="The throttled number of threads Clickhouse will use for the query.",
value_type=int,
xurui-c marked this conversation as resolved.
Show resolved Hide resolved
default=_THREADS_THROTTLE_DIVIDER,
),
]

Expand Down Expand Up @@ -100,6 +119,22 @@ def _get_quota_allowance(
query_id,
rate_limit_params,
)
assert (
rate_limit_params.concurrent_limit is not None
), "concurrent_limit must be set"
num_threads = self._get_max_threads(referrer)
requests_throttle_threshold = max(
1,
self.get_config_value("default_concurrent_request_per_referrer")
// self.get_config_value("requests_throttle_divider"),
)
if rate_limit_stats.concurrent > requests_throttle_threshold:
num_threads = max(
1, num_threads // self.get_config_value("threads_throttle_divider")
)
self.metrics.increment(
"concurrent_queries_throttled", tags={"referrer": referrer}
)
self.metrics.timing(
"concurrent_queries_referrer",
rate_limit_stats.concurrent,
Expand All @@ -112,7 +147,7 @@ def _get_quota_allowance(
}
return QuotaAllowance(
can_run=can_run,
max_threads=self._get_max_threads(referrer),
max_threads=num_threads,
explanation=decision_explanation,
)

Expand Down
28 changes: 28 additions & 0 deletions tests/query/allocation_policies/test_per_referrer.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,34 @@ def test_policy_pass_basic(self):
tenant_ids={"referrer": "statistical_detectors"}, query_id="4"
).can_run

@pytest.mark.redis_db
def test_throttle(self) -> None:
    """Once concurrent queries cross the throttle threshold, a referrer's
    queries are still allowed to run but with a reduced thread count."""
    policy = ReferrerGuardRailPolicy.from_kwargs(
        storage_key="generic_metrics_distributions",
        required_tenant_types=["referrer"],
    )

    # Concurrent limit of 4 with a requests divider of 2 -> throttling kicks
    # in above 2 in-flight queries; threads divider of 2 -> threads are halved.
    policy.set_config_value("default_concurrent_request_per_referrer", 4)
    policy.set_config_value("requests_throttle_divider", 2)
    policy.set_config_value("threads_throttle_divider", 2)

    tenants = {"referrer": "statistical_detectors"}

    # The first two queries stay at or below the threshold: full thread count.
    for query_id in ("1", "2"):
        allowance = policy.get_quota_allowance(
            tenant_ids=tenants, query_id=query_id
        )
        assert allowance.max_threads == policy.max_threads

    # The third query exceeds the threshold: still runnable, threads halved.
    throttled = policy.get_quota_allowance(tenant_ids=tenants, query_id="3")
    assert throttled.max_threads == policy.max_threads // 2
    assert throttled.can_run

@pytest.mark.redis_db
def test_override(self):
policy = ReferrerGuardRailPolicy.from_kwargs(
Expand Down
Loading