Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

envoy: add local rate limit metrics #16313

Merged
merged 2 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions envoy/changelog.d/16313.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add local rate limit metrics
21 changes: 21 additions & 0 deletions envoy/datadog_checks/envoy/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
LABEL_MAP = {
'cluster_name': 'envoy_cluster',
'envoy_cluster_name': 'envoy_cluster',
'envoy_local_http_ratelimit_prefix': 'stat_prefix', # local rate limit
'envoy_http_conn_manager_prefix': 'stat_prefix', # tracing
'envoy_listener_address': 'address', # listener
'envoy_virtual_cluster': 'virtual_envoy_cluster', # vhost
Expand Down Expand Up @@ -89,6 +90,26 @@
'metric_type': 'monotonic_count',
'new_name': 'listener.downstream_cx.count',
},
r'envoy_(.+)_http_local_rate_limit_enabled$': {
'label_name': 'stat_prefix',
'metric_type': 'monotonic_count',
'new_name': 'http.local_rate_limit_enabled.count',
},
r'envoy_(.+)_http_local_rate_limit_enforced$': {
'label_name': 'stat_prefix',
'metric_type': 'monotonic_count',
'new_name': 'http.local_rate_limit_enforced.count',
},
r'envoy_(.+)_http_local_rate_limit_ok$': {
'label_name': 'stat_prefix',
'metric_type': 'monotonic_count',
'new_name': 'http.local_rate_limit_ok.count',
},
r'envoy_(.+)_http_local_rate_limit_rate_limited$': {
'label_name': 'stat_prefix',
'metric_type': 'monotonic_count',
'new_name': 'http.local_rate_limit_rate_limited.count',
},
}


Expand Down
40 changes: 39 additions & 1 deletion envoy/datadog_checks/envoy/metrics.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# (C) Datadog, Inc. 2018-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
from .utils import make_metric_tree
from .utils import make_metric_tree, modify_metrics_dict

METRIC_PREFIX = 'envoy.'

Expand Down Expand Up @@ -372,6 +372,10 @@
'envoy_http_rbac_denied': 'http.rbac_denied',
'envoy_http_rbac_shadow_allowed': 'http.rbac_shadow_allowed',
'envoy_http_rbac_shadow_denied': 'http.rbac_shadow_denied',
'envoy_http_local_rate_limit_enabled': 'http.local_rate_limit_enabled',
'envoy_http_local_rate_limit_enforced': 'http.local_rate_limit_enforced',
'envoy_http_local_rate_limit_rate_limited': 'http.local_rate_limit_rate_limited',
'envoy_http_local_rate_limit_ok': 'http.local_rate_limit_ok',
}

# fmt: off
Expand Down Expand Up @@ -3857,7 +3861,41 @@
),
'method': 'monotonic_count',
},
# "*." to match at the beginning of raw metric if it doesn't have a standard name
'*.http_local_rate_limit.enabled': {
'tags': (
('stat_prefix',),
(),
(),
),
'method': 'monotonic_count',
},
'*.http_local_rate_limit.enforced': {
'tags': (
('stat_prefix',),
(),
(),
),
'method': 'monotonic_count',
},
'*.http_local_rate_limit.rate_limited': {
'tags': (
('stat_prefix',),
(),
(),
),
'method': 'monotonic_count',
},
'*.http_local_rate_limit.ok': {
'tags': (
('stat_prefix',),
(),
(),
),
'method': 'monotonic_count',
},
}
# fmt: on

MOD_METRICS = modify_metrics_dict(METRICS)
METRIC_TREE = make_metric_tree(METRICS)
13 changes: 9 additions & 4 deletions envoy/datadog_checks/envoy/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from six.moves import range, zip

from .errors import UnknownMetric, UnknownTags
from .metrics import METRIC_PREFIX, METRIC_TREE, METRICS
from .metrics import METRIC_PREFIX, METRIC_TREE, MOD_METRICS

HISTOGRAM = re.compile(r'([P0-9.]+)\(([^,]+)')
PERCENTILE_SUFFIX = {
Expand Down Expand Up @@ -65,6 +65,10 @@ def _parse_metric(metric, metric_mapping, skip_part=None):
else:
tag_value_builder.append(metric_part)
tags_to_build += 1
# Support wildcard matching for raw metrics that start with a configurable name,
# e.g.: stat_prefix.http_local_rate_limit.ok
if "*" in metric_mapping:
metric_mapping = metric_mapping["*"]
return metric_parts, tag_value_builder, tag_names, tag_values, unknown_tags, tags_to_build, metric_mapping


Expand All @@ -78,11 +82,12 @@ def parse_metric(metric, retry=False, metric_mapping=METRIC_TREE, disable_legacy
'listener.0.0.0.0_80.downstream_cx_total' ->
('listener.downstream_cx_total', ['address:0.0.0.0_80'], 'count')
"""

metric_parts, tag_value_builder, tag_names, tag_values, unknown_tags, tags_to_build, last_mapping = _parse_metric(
metric, metric_mapping
)
parsed_metric = '.'.join(metric_parts)
if parsed_metric not in METRICS:
if parsed_metric not in MOD_METRICS:
if retry:
skip_parts = []
# Retry parsing for metrics by skipping the last matched metric part
Expand All @@ -102,7 +107,7 @@ def parse_metric(metric, retry=False, metric_mapping=METRIC_TREE, disable_legacy
last_mapping,
) = _parse_metric(metric, metric_mapping, skip_part)
parsed_metric = '.'.join(metric_parts)
if parsed_metric in METRICS:
if parsed_metric in MOD_METRICS:
break
else:
raise UnknownMetric
Expand Down Expand Up @@ -133,7 +138,7 @@ def parse_metric(metric, retry=False, metric_mapping=METRIC_TREE, disable_legacy

tags = ['{}:{}'.format(tag_name, tag_value) for tag_name, tag_value in zip(tag_names, tag_values)]

return METRIC_PREFIX + parsed_metric, tags, METRICS[parsed_metric]['method']
return METRIC_PREFIX + parsed_metric, tags, MOD_METRICS[parsed_metric]['method']


def construct_tag_values(tag_builder, num_tags):
Expand Down
17 changes: 17 additions & 0 deletions envoy/datadog_checks/envoy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,20 @@ def _get_server_info(server_info_url, log, http):
return None

return raw_version


def modify_metrics_dict(metrics):
    """
    Build a new dict from `metrics` whose keys have the "*." wildcard marker removed.

    metrics.py uses "*." for wildcard matching of metrics that start with a configurable
    namespace, such as `<stat_prefix>.http_local_rate_limit.enabled`. parser.py parses such a
    metric as `http_local_rate_limit.enabled` with tag `stat_prefix=<stat_prefix>` and then
    compares that name against the metric list, raising UnknownMetric when it is not found.
    Since "*.http_local_rate_limit.enabled" != "http_local_rate_limit.enabled", the comparison
    needs keys without the wildcard.

    Values are reused as-is (not copied) and the input mapping is left untouched.
    """
    return {name.replace('*.', ''): definition for name, definition in metrics.items()}
10 changes: 9 additions & 1 deletion envoy/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ envoy.cluster.http2.keepalive_timeout.count,count,,,,[OpenMetrics V2],0,envoy,,
envoy.cluster.http2.metadata_empty_frames.count,count,,,,[OpenMetrics V2],0,envoy,,
envoy.cluster.http2.outbound_control_flood.count,count,,connection,,[OpenMetrics V2] Total number of connections terminated for exceeding the limit on outbound frames of types PING/SETTINGS/RST_STREAM,-1,envoy,,
envoy.cluster.http2.outbound_flood.count,count,,connection,,[OpenMetrics V2] Total number of connections terminated for exceeding the limit on outbound frames of all types,-1,envoy,,
envoy.cluster.http2.pending_send_bytes,gauge,,byte,,[OpenMetrics V2]Currently buffered body data in bytes waiting to be written when stream/connection window is opened. ,0,envoy,,
envoy.cluster.http2.pending_send_bytes,gauge,,byte,,[OpenMetrics V2] Currently buffered body data in bytes waiting to be written when stream/connection window is opened.,0,envoy,,
envoy.cluster.http2.requests_rejected_with_underscores_in_headers.count,count,,request,,[OpenMetrics V2] Total numbers of rejected requests due to header names containing underscores. This action is configured by setting the headers_with_underscores_action config setting.,0,envoy,,
envoy.cluster.http2.rx_messaging_error.count,count,,error,,[OpenMetrics V2] Total number of invalid received frames that violated section 8 of the HTTP/2 spec,-1,envoy,,
envoy.cluster.http2.rx_reset.count,count,,message,,[OpenMetrics V2] Total number of reset stream frames received by Envoy,0,envoy,,
Expand Down Expand Up @@ -534,6 +534,10 @@ envoy.http.rbac_allowed.count,count,,request,,[OpenMetrics V2] Total requests th
envoy.http.rbac_denied.count,count,,request,,[OpenMetrics V2] Total requests that were denied access,-1,envoy,,
envoy.http.rbac_shadow_allowed.count,count,,request,,[OpenMetrics V2] Total requests that would be allowed access by the filter's shadow rules,-1,envoy,,
envoy.http.rbac_shadow_denied.count,count,,request,,[OpenMetrics V2] Total requests that would be denied access by the filter's shadow rules,-1,envoy,,
envoy.http.local_rate_limit_enabled.count,count,,request,,[OpenMetrics V2] Total number of requests for which the rate limiter was consulted,-1,envoy,,
envoy.http.local_rate_limit_enforced.count,count,,request,,[OpenMetrics V2] Total number of requests for which rate limiting was applied (e.g.: 429 returned),-1,envoy,,
envoy.http.local_rate_limit_rate_limited.count,count,,request,,[OpenMetrics V2] Total responses without an available token (but not necessarily enforced),-1,envoy,,
envoy.http.local_rate_limit_ok.count,count,,request,,[OpenMetrics V2] Total under limit responses from the token bucket,-1,envoy,,
envoy.http.rs_too_large,count,,error,,[Legacy] Total response errors due to buffering an overly large body,-1,envoy,,
envoy.http.user_agent.downstream_cx_total,count,,connection,,[Legacy] Total connections,0,envoy,,
envoy.http.user_agent.downstream_cx_destroy_remote_active_rq,count,,connection,,[Legacy] Total connections destroyed remotely with active requests,-1,envoy,,
Expand Down Expand Up @@ -965,3 +969,7 @@ envoy.access_logs.grpc_access_log.logs_dropped,count,,,,[Legacy] Number of GRPC
envoy.access_logs.grpc_access_log.logs_written,count,,,,[Legacy] Number of GRPC Access Logs written,-1,envoy,,
envoy.access_logs.grpc_access_log.logs_dropped.count,count,,,,[OpenMetrics V2] Count of GRPC Access Logs dropped,-1,envoy,,
envoy.access_logs.grpc_access_log.logs_written.count,count,,,,[OpenMetrics V2] Count of GRPC Access Logs written,-1,envoy,,
envoy.http_local_rate_limit.enabled,count,,request,,[Legacy] Total number of requests for which the rate limiter was consulted,-1,envoy,,
envoy.http_local_rate_limit.enforced,count,,request,,[Legacy] Total number of requests for which rate limiting was applied (e.g.: 429 returned),-1,envoy,,
envoy.http_local_rate_limit.rate_limited,count,,request,,[Legacy] Total number of responses without an available token (but not necessarily enforced),-1,envoy,,
envoy.http_local_rate_limit.ok,count,,request,,[Legacy] Total number of under the limit responses from the token bucket,-1,envoy,,
21 changes: 17 additions & 4 deletions envoy/tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,16 @@
"vhost.vcluster.upstream_rq.count",
]

FLAKY_METRICS = {
LOCAL_RATE_LIMIT_METRICS = [
"http.local_rate_limit_enabled.count",
"http.local_rate_limit_enforced.count",
"http.local_rate_limit_rate_limited.count",
"http.local_rate_limit_ok.count",
]

RATE_LIMIT_STAT_PREFIX_TAG = 'stat_prefix:http_local_rate_limiter'

FLAKY_METRICS = [
"listener.downstream_cx_active",
"listener.downstream_cx_destroy.count",
"cluster.internal.upstream_rq.count",
Expand All @@ -384,9 +393,9 @@
"cluster.upstream_rq_xx.count",
"access_logs.grpc_access_log.logs_written.count",
"access_logs.grpc_access_log.logs_dropped.count",
}
]

MOCKED_PROMETHEUS_METRICS = {
MOCKED_PROMETHEUS_METRICS = [
"cluster.assignment_stale.count",
"cluster.assignment_timeout_received.count",
"cluster.bind_errors.count",
Expand Down Expand Up @@ -602,6 +611,10 @@
"http.tracing.not_traceable.count",
"http.tracing.random_sampling.count",
"http.tracing.service_forced.count",
"http.local_rate_limit_enabled.count",
"http.local_rate_limit_enforced.count",
"http.local_rate_limit_rate_limited.count",
"http.local_rate_limit_ok.count",
"listener.admin.downstream_cx.count",
"listener.admin.downstream_cx_active",
"listener.admin.downstream_cx_destroy.count",
Expand Down Expand Up @@ -696,7 +709,7 @@
"tcp.on_demand_cluster_timeout.count",
"tcp.upstream_flush.count",
"tcp.upstream_flush_active",
}
]


def get_fixture_path(filename):
Expand Down
26 changes: 26 additions & 0 deletions envoy/tests/docker/api_v3/front-envoy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,33 @@ static_resources:
envoy.filters.http.ext_authz:
"@type": type.googleapis.com/envoy.extensions.filters.http.ext_authz.v3.ExtAuthzPerRoute
disabled: true
- match:
prefix: "/ratelimit/service/1"
route:
cluster: service1
typed_per_filter_config:
envoy.filters.http.local_ratelimit:
"@type": type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit
stat_prefix: http_local_rate_limiter
token_bucket:
max_tokens: 10000
tokens_per_fill: 1000
fill_interval: 1s
filter_enabled:
runtime_key: local_rate_limit_enabled
default_value:
numerator: 100
denominator: HUNDRED
filter_enforced:
runtime_key: local_rate_limit_enforced
default_value:
numerator: 100
denominator: HUNDRED
http_filters:
- name: envoy.filters.http.local_ratelimit
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.local_ratelimit.v3.LocalRateLimit
stat_prefix: http_local_rate_limiter
- name: envoy.filters.http.rbac
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.rbac.v3.RBAC
Expand Down
8 changes: 8 additions & 0 deletions envoy/tests/fixtures/legacy/local_rate_limit.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
http_local_rate_limiter.http_local_rate_limit.enabled: 0
http_local_rate_limiter.http_local_rate_limit.enforced: 0
http_local_rate_limiter.http_local_rate_limit.ok: 0
http_local_rate_limiter.http_local_rate_limit.rate_limited: 0
foo.http_local_rate_limit.enabled: 0
foo.http_local_rate_limit.enforced: 0
foo.http_local_rate_limit.ok: 0
foo.http_local_rate_limit.rate_limited: 0
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,14 @@ envoy_cluster_manager_cds_control_plane_rate_limit_enforced{} 0
envoy_cluster_manager_cds_update_attempt{} 13
# TYPE envoy_cluster_manager_cds_update_failure counter
envoy_cluster_manager_cds_update_failure{} 12
# TYPE envoy_http_local_rate_limiter_http_local_rate_limit_enabled counter
envoy_http_local_rate_limiter_http_local_rate_limit_enabled{} 0
# TYPE envoy_http_local_rate_limiter_http_local_rate_limit_enforced counter
envoy_http_local_rate_limiter_http_local_rate_limit_enforced{} 0
# TYPE envoy_http_local_rate_limiter_http_local_rate_limit_ok counter
envoy_http_local_rate_limiter_http_local_rate_limit_ok{} 0
# TYPE envoy_http_local_rate_limiter_http_local_rate_limit_rate_limited counter
envoy_http_local_rate_limiter_http_local_rate_limit_rate_limited{} 0
# TYPE envoy_listener_admin_downstream_cx_destroy counter
envoy_listener_admin_downstream_cx_destroy{} 7
# TYPE envoy_listener_admin_http_downstream_rq_xx counter
Expand Down
8 changes: 8 additions & 0 deletions envoy/tests/fixtures/openmetrics/openmetrics_1_28.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# TYPE envoy_http_local_rate_limit_enabled counter
envoy_http_local_rate_limit_enabled{envoy_local_http_ratelimit_prefix="http_local_rate_limiter"} 0
# TYPE envoy_http_local_rate_limit_enforced counter
envoy_http_local_rate_limit_enforced{envoy_local_http_ratelimit_prefix="http_local_rate_limiter"} 0
# TYPE envoy_http_local_rate_limit_ok counter
envoy_http_local_rate_limit_ok{envoy_local_http_ratelimit_prefix="http_local_rate_limiter"} 0
# TYPE envoy_http_local_rate_limit_rate_limited counter
envoy_http_local_rate_limit_rate_limited{envoy_local_http_ratelimit_prefix="http_local_rate_limiter"} 0
9 changes: 9 additions & 0 deletions envoy/tests/legacy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,12 @@
"envoy.cluster.ext_authz.failure_mode_allowed",
"envoy.cluster.ext_authz.ok",
]

LOCAL_RATE_LIMIT_METRICS = [
"envoy.http_local_rate_limit.enabled",
"envoy.http_local_rate_limit.enforced",
"envoy.http_local_rate_limit.rate_limited",
"envoy.http_local_rate_limit.ok",
]

RATE_LIMIT_STAT_PREFIX_TAG = ['stat_prefix:http_local_rate_limiter', 'stat_prefix:foo']
Loading
Loading