From efc4b7aebcf9170f05db9ca67fa28f098b8f295c Mon Sep 17 00:00:00 2001 From: Fabrice Rabaute Date: Thu, 3 Jun 2021 12:00:16 -0700 Subject: [PATCH] ready: Add option to enable envoy readiness endpoint from worker The /ready endpoint used by emissary is using the admin port (8001 by default). This generates a problem during config reloads with large configs as the admin thread is blocking so the /ready endpoint can be very slow to answer (in the order of several seconds, even more). The problem is described in this envoy issue: https://github.com/envoyproxy/envoy/issues/16425 This change is trying to fix the /ready endpoint problem. The /ready endpoint can be exposed in the worker pool by adding a listener + health check http filter. This way, the /ready endpoint is fast and it is not blocked by any config reload or blocking admin operation as it depends on the worker pool. Future changes will allow using this endpoint with diagd and the go code as well so they get a fast /ready endpoint and they do not use the admin port. This listener is disabled by default. The config "ready_port" can be used to set the port and enable this new listener on envoy. Signed-off-by: Fabrice Rabaute --- docs/releaseNotes.yml | 10 ++ pkg/ambex/main.go | 1 + python/ambassador/constants.py | 1 + python/ambassador/envoy/v3/v3config.py | 2 + python/ambassador/envoy/v3/v3ready.py | 156 +++++++++++++++++++++++++ python/ambassador/ir/irambassador.py | 2 + 6 files changed, 172 insertions(+) create mode 100644 python/ambassador/envoy/v3/v3ready.py diff --git a/docs/releaseNotes.yml b/docs/releaseNotes.yml index c99e33c442c..de6bf4d0833 100644 --- a/docs/releaseNotes.yml +++ b/docs/releaseNotes.yml @@ -125,6 +125,16 @@ items: With the ugprade to Envoy 1.22, $productName$ can now be configured to listen for HTTP/3 connections using QUIC and the UDP network protocol. It currently only supports for connections between downstream clients and $productName$. 
+ - title: Add option to enable envoy readiness endpoint from worker + type: feature + body: >- + The /ready endpoint used by emissary is using the admin port (8001 by default). + This generates a problem during config reloads with large configs as the + admin thread is blocking so the /ready endpoint can be very slow to + answer (in the order of several seconds, even more). + The new feature allows enabling a specific envoy listener that can answer /ready calls + from the workers so the endpoint is always fast and it does not suffer from single-threaded + admin thread slowness on config reloads and other slow endpoints handled by the admin thread. - version: 2.3.1 date: '2022-06-09' notes: diff --git a/pkg/ambex/main.go b/pkg/ambex/main.go index 8e5dc81c32a..1f0e5cacbe8 100644 --- a/pkg/ambex/main.go +++ b/pkg/ambex/main.go @@ -89,6 +89,7 @@ import ( _ "github.com/emissary-ingress/emissary/v3/pkg/api/envoy/extensions/filters/http/ext_authz/v3" _ "github.com/emissary-ingress/emissary/v3/pkg/api/envoy/extensions/filters/http/grpc_stats/v3" _ "github.com/emissary-ingress/emissary/v3/pkg/api/envoy/extensions/filters/http/gzip/v3" + _ "github.com/emissary-ingress/emissary/v3/pkg/api/envoy/extensions/filters/http/health_check/v3" _ "github.com/emissary-ingress/emissary/v3/pkg/api/envoy/extensions/filters/http/lua/v3" _ "github.com/emissary-ingress/emissary/v3/pkg/api/envoy/extensions/filters/http/ratelimit/v3" _ "github.com/emissary-ingress/emissary/v3/pkg/api/envoy/extensions/filters/http/rbac/v3" diff --git a/python/ambassador/constants.py b/python/ambassador/constants.py index 77e45014fc1..4890a9c8ac7 100644 --- a/python/ambassador/constants.py +++ b/python/ambassador/constants.py @@ -2,6 +2,7 @@ class Constants: SERVICE_PORT_HTTP = 8080 SERVICE_PORT_HTTPS = 8443 ADMIN_PORT = 8001 + READY_PORT = -1 DIAG_PORT = 8877 DIAG_PORT_ALT = 8004 SERVICE_PORT_AGENT = 9900 diff --git a/python/ambassador/envoy/v3/v3config.py b/python/ambassador/envoy/v3/v3config.py index 
d40be9897cd..bad47db0870 100644 --- a/python/ambassador/envoy/v3/v3config.py +++ b/python/ambassador/envoy/v3/v3config.py @@ -20,6 +20,7 @@ from ..common import EnvoyConfig, sanitize_pre_json from .v3admin import V3Admin +from .v3ready import V3Ready from .v3bootstrap import V3Bootstrap from .v3route import V3Route, V3RouteVariants from .v3listener import V3Listener @@ -67,6 +68,7 @@ def __init__(self, ir: 'IR', cache: Optional[Cache]=None) -> None: V3Cluster.generate(self) V3StaticResources.generate(self) V3Bootstrap.generate(self) + V3Ready.generate(self) def has_listeners(self) -> bool: return len(self.listeners) > 0 diff --git a/python/ambassador/envoy/v3/v3ready.py b/python/ambassador/envoy/v3/v3ready.py new file mode 100644 index 00000000000..4d71f3be6aa --- /dev/null +++ b/python/ambassador/envoy/v3/v3ready.py @@ -0,0 +1,156 @@ +# Copyright 2021 Datawire. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +from typing import List, TYPE_CHECKING + +from .v3listener import V3Listener + +if TYPE_CHECKING: + from . 
class V3Ready(dict):
    """Generator for the optional Envoy listener that answers ``/ready``.

    When the ``ambassador`` module sets a positive ``ready_port``,
    :meth:`generate` appends one extra listener to the static resources. Its
    HTTP connection manager runs Envoy's ``health_check`` filter (not in
    pass-through mode) for the exact path ``/ready``, followed by the router
    filter.
    """

    # Default JSON access-log fields, used when the module does not provide
    # ``envoy_log_format``. Always copied before use so callers never share it.
    _DEFAULT_JSON_LOG_FORMAT = {
        'start_time': '%START_TIME%',
        'method': '%REQ(:METHOD)%',
        'path': '%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%',
        'protocol': '%PROTOCOL%',
        'response_code': '%RESPONSE_CODE%',
        'response_flags': '%RESPONSE_FLAGS%',
        'bytes_received': '%BYTES_RECEIVED%',
        'bytes_sent': '%BYTES_SENT%',
        'duration': '%DURATION%',
        'upstream_service_time': '%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%',
        'x_forwarded_for': '%REQ(X-FORWARDED-FOR)%',
        'user_agent': '%REQ(USER-AGENT)%',
        'request_id': '%REQ(X-REQUEST-ID)%',
        'authority': '%REQ(:AUTHORITY)%',
        'upstream_host': '%UPSTREAM_HOST%',
        'upstream_cluster': '%UPSTREAM_CLUSTER%',
        'upstream_local_address': '%UPSTREAM_LOCAL_ADDRESS%',
        'downstream_local_address': '%DOWNSTREAM_LOCAL_ADDRESS%',
        'downstream_remote_address': '%DOWNSTREAM_REMOTE_ADDRESS%',
        'requested_server_name': '%REQUESTED_SERVER_NAME%',
        'istio_policy_status': '%DYNAMIC_METADATA(istio.mixer:status)%',
        'upstream_transport_failure_reason': '%UPSTREAM_TRANSPORT_FAILURE_REASON%',
    }

    # Default text access-log line, used when ``envoy_log_format`` is unset or
    # empty. A trailing newline is appended when the logger config is built.
    _DEFAULT_TEXT_LOG_FORMAT = (
        'ACCESS [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" '
        '%RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% '
        '%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" '
        '"%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%"'
    )

    @classmethod
    def generate(cls, config: 'V3Config') -> None:
        """Append the ready listener to ``config.static_resources['listeners']``.

        Settings are read from the ``ambassador`` module:

        * ``ready_port`` (default ``-1``): the listener is only generated when
          this is greater than zero.
        * ``ready_ip`` (default ``'127.0.0.1'``): address to bind.
        * ``ready_log`` (default ``True``): attach the access log built by
          :meth:`access_log` to the connection manager.

        Args:
            config: The V3 configuration being generated; its static resources
                are modified in place.
        """
        lookup = config.ir.aconf.module_lookup
        logger = config.ir.logger

        rport = lookup('ambassador', 'ready_port', -1)
        if rport <= 0:
            logger.info("V3Ready: ==== disabled")
            return

        rip = lookup('ambassador', 'ready_ip', '127.0.0.1')
        rlog = lookup('ambassador', 'ready_log', True)

        logger.info(f"V3Ready: ==== listen on {rip}:{rport}")

        health_check_filter = {
            'name': 'envoy.filters.http.health_check',
            'typed_config': {
                '@type': 'type.googleapis.com/envoy.extensions.filters.http.health_check.v3.HealthCheck',
                'pass_through_mode': False,
                'headers': [
                    {
                        'name': ':path',
                        'exact_match': '/ready',
                    },
                ],
            },
        }

        # Give the router an explicit typed config so Envoy can resolve the
        # filter from its type URL instead of relying on name-only lookup.
        router_filter = {
            'name': 'envoy.filters.http.router',
            'typed_config': {
                '@type': 'type.googleapis.com/envoy.extensions.filters.http.router.v3.Router',
            },
        }

        typed_config = {
            '@type': 'type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager',
            'stat_prefix': 'ready_http',
            'route_config': {
                'name': 'local_route',
            },
            'http_filters': [health_check_filter, router_filter],
        }
        if rlog:
            typed_config['access_log'] = cls.access_log(config)

        ready_listener = {
            'name': f'ambassador-listener-ready-{rip}-{rport}',
            'address': {
                'socket_address': {
                    'address': rip,
                    'port_value': rport,
                    'protocol': 'TCP',
                },
            },
            'filter_chains': [
                {
                    'filters': [
                        {
                            'name': 'envoy.filters.network.http_connection_manager',
                            'typed_config': typed_config,
                        },
                    ],
                },
            ],
        }

        config.static_resources['listeners'].append(ready_listener)

    @classmethod
    def access_log(cls, config: 'V3Config') -> List[dict]:
        """Build the file access-log configuration for the ready listener.

        The log type (``envoy_log_type``), destination (``envoy_log_path``) and
        optional custom format (``envoy_log_format``) come from
        ``config.ir.ambassador_module``. In JSON mode, Datadog trace and span
        id fields are added when the tracing driver is
        ``envoy.tracers.datadog``.

        Args:
            config: The V3 configuration being generated.

        Returns:
            A one-element list holding the ``envoy.access_loggers.file``
            logger configuration.
        """
        module = config.ir.ambassador_module
        log_format = module.get('envoy_log_format', None)

        if module.envoy_log_type.lower() == "json":
            if log_format is None:
                log_format = dict(cls._DEFAULT_JSON_LOG_FORMAT)

            tracing_config = config.ir.tracing
            if tracing_config and tracing_config.driver == 'envoy.tracers.datadog':
                # Build a new dict: a user-supplied format belongs to the IR
                # module and must not be modified as a side effect.
                log_format = {
                    **log_format,
                    'dd.trace_id': '%REQ(X-DATADOG-TRACE-ID)%',
                    'dd.span_id': '%REQ(X-DATADOG-PARENT-ID)%',
                }

            logger_config = {
                '@type': 'type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog',
                'path': module.envoy_log_path,
                'json_format': log_format,
            }
        else:
            if not log_format:
                log_format = cls._DEFAULT_TEXT_LOG_FORMAT

            logger_config = {
                '@type': 'type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog',
                'path': module.envoy_log_path,
                'log_format': {
                    'text_format_source': {
                        'inline_string': log_format + '\n',
                    },
                },
            }

        return [
            {
                'name': 'envoy.access_loggers.file',
                'typed_config': logger_config,
            },
        ]