From 072cab08da8dd42e1804efd4185445a77f536628 Mon Sep 17 00:00:00 2001 From: Steven Jin Xuan Date: Mon, 19 Aug 2024 09:14:41 -0400 Subject: [PATCH] Move cluster dns config and validate in strict dns clusters Signed-off-by: Steven Jin Xuan --- api/BUILD | 1 + api/envoy/config/cluster/v3/BUILD | 1 + api/envoy/config/cluster/v3/cluster.proto | 27 +--- api/envoy/extensions/clusters/dns/v3/BUILD | 9 ++ .../extensions/clusters/dns/v3/cluster.proto | 130 ++++++++++++++++++ .../common/dynamic_forward_proxy/v3/BUILD | 1 + .../dynamic_forward_proxy/v3/dns_cache.proto | 3 +- api/versioning/BUILD | 1 + source/extensions/clusters/strict_dns/BUILD | 1 + .../clusters/strict_dns/strict_dns_cluster.cc | 61 ++++++-- .../clusters/strict_dns/strict_dns_cluster.h | 12 +- source/extensions/extensions_metadata.yaml | 6 + 12 files changed, 218 insertions(+), 35 deletions(-) create mode 100644 api/envoy/extensions/clusters/dns/v3/BUILD create mode 100644 api/envoy/extensions/clusters/dns/v3/cluster.proto diff --git a/api/BUILD b/api/BUILD index 096373e79fc7a..a8949c917c8dd 100644 --- a/api/BUILD +++ b/api/BUILD @@ -137,6 +137,7 @@ proto_library( "//envoy/extensions/access_loggers/wasm/v3:pkg", "//envoy/extensions/bootstrap/internal_listener/v3:pkg", "//envoy/extensions/clusters/aggregate/v3:pkg", + "//envoy/extensions/clusters/dns/v3:pkg", "//envoy/extensions/clusters/dynamic_forward_proxy/v3:pkg", "//envoy/extensions/clusters/redis/v3:pkg", "//envoy/extensions/common/async_files/v3:pkg", diff --git a/api/envoy/config/cluster/v3/BUILD b/api/envoy/config/cluster/v3/BUILD index 80d74b61cd6a0..df171f9a734d1 100644 --- a/api/envoy/config/cluster/v3/BUILD +++ b/api/envoy/config/cluster/v3/BUILD @@ -9,6 +9,7 @@ api_proto_package( "//envoy/annotations:pkg", "//envoy/config/core/v3:pkg", "//envoy/config/endpoint/v3:pkg", + "//envoy/extensions/clusters/dns/v3:pkg", "//envoy/type/metadata/v3:pkg", "//envoy/type/v3:pkg", "@com_github_cncf_xds//udpa/annotations:pkg", diff --git 
a/api/envoy/config/cluster/v3/cluster.proto b/api/envoy/config/cluster/v3/cluster.proto index 0d2d6f1918ecc..57ab55350c470 100644 --- a/api/envoy/config/cluster/v3/cluster.proto +++ b/api/envoy/config/cluster/v3/cluster.proto @@ -13,6 +13,7 @@ import "envoy/config/core/v3/health_check.proto"; import "envoy/config/core/v3/protocol.proto"; import "envoy/config/core/v3/resolver.proto"; import "envoy/config/endpoint/v3/endpoint.proto"; +import "envoy/extensions/clusters/dns/v3/cluster.proto"; import "envoy/type/metadata/v3/metadata.proto"; import "envoy/type/v3/percent.proto"; @@ -45,7 +46,7 @@ message ClusterCollection { } // Configuration for a single upstream cluster. -// [#next-free-field: 59] +// [#next-free-field: 60] message Cluster { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.Cluster"; @@ -678,24 +679,6 @@ message Cluster { core.v3.HealthStatusSet override_host_status = 8; } - message RefreshRate { - option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.Cluster.RefreshRate"; - - // Specifies the base interval between refreshes. This parameter is required and must be greater - // than zero and less than - // :ref:`max_interval `. - google.protobuf.Duration base_interval = 1 [(validate.rules).duration = { - required: true - gt {nanos: 1000000} - }]; - - // Specifies the maximum interval between refreshes. This parameter is optional, but must be - // greater than or equal to the - // :ref:`base_interval ` if set. The default - // is 10 times the :ref:`base_interval `. - google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {nanos: 1000000}}]; - } - message PreconnectPolicy { // Indicates how many streams (rounded up) can be anticipated per-upstream for each // incoming stream. This is useful for high-QPS or latency-sensitive services. Preconnecting @@ -944,6 +927,9 @@ message Cluster { // [#next-major-version: make this a list of typed extensions.] 
map typed_extension_protocol_options = 36; + // [#extension-category: envoy.clusters.dns] + envoy.extensions.clusters.dns.v3.DnsConfig dns_config = 59; + // If the DNS refresh rate is specified and the cluster type is either // :ref:`STRICT_DNS`, // or :ref:`LOGICAL_DNS`, @@ -975,7 +961,8 @@ message Cluster { // other than :ref:`STRICT_DNS` and // :ref:`LOGICAL_DNS` this setting is // ignored. - RefreshRate dns_failure_refresh_rate = 44; + + envoy.extensions.clusters.dns.v3.DnsConfig.RefreshRate dns_failure_refresh_rate = 44; // Optional configuration for setting cluster's DNS refresh rate. If the value is set to true, // cluster's DNS refresh rate will be set to resource record's TTL which comes from DNS diff --git a/api/envoy/extensions/clusters/dns/v3/BUILD b/api/envoy/extensions/clusters/dns/v3/BUILD new file mode 100644 index 0000000000000..29ebf0741406e --- /dev/null +++ b/api/envoy/extensions/clusters/dns/v3/BUILD @@ -0,0 +1,9 @@ +# DO NOT EDIT. This file is generated by tools/proto_format/proto_sync.py. 
+ +load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package") + +licenses(["notice"]) # Apache 2 + +api_proto_package( + deps = ["@com_github_cncf_xds//udpa/annotations:pkg"], +) diff --git a/api/envoy/extensions/clusters/dns/v3/cluster.proto b/api/envoy/extensions/clusters/dns/v3/cluster.proto new file mode 100644 index 0000000000000..19f01c6e512c3 --- /dev/null +++ b/api/envoy/extensions/clusters/dns/v3/cluster.proto @@ -0,0 +1,130 @@ +syntax = "proto3"; + +package envoy.extensions.clusters.dns.v3; + +import "google/protobuf/any.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/struct.proto"; +import "google/protobuf/wrappers.proto"; + +import "udpa/annotations/migrate.proto"; +import "udpa/annotations/security.proto"; +import "udpa/annotations/status.proto"; +import "udpa/annotations/versioning.proto"; +import "validate/validate.proto"; + +option java_package = "io.envoyproxy.envoy.extensions.clusters.dns.v3"; +option java_outer_classname = "ClusterProto"; +option java_multiple_files = true; +option go_package = "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/dns/v3;dnsv3"; +option (udpa.annotations.file_status).package_version_status = ACTIVE; + +// import "envoy/config/core/v3/extension.proto"; + +// [#protodoc-title: DNS cluster configuration] + +// Configuration for DNS-resolved clusters, i.e. the refresh, lookup-family and warm-up settings +// used by cluster types such as STRICT_DNS and LOGICAL_DNS. +// [#extension: envoy.clusters.dns] + +// [#next-free-field: 55] +message DnsConfig { + // When V4_ONLY is selected, the DNS resolver will only perform a lookup for + // addresses in the IPv4 family. If V6_ONLY is selected, the DNS resolver will + // only perform a lookup for addresses in the IPv6 family. If AUTO is + // specified, the DNS resolver will first perform a lookup for addresses in + // the IPv6 family and fallback to a lookup for addresses in the IPv4 family. + // This is semantically equivalent to a non-existent V6_PREFERRED option. 
+ // AUTO is a legacy name that is more opaque than + // necessary and will be deprecated in favor of V6_PREFERRED in a future major version of the API. + // If V4_PREFERRED is specified, the DNS resolver will first perform a lookup for addresses in the + // IPv4 family and fallback to a lookup for addresses in the IPv6 family. i.e., the callback + // target will only get v6 addresses if there were NO v4 addresses to return. + // If ALL is specified, the DNS resolver will perform a lookup for both IPv4 and IPv6 families, + // and return all resolved addresses. When this is used, Happy Eyeballs will be enabled for + // upstream connections. Refer to :ref:`Happy Eyeballs Support ` + // for more information. + // For cluster types other than + // :ref:`STRICT_DNS` and + // :ref:`LOGICAL_DNS`, + // this setting is + // ignored. + // [#next-major-version: deprecate AUTO in favor of a V6_PREFERRED option.] + enum LookupFamily { + AUTO = 0; + V4_ONLY = 1; + V6_ONLY = 2; + V4_PREFERRED = 3; + ALL = 4; + } + + message RefreshRate { + option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.Cluster.RefreshRate"; + + // Specifies the base interval between refreshes. This parameter is required and must be greater + // than zero and less than + // :ref:`max_interval `. + google.protobuf.Duration base_interval = 1 [(validate.rules).duration = { + required: true + gt {nanos: 1000000} + }]; + + // Specifies the maximum interval between refreshes. This parameter is optional, but must be + // greater than or equal to the + // :ref:`base_interval ` if set. The default + // is 10 times the :ref:`base_interval `. + google.protobuf.Duration max_interval = 2 [(validate.rules).duration = {gt {nanos: 1000000}}]; + } + + // If the DNS refresh rate is specified and the cluster type is either + // :ref:`STRICT_DNS`, + // or :ref:`LOGICAL_DNS`, + // this value is used as the cluster’s DNS refresh + // rate. The value configured must be at least 1ms. 
If this setting is not specified, the + // value defaults to 5000ms. For cluster types other than + // :ref:`STRICT_DNS` + // and :ref:`LOGICAL_DNS` + // this setting is ignored. + google.protobuf.Duration refresh_rate = 16 [(validate.rules).duration = {gt {nanos: 1000000}}]; + + // If the DNS failure refresh rate is specified and the cluster type is either + // :ref:`STRICT_DNS`, + // or :ref:`LOGICAL_DNS`, + // this is used as the cluster’s DNS refresh rate when requests are failing. If this setting is + // not specified, the failure refresh rate defaults to the DNS refresh rate. For cluster types + // other than :ref:`STRICT_DNS` and + // :ref:`LOGICAL_DNS` this setting is + // ignored. + RefreshRate dns_failure_refresh_rate = 44; + + // Optional configuration for setting cluster's DNS refresh rate. If the value is set to true, + // cluster's DNS refresh rate will be set to resource record's TTL which comes from DNS + // resolution. + bool respect_ttl = 39; + + // The DNS IP address resolution policy. If this setting is not specified, the + // value defaults to + // :ref:`AUTO`. + LookupFamily lookup_family = 17 [(validate.rules).enum = {defined_only: true}]; + + // DNS resolver type configuration extension. This extension can be used to configure c-ares, apple, + // or any other DNS resolver types and the related parameters. + // For example, an object of + // :ref:`CaresDnsResolverConfig ` + // can be packed into this ``typed_dns_resolver_config``. This configuration replaces the + // :ref:`dns_resolution_config ` + // configuration. + // During the transition period when both ``dns_resolution_config`` and ``typed_dns_resolver_config`` exist, + // when ``typed_dns_resolver_config`` is in place, Envoy will use it and ignore ``dns_resolution_config``. + // When ``typed_dns_resolver_config`` is missing, the default behavior is in place. 
+ // [#extension-category: envoy.network.dns_resolver] + // FIXMEcore.v3.TypedExtensionConfig typed_resolver_config = 55; + + // Optional configuration for having cluster readiness block on warm-up. Currently, only applicable for + // :ref:`STRICT_DNS`, + // or :ref:`LOGICAL_DNS`, + // or :ref:`Redis Cluster`. + // If true, cluster readiness blocks on warm-up. If false, the cluster will complete + // initialization whether or not warm-up has completed. Defaults to true. + google.protobuf.BoolValue wait_for_warm_on_init = 54; +} diff --git a/api/envoy/extensions/common/dynamic_forward_proxy/v3/BUILD b/api/envoy/extensions/common/dynamic_forward_proxy/v3/BUILD index a220c748ba7f9..9cc8ec4e1525e 100644 --- a/api/envoy/extensions/common/dynamic_forward_proxy/v3/BUILD +++ b/api/envoy/extensions/common/dynamic_forward_proxy/v3/BUILD @@ -10,6 +10,7 @@ api_proto_package( "//envoy/config/cluster/v3:pkg", "//envoy/config/common/key_value/v3:pkg", "//envoy/config/core/v3:pkg", + "//envoy/extensions/clusters/dns/v3:pkg", "@com_github_cncf_xds//udpa/annotations:pkg", ], ) diff --git a/api/envoy/extensions/common/dynamic_forward_proxy/v3/dns_cache.proto b/api/envoy/extensions/common/dynamic_forward_proxy/v3/dns_cache.proto index eae3b8f742619..ceb15333f71fa 100644 --- a/api/envoy/extensions/common/dynamic_forward_proxy/v3/dns_cache.proto +++ b/api/envoy/extensions/common/dynamic_forward_proxy/v3/dns_cache.proto @@ -7,6 +7,7 @@ import "envoy/config/common/key_value/v3/config.proto"; import "envoy/config/core/v3/address.proto"; import "envoy/config/core/v3/extension.proto"; import "envoy/config/core/v3/resolver.proto"; +import "envoy/extensions/clusters/dns/v3/cluster.proto"; import "google/protobuf/duration.proto"; import "google/protobuf/wrappers.proto"; @@ -97,7 +98,7 @@ message DnsCacheConfig { // If the DNS failure refresh rate is specified, // this is used as the cache's DNS refresh rate when DNS requests are failing. 
If this setting is // not specified, the failure refresh rate defaults to the dns_refresh_rate. - config.cluster.v3.Cluster.RefreshRate dns_failure_refresh_rate = 6; + clusters.dns.v3.DnsConfig.RefreshRate dns_failure_refresh_rate = 6; // The config of circuit breakers for resolver. It provides a configurable threshold. // Envoy will use dns cache circuit breakers with default settings even if this value is not set. diff --git a/api/versioning/BUILD b/api/versioning/BUILD index bfc572f7f3bde..03227425b471d 100644 --- a/api/versioning/BUILD +++ b/api/versioning/BUILD @@ -75,6 +75,7 @@ proto_library( "//envoy/extensions/access_loggers/wasm/v3:pkg", "//envoy/extensions/bootstrap/internal_listener/v3:pkg", "//envoy/extensions/clusters/aggregate/v3:pkg", + "//envoy/extensions/clusters/dns/v3:pkg", "//envoy/extensions/clusters/dynamic_forward_proxy/v3:pkg", "//envoy/extensions/clusters/redis/v3:pkg", "//envoy/extensions/common/async_files/v3:pkg", diff --git a/source/extensions/clusters/strict_dns/BUILD b/source/extensions/clusters/strict_dns/BUILD index 72fdb583d55b6..a138d4a6f0a0d 100644 --- a/source/extensions/clusters/strict_dns/BUILD +++ b/source/extensions/clusters/strict_dns/BUILD @@ -15,6 +15,7 @@ envoy_cc_extension( # prevously considered core code. 
visibility = ["//visibility:public"], deps = [ + "//source/common/common:random_generator_lib", "//source/common/upstream:cluster_factory_includes", "//source/common/upstream:upstream_includes", "@envoy_api//envoy/config/cluster/v3:pkg_cc_proto", diff --git a/source/extensions/clusters/strict_dns/strict_dns_cluster.cc b/source/extensions/clusters/strict_dns/strict_dns_cluster.cc index b379ef1e890e4..94c47ca3e8733 100644 --- a/source/extensions/clusters/strict_dns/strict_dns_cluster.cc +++ b/source/extensions/clusters/strict_dns/strict_dns_cluster.cc @@ -1,12 +1,15 @@ #include "source/extensions/clusters/strict_dns/strict_dns_cluster.h" #include +#include #include "envoy/common/exception.h" #include "envoy/config/cluster/v3/cluster.pb.h" #include "envoy/config/endpoint/v3/endpoint.pb.h" #include "envoy/config/endpoint/v3/endpoint_components.pb.h" +#include "source/common/common/random_generator.h" + namespace Envoy { namespace Upstream { @@ -28,11 +31,43 @@ StrictDnsClusterImpl::StrictDnsClusterImpl(const envoy::config::cluster::v3::Clu absl::Status& creation_status) : BaseDynamicClusterImpl(cluster, context, creation_status), load_assignment_(cluster.load_assignment()), - local_info_(context.serverFactoryContext().localInfo()), dns_resolver_(dns_resolver), - dns_refresh_rate_ms_( - std::chrono::milliseconds(PROTOBUF_GET_MS_OR_DEFAULT(cluster, dns_refresh_rate, 5000))), - dns_jitter_ms_(PROTOBUF_GET_MS_OR_DEFAULT(cluster, dns_jitter, 0)), - respect_dns_ttl_(cluster.respect_dns_ttl()) { + local_info_(context.serverFactoryContext().localInfo()), dns_resolver_(dns_resolver) { + // dns_refresh_rate_ms_( + // std::chrono::milliseconds(PROTOBUF_GET_MS_OR_DEFAULT(cluster, dns_refresh_rate, + // 5000))), + // dns_jitter_ms_(PROTOBUF_GET_MS_OR_DEFAULT(cluster, dns_jitter, 0)), + // respect_dns_ttl_(cluster.respect_dns_ttl()) + + if (cluster.has_dns_config()) { + if (cluster.has_dns_refresh_rate() /* deprecated */) { + throw EnvoyException("Only one of dns_refresh_rate or 
dns_config can be specified."); + } + if (cluster.has_dns_jitter() /* deprecated */) { + throw EnvoyException("Only one of dns_jitter or dns_config can be specified."); + } + if (cluster.has_typed_dns_resolver_config() /* deprecated */) { + throw EnvoyException( + "Only one of typed_dns_resolution_config or dns_config can be specified."); + } + if (cluster.dns_lookup_family() != + envoy::config::cluster::v3::Cluster_DnsLookupFamily::Cluster_DnsLookupFamily_AUTO && + cluster.dns_config().lookup_family() != + envoy::extensions::clusters::dns::v3::DnsConfig::LookupFamily:: + DnsConfig_LookupFamily_AUTO /* deprecated */) { + throw EnvoyException("Only one of dns_lookup_family or dns_config can be specified."); + } + if (cluster.has_dns_refresh_rate() /* deprecated */) { + throw EnvoyException("Only one of dns_refresh_rate or dns_config can be specified."); + } + if (cluster.has_dns_failure_refresh_rate() /* deprecated */) { + throw EnvoyException("Only one of dns_failure_refresh_rate or dns_config can be specified."); + } + + if (cluster.has_wait_for_warm_on_init() /* deprecated */) { + throw EnvoyException("Only one of wait_for_warm_on_init or dns_config can be specified."); + } + // no good way of checking respect_ttl + } failure_backoff_strategy_ = Config::Utility::prepareDnsRefreshStrategy( cluster, dns_refresh_rate_ms_.count(), @@ -51,7 +86,7 @@ StrictDnsClusterImpl::StrictDnsClusterImpl(const envoy::config::cluster::v3::Clu resolve_targets.emplace_back(new ResolveTarget( *this, context.serverFactoryContext().mainThreadDispatcher(), socket_address.address(), - socket_address.port_value(), locality_lb_endpoint, lb_endpoint)); + socket_address.port_value(), locality_lb_endpoint, lb_endpoint, random_generator_)); } } resolve_targets_ = std::move(resolve_targets); @@ -103,13 +138,14 @@ StrictDnsClusterImpl::ResolveTarget::ResolveTarget( StrictDnsClusterImpl& parent, Event::Dispatcher& dispatcher, const std::string& dns_address, const uint32_t dns_port, const 
envoy::config::endpoint::v3::LocalityLbEndpoints& locality_lb_endpoint, - const envoy::config::endpoint::v3::LbEndpoint& lb_endpoint) + const envoy::config::endpoint::v3::LbEndpoint& lb_endpoint, + Random::RandomGeneratorImpl& random_generator) : parent_(parent), locality_lb_endpoints_(locality_lb_endpoint), lb_endpoint_(lb_endpoint), dns_address_(dns_address), hostname_(lb_endpoint_.endpoint().hostname().empty() ? dns_address_ : lb_endpoint_.endpoint().hostname()), - port_(dns_port), - resolve_timer_(dispatcher.createTimer([this]() -> void { startResolve(); })) {} + port_(dns_port), resolve_timer_(dispatcher.createTimer([this]() -> void { startResolve(); })), + random_generator_(random_generator) {} StrictDnsClusterImpl::ResolveTarget::~ResolveTarget() { if (active_query_) { @@ -188,7 +224,12 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() { if (!response.empty() && parent_.respect_dns_ttl_ && ttl_refresh_rate != std::chrono::seconds(0)) { - final_refresh_rate = ttl_refresh_rate; + std::chrono::milliseconds jitter = + std::chrono::milliseconds(random_generator_.random() % 512); // FIXME + if (jitter >= ttl_refresh_rate) { + jitter = std::chrono::milliseconds(0); + } + final_refresh_rate = ttl_refresh_rate - jitter; ASSERT(ttl_refresh_rate != std::chrono::seconds::max() && final_refresh_rate.count() > 0); } diff --git a/source/extensions/clusters/strict_dns/strict_dns_cluster.h b/source/extensions/clusters/strict_dns/strict_dns_cluster.h index 72b4070e5a85b..80c24260f1f8c 100644 --- a/source/extensions/clusters/strict_dns/strict_dns_cluster.h +++ b/source/extensions/clusters/strict_dns/strict_dns_cluster.h @@ -3,6 +3,7 @@ #include "envoy/config/cluster/v3/cluster.pb.h" #include "envoy/config/endpoint/v3/endpoint_components.pb.h" +#include "source/common/common/random_generator.h" #include "source/common/upstream/cluster_factory_impl.h" #include "source/common/upstream/upstream_impl.h" @@ -31,7 +32,8 @@ class StrictDnsClusterImpl : public 
BaseDynamicClusterImpl { ResolveTarget(StrictDnsClusterImpl& parent, Event::Dispatcher& dispatcher, const std::string& dns_address, const uint32_t dns_port, const envoy::config::endpoint::v3::LocalityLbEndpoints& locality_lb_endpoint, - const envoy::config::endpoint::v3::LbEndpoint& lb_endpoint); + const envoy::config::endpoint::v3::LbEndpoint& lb_endpoint, + Random::RandomGeneratorImpl& random_generator); ~ResolveTarget(); void startResolve(); @@ -44,6 +46,7 @@ class StrictDnsClusterImpl : public BaseDynamicClusterImpl { const uint32_t port_; const Event::TimerPtr resolve_timer_; HostVector hosts_; + Random::RandomGeneratorImpl& random_generator_; // Host map for current resolve target. When we have multiple resolve targets, multiple targets // may contain two different hosts with the same address. This has two effects: @@ -68,13 +71,14 @@ class StrictDnsClusterImpl : public BaseDynamicClusterImpl { const LocalInfo::LocalInfo& local_info_; Network::DnsResolverSharedPtr dns_resolver_; std::list resolve_targets_; - const std::chrono::milliseconds dns_refresh_rate_ms_; - const std::chrono::milliseconds dns_jitter_ms_; + std::chrono::milliseconds dns_refresh_rate_ms_; + std::chrono::milliseconds dns_jitter_ms_; BackOffStrategyPtr failure_backoff_strategy_; - const bool respect_dns_ttl_; + bool respect_dns_ttl_; Network::DnsLookupFamily dns_lookup_family_; uint32_t overprovisioning_factor_; bool weighted_priority_health_; + Random::RandomGeneratorImpl random_generator_; }; /** diff --git a/source/extensions/extensions_metadata.yaml b/source/extensions/extensions_metadata.yaml index 14bc8765b4b9d..0aff3975286b1 100644 --- a/source/extensions/extensions_metadata.yaml +++ b/source/extensions/extensions_metadata.yaml @@ -94,6 +94,12 @@ envoy.clusters.aggregate: - envoy.clusters security_posture: requires_trusted_downstream_and_upstream status: stable +# FIXME +envoy.clusters.dns: + categories: + - envoy.clusters + security_posture: unknown + status: wip 
envoy.clusters.dynamic_forward_proxy: categories: - envoy.clusters