From ab3fbaabe4e1c7858eb18e0ab8263ebe36ce3b32 Mon Sep 17 00:00:00 2001 From: Steven Jin Xuan Date: Wed, 4 Dec 2024 14:37:53 -0500 Subject: [PATCH 1/2] Backport #36953 Signed-off-by: Steven Jin Xuan --- api/envoy/config/cluster/v3/cluster.proto | 2 +- changelogs/current.yaml | 4 ++ .../logical_dns/logical_dns_cluster.cc | 9 ++- .../clusters/strict_dns/strict_dns_cluster.cc | 10 ++- test/common/upstream/upstream_impl_test.cc | 59 +++++++++++++++++ .../logical_dns/logical_dns_cluster_test.cc | 64 ++++++++++++++++++- 6 files changed, 142 insertions(+), 6 deletions(-) diff --git a/api/envoy/config/cluster/v3/cluster.proto b/api/envoy/config/cluster/v3/cluster.proto index 0d2d6f1918ec..079a1e497758 100644 --- a/api/envoy/config/cluster/v3/cluster.proto +++ b/api/envoy/config/cluster/v3/cluster.proto @@ -965,7 +965,7 @@ message Cluster { // :ref:`STRICT_DNS` // and :ref:`LOGICAL_DNS` // this setting is ignored. - google.protobuf.Duration dns_jitter = 58; + google.protobuf.Duration dns_jitter = 58 [(validate.rules).duration = {gte {}}]; // If the DNS failure refresh rate is specified and the cluster type is either // :ref:`STRICT_DNS`, diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 6bbb5f3b2aaa..7b595962b6af 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -26,6 +26,10 @@ bug_fixes: - area: validation/tools change: | Add back missing extension for ``schema_validator_tool``. +- area: DNS + change: | + Fixed bug where setting ``dns_jitter `` to large values caused Envoy Bug + to fire. removed_config_or_runtime: # *Normally occurs at the end of the* :ref:`deprecation period ` diff --git a/source/extensions/clusters/logical_dns/logical_dns_cluster.cc b/source/extensions/clusters/logical_dns/logical_dns_cluster.cc index ef3d443af702..264f522c12b1 100644 --- a/source/extensions/clusters/logical_dns/logical_dns_cluster.cc +++ b/source/extensions/clusters/logical_dns/logical_dns_cluster.cc @@ -151,7 +151,14 @@ void LogicalDnsCluster::startResolve() { final_refresh_rate = addrinfo.ttl_; } if (dns_jitter_ms_.count() != 0) { - final_refresh_rate += std::chrono::milliseconds(random_.random()) % dns_jitter_ms_; + // Note that `random_.random()` returns a uint64 while + // `dns_jitter_ms_.count()` returns a signed long that gets cast into a uint64. + // Thus, the modulo of the two will be a positive as long as + // `dns_jitter_ms_.count()` is positive. + // It is important that this be positive, otherwise `final_refresh_rate` could be + // negative causing Envoy to crash. + final_refresh_rate += + std::chrono::milliseconds(random_.random() % dns_jitter_ms_.count()); } ENVOY_LOG(debug, "DNS refresh rate reset for {}, refresh rate {} ms", dns_address_, final_refresh_rate.count()); diff --git a/source/extensions/clusters/strict_dns/strict_dns_cluster.cc b/source/extensions/clusters/strict_dns/strict_dns_cluster.cc index b379ef1e890e..64641eedbcbf 100644 --- a/source/extensions/clusters/strict_dns/strict_dns_cluster.cc +++ b/source/extensions/clusters/strict_dns/strict_dns_cluster.cc @@ -193,8 +193,14 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() { final_refresh_rate.count() > 0); } if (parent_.dns_jitter_ms_.count() > 0) { - final_refresh_rate += - std::chrono::milliseconds(parent_.random_.random()) % parent_.dns_jitter_ms_; + // Note that `parent_.random_.random()` returns a uint64 while + // `parent_.dns_jitter_ms_.count()` returns a signed long that gets cast into a uint64. + // Thus, the modulo of the two will be a positive as long as + // `parent_dns_jitter_ms_.count()` is positive. + // It is important that this be positive, otherwise `final_refresh_rate` could be + // negative causing Envoy to crash. + final_refresh_rate += std::chrono::milliseconds(parent_.random_.random() % + parent_.dns_jitter_ms_.count()); } ENVOY_LOG(debug, "DNS refresh rate reset for {}, refresh rate {} ms", dns_address_, diff --git a/test/common/upstream/upstream_impl_test.cc b/test/common/upstream/upstream_impl_test.cc index fc78e4c7189d..2751f9144eed 100644 --- a/test/common/upstream/upstream_impl_test.cc +++ b/test/common/upstream/upstream_impl_test.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -1490,6 +1491,30 @@ TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateNoJitter) { TestUtility::makeDnsResponse({}, std::chrono::seconds(5))); } +TEST_F(StrictDnsClusterImplTest, NegativeDnsJitter) { + const std::string yaml = R"EOF( + name: name + type: STRICT_DNS + lb_policy: ROUND_ROBIN + dns_refresh_rate: 4s + dns_jitter: -1s + load_assignment: + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: localhost1 + port_value: 11001 + )EOF"; + envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml); + Envoy::Upstream::ClusterFactoryContextImpl factory_context( + server_context_, server_context_.cluster_manager_, nullptr, ssl_context_manager_, nullptr, + false); + EXPECT_THROW_WITH_MESSAGE( + auto x = *StrictDnsClusterImpl::create(cluster_config, factory_context, dns_resolver_), + EnvoyException, "Invalid duration: Expected positive duration: seconds: -1\n"); +} TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateYesJitter) { ResolverData resolver(*dns_resolver_, server_context_.dispatcher_); @@ -1533,6 +1558,40 @@ TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateYesJitter) { TestUtility::makeDnsResponse({"192.168.1.1", "192.168.1.2"}, std::chrono::seconds(ttl_s))); } +TEST_F(StrictDnsClusterImplTest, ExtremeJitter) { + ResolverData resolver(*dns_resolver_, server_context_.dispatcher_); + + const std::string yaml = R"EOF( + name: name + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + dns_refresh_rate: 1s + dns_jitter: 1000s + respect_dns_ttl: true + load_assignment: + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: localhost1 + port_value: 11001 + )EOF"; + envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml); + Envoy::Upstream::ClusterFactoryContextImpl factory_context( + server_context_, server_context_.cluster_manager_, nullptr, ssl_context_manager_, nullptr, + false); + auto cluster = *StrictDnsClusterImpl::create(cluster_config, factory_context, dns_resolver_); + cluster->initialize([] {}); + + EXPECT_CALL(*resolver.timer_, enableTimer(testing::Ge(std::chrono::milliseconds(1000)), _)); + ON_CALL(random_, random()).WillByDefault(Return(std::numeric_limits::min())); + resolver.dns_callback_( + Network::DnsResolver::ResolutionStatus::Completed, "", + TestUtility::makeDnsResponse({"192.168.1.1", "192.168.1.2"}, std::chrono::seconds(1))); +} + // Ensures that HTTP/2 user defined SETTINGS parameter validation is enforced on clusters. TEST_F(StrictDnsClusterImplTest, Http2UserDefinedSettingsParametersValidation) { const std::string yaml = R"EOF( diff --git a/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc b/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc index 8c4a1c68d488..38fd4e95cac7 100644 --- a/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc +++ b/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -47,9 +48,11 @@ class LogicalDnsClusterTest : public Event::TestUsingSimulatedTime, public testi ON_CALL(server_context_, api()).WillByDefault(ReturnRef(*api_)); } - void setupFromV3Yaml(const std::string& yaml) { + void setupFromV3Yaml(const std::string& yaml, bool expect_success = true) { ON_CALL(server_context_, api()).WillByDefault(ReturnRef(*api_)); - resolve_timer_ = new Event::MockTimer(&server_context_.dispatcher_); + if (expect_success) { + resolve_timer_ = new Event::MockTimer(&server_context_.dispatcher_); + } NiceMock cm; envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml); Envoy::Upstream::ClusterFactoryContextImpl factory_context( @@ -647,6 +650,63 @@ TEST_F(LogicalDnsClusterTest, DNSRefreshHasJitter) { TestUtility::makeDnsResponse({"127.0.0.1", "127.0.0.2"}, std::chrono::seconds(3000))); } +TEST_F(LogicalDnsClusterTest, NegativeDnsJitter) { + const std::string yaml = R"EOF( + name: name + type: LOGICAL_DNS + dns_jitter: -1s + lb_policy: ROUND_ROBIN + dns_lookup_family: V4_ONLY + load_assignment: + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: foo.bar.com + port_value: 443 + )EOF"; + EXPECT_THROW_WITH_MESSAGE(setupFromV3Yaml(yaml, false), EnvoyException, + "Invalid duration: Expected positive duration: seconds: -1\n"); +} + +TEST_F(LogicalDnsClusterTest, ExtremeJitter) { + // When random returns large values, they were being reinterpreted as very negative values causing + // negative refresh rates. + const std::string jitter_yaml = R"EOF( + name: name + type: LOGICAL_DNS + dns_refresh_rate: 1s + dns_failure_refresh_rate: + base_interval: 7s + max_interval: 10s + connect_timeout: 0.25s + dns_jitter: 1000s + lb_policy: ROUND_ROBIN + # Since the following expectResolve() requires Network::DnsLookupFamily::V4Only we need to set + # dns_lookup_family to V4_ONLY explicitly for v2 .yaml config. + dns_lookup_family: V4_ONLY + load_assignment: + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: foo.bar.com + port_value: 443 + )EOF"; + + EXPECT_CALL(initialized_, ready()); + expectResolve(Network::DnsLookupFamily::V4Only, "foo.bar.com"); + setupFromV3Yaml(jitter_yaml); + EXPECT_CALL(membership_updated_, ready()); + EXPECT_CALL(*resolve_timer_, enableTimer(testing::Ge(std::chrono::milliseconds(4000)), _)); + ON_CALL(random_, random()).WillByDefault(Return(std::numeric_limits::min())); + dns_callback_( + Network::DnsResolver::ResolutionStatus::Completed, "", + TestUtility::makeDnsResponse({"127.0.0.1", "127.0.0.2"}, std::chrono::seconds(3000))); +} + } // namespace } // namespace Upstream } // namespace Envoy From 4bb4903d69f02d6a9c15a4d22c49581b0d699834 Mon Sep 17 00:00:00 2001 From: Steven Jin Xuan Date: Wed, 4 Dec 2024 21:20:16 -0500 Subject: [PATCH 2/2] Fix tests Signed-off-by: Steven Jin Xuan --- test/common/upstream/upstream_impl_test.cc | 2 +- .../extensions/clusters/logical_dns/logical_dns_cluster_test.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/common/upstream/upstream_impl_test.cc b/test/common/upstream/upstream_impl_test.cc index 2751f9144eed..97610e514c9f 100644 --- a/test/common/upstream/upstream_impl_test.cc +++ b/test/common/upstream/upstream_impl_test.cc @@ -1513,7 +1513,7 @@ TEST_F(StrictDnsClusterImplTest, NegativeDnsJitter) { false); EXPECT_THROW_WITH_MESSAGE( auto x = *StrictDnsClusterImpl::create(cluster_config, factory_context, dns_resolver_), - EnvoyException, "Invalid duration: Expected positive duration: seconds: -1\n"); + EnvoyException, "Expected positive duration: seconds: -1\n"); } TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateYesJitter) { ResolverData resolver(*dns_resolver_, server_context_.dispatcher_); diff --git a/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc b/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc index 38fd4e95cac7..be489add2a08 100644 --- a/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc +++ b/test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc @@ -667,7 +667,7 @@ TEST_F(LogicalDnsClusterTest, NegativeDnsJitter) { port_value: 443 )EOF"; EXPECT_THROW_WITH_MESSAGE(setupFromV3Yaml(yaml, false), EnvoyException, - "Invalid duration: Expected positive duration: seconds: -1\n"); + "Expected positive duration: seconds: -1\n"); } TEST_F(LogicalDnsClusterTest, ExtremeJitter) {