Skip to content

Commit

Permalink
Backport #36953
Browse files Browse the repository at this point in the history
Signed-off-by: Steven Jin Xuan <[email protected]>
  • Loading branch information
Stevenjin8 committed Dec 4, 2024
1 parent f632bf1 commit ab3fbaa
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 6 deletions.
2 changes: 1 addition & 1 deletion api/envoy/config/cluster/v3/cluster.proto
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ message Cluster {
// :ref:`STRICT_DNS<envoy_v3_api_enum_value_config.cluster.v3.Cluster.DiscoveryType.STRICT_DNS>`
// and :ref:`LOGICAL_DNS<envoy_v3_api_enum_value_config.cluster.v3.Cluster.DiscoveryType.LOGICAL_DNS>`
// this setting is ignored.
google.protobuf.Duration dns_jitter = 58;
google.protobuf.Duration dns_jitter = 58 [(validate.rules).duration = {gte {}}];

// If the DNS failure refresh rate is specified and the cluster type is either
// :ref:`STRICT_DNS<envoy_v3_api_enum_value_config.cluster.v3.Cluster.DiscoveryType.STRICT_DNS>`,
Expand Down
4 changes: 4 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ bug_fixes:
- area: validation/tools
change: |
Add back missing extension for ``schema_validator_tool``.
- area: DNS
change: |
Fixed bug where setting ``dns_jitter <envoy_v3_api_field_config.cluster.v3.Cluster.dns_jitter>`` to large values caused Envoy Bug
to fire.
removed_config_or_runtime:
# *Normally occurs at the end of the* :ref:`deprecation period <deprecated>`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,14 @@ void LogicalDnsCluster::startResolve() {
final_refresh_rate = addrinfo.ttl_;
}
if (dns_jitter_ms_.count() != 0) {
final_refresh_rate += std::chrono::milliseconds(random_.random()) % dns_jitter_ms_;
// Note that `random_.random()` returns a uint64 while
// `dns_jitter_ms_.count()` returns a signed long that gets cast into a uint64.
// Thus, the modulo of the two will be a positive as long as
// `dns_jitter_ms_.count()` is positive.
// It is important that this be positive, otherwise `final_refresh_rate` could be
// negative causing Envoy to crash.
final_refresh_rate +=
std::chrono::milliseconds(random_.random() % dns_jitter_ms_.count());
}
ENVOY_LOG(debug, "DNS refresh rate reset for {}, refresh rate {} ms", dns_address_,
final_refresh_rate.count());
Expand Down
10 changes: 8 additions & 2 deletions source/extensions/clusters/strict_dns/strict_dns_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,14 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() {
final_refresh_rate.count() > 0);
}
if (parent_.dns_jitter_ms_.count() > 0) {
final_refresh_rate +=
std::chrono::milliseconds(parent_.random_.random()) % parent_.dns_jitter_ms_;
// Note that `parent_.random_.random()` returns a uint64 while
// `parent_.dns_jitter_ms_.count()` returns a signed long that gets cast into a uint64.
// Thus, the modulo of the two will be a positive as long as
// `parent_dns_jitter_ms_.count()` is positive.
// It is important that this be positive, otherwise `final_refresh_rate` could be
// negative causing Envoy to crash.
final_refresh_rate += std::chrono::milliseconds(parent_.random_.random() %
parent_.dns_jitter_ms_.count());
}

ENVOY_LOG(debug, "DNS refresh rate reset for {}, refresh rate {} ms", dns_address_,
Expand Down
59 changes: 59 additions & 0 deletions test/common/upstream/upstream_impl_test.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <chrono>
#include <cstdint>
#include <limits>
#include <list>
#include <memory>
#include <string>
Expand Down Expand Up @@ -1490,6 +1491,30 @@ TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateNoJitter) {
TestUtility::makeDnsResponse({}, std::chrono::seconds(5)));
}

TEST_F(StrictDnsClusterImplTest, NegativeDnsJitter) {
const std::string yaml = R"EOF(
name: name
type: STRICT_DNS
lb_policy: ROUND_ROBIN
dns_refresh_rate: 4s
dns_jitter: -1s
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: localhost1
port_value: 11001
)EOF";
envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml);
Envoy::Upstream::ClusterFactoryContextImpl factory_context(
server_context_, server_context_.cluster_manager_, nullptr, ssl_context_manager_, nullptr,
false);
EXPECT_THROW_WITH_MESSAGE(
auto x = *StrictDnsClusterImpl::create(cluster_config, factory_context, dns_resolver_),
EnvoyException, "Invalid duration: Expected positive duration: seconds: -1\n");
}
TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateYesJitter) {
ResolverData resolver(*dns_resolver_, server_context_.dispatcher_);

Expand Down Expand Up @@ -1533,6 +1558,40 @@ TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateYesJitter) {
TestUtility::makeDnsResponse({"192.168.1.1", "192.168.1.2"}, std::chrono::seconds(ttl_s)));
}

TEST_F(StrictDnsClusterImplTest, ExtremeJitter) {
ResolverData resolver(*dns_resolver_, server_context_.dispatcher_);

const std::string yaml = R"EOF(
name: name
connect_timeout: 0.25s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
dns_refresh_rate: 1s
dns_jitter: 1000s
respect_dns_ttl: true
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: localhost1
port_value: 11001
)EOF";
envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml);
Envoy::Upstream::ClusterFactoryContextImpl factory_context(
server_context_, server_context_.cluster_manager_, nullptr, ssl_context_manager_, nullptr,
false);
auto cluster = *StrictDnsClusterImpl::create(cluster_config, factory_context, dns_resolver_);
cluster->initialize([] {});

EXPECT_CALL(*resolver.timer_, enableTimer(testing::Ge(std::chrono::milliseconds(1000)), _));
ON_CALL(random_, random()).WillByDefault(Return(std::numeric_limits<int64_t>::min()));
resolver.dns_callback_(
Network::DnsResolver::ResolutionStatus::Completed, "",
TestUtility::makeDnsResponse({"192.168.1.1", "192.168.1.2"}, std::chrono::seconds(1)));
}

// Ensures that HTTP/2 user defined SETTINGS parameter validation is enforced on clusters.
TEST_F(StrictDnsClusterImplTest, Http2UserDefinedSettingsParametersValidation) {
const std::string yaml = R"EOF(
Expand Down
64 changes: 62 additions & 2 deletions test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <chrono>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -47,9 +48,11 @@ class LogicalDnsClusterTest : public Event::TestUsingSimulatedTime, public testi
ON_CALL(server_context_, api()).WillByDefault(ReturnRef(*api_));
}

void setupFromV3Yaml(const std::string& yaml) {
void setupFromV3Yaml(const std::string& yaml, bool expect_success = true) {
ON_CALL(server_context_, api()).WillByDefault(ReturnRef(*api_));
resolve_timer_ = new Event::MockTimer(&server_context_.dispatcher_);
if (expect_success) {
resolve_timer_ = new Event::MockTimer(&server_context_.dispatcher_);
}
NiceMock<MockClusterManager> cm;
envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml);
Envoy::Upstream::ClusterFactoryContextImpl factory_context(
Expand Down Expand Up @@ -647,6 +650,63 @@ TEST_F(LogicalDnsClusterTest, DNSRefreshHasJitter) {
TestUtility::makeDnsResponse({"127.0.0.1", "127.0.0.2"}, std::chrono::seconds(3000)));
}

TEST_F(LogicalDnsClusterTest, NegativeDnsJitter) {
const std::string yaml = R"EOF(
name: name
type: LOGICAL_DNS
dns_jitter: -1s
lb_policy: ROUND_ROBIN
dns_lookup_family: V4_ONLY
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: foo.bar.com
port_value: 443
)EOF";
EXPECT_THROW_WITH_MESSAGE(setupFromV3Yaml(yaml, false), EnvoyException,
"Invalid duration: Expected positive duration: seconds: -1\n");
}

TEST_F(LogicalDnsClusterTest, ExtremeJitter) {
// When random returns large values, they were being reinterpreted as very negative values causing
// negative refresh rates.
const std::string jitter_yaml = R"EOF(
name: name
type: LOGICAL_DNS
dns_refresh_rate: 1s
dns_failure_refresh_rate:
base_interval: 7s
max_interval: 10s
connect_timeout: 0.25s
dns_jitter: 1000s
lb_policy: ROUND_ROBIN
# Since the following expectResolve() requires Network::DnsLookupFamily::V4Only we need to set
# dns_lookup_family to V4_ONLY explicitly for v2 .yaml config.
dns_lookup_family: V4_ONLY
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: foo.bar.com
port_value: 443
)EOF";

EXPECT_CALL(initialized_, ready());
expectResolve(Network::DnsLookupFamily::V4Only, "foo.bar.com");
setupFromV3Yaml(jitter_yaml);
EXPECT_CALL(membership_updated_, ready());
EXPECT_CALL(*resolve_timer_, enableTimer(testing::Ge(std::chrono::milliseconds(4000)), _));
ON_CALL(random_, random()).WillByDefault(Return(std::numeric_limits<int64_t>::min()));
dns_callback_(
Network::DnsResolver::ResolutionStatus::Completed, "",
TestUtility::makeDnsResponse({"127.0.0.1", "127.0.0.2"}, std::chrono::seconds(3000)));
}

} // namespace
} // namespace Upstream
} // namespace Envoy

0 comments on commit ab3fbaa

Please sign in to comment.