Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport #36953 #37510

Merged
merged 2 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/envoy/config/cluster/v3/cluster.proto
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ message Cluster {
// :ref:`STRICT_DNS<envoy_v3_api_enum_value_config.cluster.v3.Cluster.DiscoveryType.STRICT_DNS>`
// and :ref:`LOGICAL_DNS<envoy_v3_api_enum_value_config.cluster.v3.Cluster.DiscoveryType.LOGICAL_DNS>`
// this setting is ignored.
google.protobuf.Duration dns_jitter = 58;
google.protobuf.Duration dns_jitter = 58 [(validate.rules).duration = {gte {}}];

// If the DNS failure refresh rate is specified and the cluster type is either
// :ref:`STRICT_DNS<envoy_v3_api_enum_value_config.cluster.v3.Cluster.DiscoveryType.STRICT_DNS>`,
Expand Down
4 changes: 4 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ bug_fixes:
- area: validation/tools
change: |
Add back missing extension for ``schema_validator_tool``.
- area: DNS
change: |
Fixed bug where setting ``dns_jitter <envoy_v3_api_field_config.cluster.v3.Cluster.dns_jitter>`` to large values caused Envoy Bug
to fire.

removed_config_or_runtime:
# *Normally occurs at the end of the* :ref:`deprecation period <deprecated>`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,14 @@ void LogicalDnsCluster::startResolve() {
final_refresh_rate = addrinfo.ttl_;
}
if (dns_jitter_ms_.count() != 0) {
final_refresh_rate += std::chrono::milliseconds(random_.random()) % dns_jitter_ms_;
// Note that `random_.random()` returns a uint64 while
// `dns_jitter_ms_.count()` returns a signed long that gets cast into a uint64.
// Thus, the modulo of the two will be a positive as long as
// `dns_jitter_ms_.count()` is positive.
// It is important that this be positive, otherwise `final_refresh_rate` could be
// negative causing Envoy to crash.
final_refresh_rate +=
std::chrono::milliseconds(random_.random() % dns_jitter_ms_.count());
}
ENVOY_LOG(debug, "DNS refresh rate reset for {}, refresh rate {} ms", dns_address_,
final_refresh_rate.count());
Expand Down
10 changes: 8 additions & 2 deletions source/extensions/clusters/strict_dns/strict_dns_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,14 @@ void StrictDnsClusterImpl::ResolveTarget::startResolve() {
final_refresh_rate.count() > 0);
}
if (parent_.dns_jitter_ms_.count() > 0) {
final_refresh_rate +=
std::chrono::milliseconds(parent_.random_.random()) % parent_.dns_jitter_ms_;
// Note that `parent_.random_.random()` returns a uint64 while
// `parent_.dns_jitter_ms_.count()` returns a signed long that gets cast into a uint64.
// Thus, the modulo of the two will be a positive as long as
// `parent_dns_jitter_ms_.count()` is positive.
// It is important that this be positive, otherwise `final_refresh_rate` could be
// negative causing Envoy to crash.
final_refresh_rate += std::chrono::milliseconds(parent_.random_.random() %
parent_.dns_jitter_ms_.count());
}

ENVOY_LOG(debug, "DNS refresh rate reset for {}, refresh rate {} ms", dns_address_,
Expand Down
59 changes: 59 additions & 0 deletions test/common/upstream/upstream_impl_test.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <chrono>
#include <cstdint>
#include <limits>
#include <list>
#include <memory>
#include <string>
Expand Down Expand Up @@ -1490,6 +1491,30 @@ TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateNoJitter) {
TestUtility::makeDnsResponse({}, std::chrono::seconds(5)));
}

TEST_F(StrictDnsClusterImplTest, NegativeDnsJitter) {
const std::string yaml = R"EOF(
name: name
type: STRICT_DNS
lb_policy: ROUND_ROBIN
dns_refresh_rate: 4s
dns_jitter: -1s
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: localhost1
port_value: 11001
)EOF";
envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml);
Envoy::Upstream::ClusterFactoryContextImpl factory_context(
server_context_, server_context_.cluster_manager_, nullptr, ssl_context_manager_, nullptr,
false);
EXPECT_THROW_WITH_MESSAGE(
auto x = *StrictDnsClusterImpl::create(cluster_config, factory_context, dns_resolver_),
EnvoyException, "Expected positive duration: seconds: -1\n");
}
TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateYesJitter) {
ResolverData resolver(*dns_resolver_, server_context_.dispatcher_);

Expand Down Expand Up @@ -1533,6 +1558,40 @@ TEST_F(StrictDnsClusterImplTest, TtlAsDnsRefreshRateYesJitter) {
TestUtility::makeDnsResponse({"192.168.1.1", "192.168.1.2"}, std::chrono::seconds(ttl_s)));
}

TEST_F(StrictDnsClusterImplTest, ExtremeJitter) {
ResolverData resolver(*dns_resolver_, server_context_.dispatcher_);

const std::string yaml = R"EOF(
name: name
connect_timeout: 0.25s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
dns_refresh_rate: 1s
dns_jitter: 1000s
respect_dns_ttl: true
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: localhost1
port_value: 11001
)EOF";
envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml);
Envoy::Upstream::ClusterFactoryContextImpl factory_context(
server_context_, server_context_.cluster_manager_, nullptr, ssl_context_manager_, nullptr,
false);
auto cluster = *StrictDnsClusterImpl::create(cluster_config, factory_context, dns_resolver_);
cluster->initialize([] {});

EXPECT_CALL(*resolver.timer_, enableTimer(testing::Ge(std::chrono::milliseconds(1000)), _));
ON_CALL(random_, random()).WillByDefault(Return(std::numeric_limits<int64_t>::min()));
resolver.dns_callback_(
Network::DnsResolver::ResolutionStatus::Completed, "",
TestUtility::makeDnsResponse({"192.168.1.1", "192.168.1.2"}, std::chrono::seconds(1)));
}

// Ensures that HTTP/2 user defined SETTINGS parameter validation is enforced on clusters.
TEST_F(StrictDnsClusterImplTest, Http2UserDefinedSettingsParametersValidation) {
const std::string yaml = R"EOF(
Expand Down
64 changes: 62 additions & 2 deletions test/extensions/clusters/logical_dns/logical_dns_cluster_test.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <chrono>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -47,9 +48,11 @@ class LogicalDnsClusterTest : public Event::TestUsingSimulatedTime, public testi
ON_CALL(server_context_, api()).WillByDefault(ReturnRef(*api_));
}

void setupFromV3Yaml(const std::string& yaml) {
void setupFromV3Yaml(const std::string& yaml, bool expect_success = true) {
ON_CALL(server_context_, api()).WillByDefault(ReturnRef(*api_));
resolve_timer_ = new Event::MockTimer(&server_context_.dispatcher_);
if (expect_success) {
resolve_timer_ = new Event::MockTimer(&server_context_.dispatcher_);
}
NiceMock<MockClusterManager> cm;
envoy::config::cluster::v3::Cluster cluster_config = parseClusterFromV3Yaml(yaml);
Envoy::Upstream::ClusterFactoryContextImpl factory_context(
Expand Down Expand Up @@ -647,6 +650,63 @@ TEST_F(LogicalDnsClusterTest, DNSRefreshHasJitter) {
TestUtility::makeDnsResponse({"127.0.0.1", "127.0.0.2"}, std::chrono::seconds(3000)));
}

TEST_F(LogicalDnsClusterTest, NegativeDnsJitter) {
const std::string yaml = R"EOF(
name: name
type: LOGICAL_DNS
dns_jitter: -1s
lb_policy: ROUND_ROBIN
dns_lookup_family: V4_ONLY
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: foo.bar.com
port_value: 443
)EOF";
EXPECT_THROW_WITH_MESSAGE(setupFromV3Yaml(yaml, false), EnvoyException,
"Expected positive duration: seconds: -1\n");
}

TEST_F(LogicalDnsClusterTest, ExtremeJitter) {
// When random returns large values, they were being reinterpreted as very negative values causing
// negative refresh rates.
const std::string jitter_yaml = R"EOF(
name: name
type: LOGICAL_DNS
dns_refresh_rate: 1s
dns_failure_refresh_rate:
base_interval: 7s
max_interval: 10s
connect_timeout: 0.25s
dns_jitter: 1000s
lb_policy: ROUND_ROBIN
# Since the following expectResolve() requires Network::DnsLookupFamily::V4Only we need to set
# dns_lookup_family to V4_ONLY explicitly for v2 .yaml config.
dns_lookup_family: V4_ONLY
load_assignment:
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: foo.bar.com
port_value: 443
)EOF";

EXPECT_CALL(initialized_, ready());
expectResolve(Network::DnsLookupFamily::V4Only, "foo.bar.com");
setupFromV3Yaml(jitter_yaml);
EXPECT_CALL(membership_updated_, ready());
EXPECT_CALL(*resolve_timer_, enableTimer(testing::Ge(std::chrono::milliseconds(4000)), _));
ON_CALL(random_, random()).WillByDefault(Return(std::numeric_limits<int64_t>::min()));
dns_callback_(
Network::DnsResolver::ResolutionStatus::Completed, "",
TestUtility::makeDnsResponse({"127.0.0.1", "127.0.0.2"}, std::chrono::seconds(3000)));
}

} // namespace
} // namespace Upstream
} // namespace Envoy