Skip to content

Commit

Permalink
getaddrinfo: retrying on eai_again (#33307)
Browse files Browse the repository at this point in the history
* Reapply "getaddrinfo: retrying on eai_again (#33130)" (#33159)

Risk Level: low
Testing: new tests
Docs Changes: n/a
Release Notes: inline
[Optional Runtime guard:] yes

Signed-off-by: Alyssa Wilk <[email protected]>
  • Loading branch information
alyssawilk authored Apr 4, 2024
1 parent 885768f commit 057e642
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 4 deletions.
4 changes: 4 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ minor_behavior_changes:
- area: dns
change: |
Allowing <envoy_v3_api_field_extensions.common.dynamic_forward_proxy.v3.DnsCacheConfig.dns_min_refresh_rate>` to go as low as 1s.
- area: dns
change: |
Fixing a bug in the getaddrinfo resolver where it did not reresolve on EAI_AGAIN. This behavioral change can be temporarily
reverted by setting ``envoy.reloadable_features.dns_reresolve_on_eai_again`` to false.
- area: upstream
change: |
Upstream now excludes hosts set to ``DRAINING`` state via EDS from load balancing and panic routing
Expand Down
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ RUNTIME_GUARD(envoy_reloadable_features_detect_and_raise_rst_tcp_connection);
RUNTIME_GUARD(envoy_reloadable_features_dfp_mixed_scheme);
RUNTIME_GUARD(envoy_reloadable_features_disallow_quic_client_udp_mmsg);
RUNTIME_GUARD(envoy_reloadable_features_dns_cache_set_first_resolve_complete);
RUNTIME_GUARD(envoy_reloadable_features_dns_reresolve_on_eai_again);
RUNTIME_GUARD(envoy_reloadable_features_edf_lb_host_scheduler_init_fix);
RUNTIME_GUARD(envoy_reloadable_features_edf_lb_locality_scheduler_init_fix);
RUNTIME_GUARD(envoy_reloadable_features_enable_compression_bomb_protection);
Expand Down
21 changes: 18 additions & 3 deletions source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge
{
absl::MutexLock guard(&mutex_);
shutting_down_ = true;
pending_queries_.clear();
}

resolver_thread_->join();
Expand All @@ -31,8 +32,8 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge
ActiveDnsQuery* resolve(const std::string& dns_name, DnsLookupFamily dns_lookup_family,
ResolveCb callback) override {
ENVOY_LOG(debug, "adding new query [{}] to pending queries", dns_name);
auto new_query = std::make_unique<PendingQuery>(dns_name, dns_lookup_family, callback);
absl::MutexLock guard(&mutex_);
auto new_query = std::make_unique<PendingQuery>(dns_name, dns_lookup_family, callback, mutex_);
pending_queries_.emplace_back(std::move(new_query));
return pending_queries_.back().get();
}
Expand All @@ -42,14 +43,18 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge
private:
class PendingQuery : public ActiveDnsQuery {
public:
PendingQuery(const std::string& dns_name, DnsLookupFamily dns_lookup_family, ResolveCb callback)
: dns_name_(dns_name), dns_lookup_family_(dns_lookup_family), callback_(callback) {}
PendingQuery(const std::string& dns_name, DnsLookupFamily dns_lookup_family, ResolveCb callback,
absl::Mutex& mutex)
: mutex_(mutex), dns_name_(dns_name), dns_lookup_family_(dns_lookup_family),
callback_(callback) {}

void cancel(CancelReason) override {
ENVOY_LOG(debug, "cancelling query [{}]", dns_name_);
absl::MutexLock guard(&mutex_);
cancelled_ = true;
}

absl::Mutex& mutex_;
const std::string dns_name_;
const DnsLookupFamily dns_lookup_family_;
ResolveCb callback_;
Expand Down Expand Up @@ -145,6 +150,8 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge

while (true) {
PendingQuerySharedPtr next_query;
const bool reresolve =
Runtime::runtimeFeatureEnabled("envoy.reloadable_features.dns_reresolve_on_eai_again");
{
absl::MutexLock guard(&mutex_);
auto condition = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
Expand All @@ -157,6 +164,9 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge

next_query = std::move(pending_queries_.front());
pending_queries_.pop_front();
if (reresolve && next_query->cancelled_) {
continue;
}
}

ENVOY_LOG(debug, "popped pending query [{}]", next_query->dns_name_);
Expand All @@ -178,6 +188,11 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge
auto addrinfo_wrapper = AddrInfoWrapper(addrinfo_result_do_not_use);
if (rc.return_value_ == 0) {
response = processResponse(*next_query, addrinfo_wrapper.get());
} else if (reresolve && rc.return_value_ == EAI_AGAIN) {
ENVOY_LOG(debug, "retrying query [{}]", next_query->dns_name_);
absl::MutexLock guard(&mutex_);
pending_queries_.push_back(next_query);
continue;
} else {
// TODO(mattklein123): Handle some errors differently such as `EAI_NODATA`.
ENVOY_LOG(debug, "getaddrinfo failed with rc={} errno={}", gai_strerror(rc.return_value_),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ TEST_F(GetAddrInfoDnsImplTest, Failure) {
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls_);

EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _))
.WillOnce(Return(Api::SysCallIntResult{EAI_AGAIN, 0}));
.WillOnce(Return(Api::SysCallIntResult{EAI_FAIL, 0}));
resolver_->resolve(
"localhost", DnsLookupFamily::All,
[this](DnsResolver::ResolutionStatus status, std::list<DnsResponse>&& response) {
Expand All @@ -153,6 +153,46 @@ TEST_F(GetAddrInfoDnsImplTest, Failure) {
dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit);
}

TEST_F(GetAddrInfoDnsImplTest, TryAgainAndSuccess) {
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls_);

// 2 calls - one EAGAIN, one success.
EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _))
.Times(2)
.WillOnce(Return(Api::SysCallIntResult{EAI_AGAIN, 0}))
.WillOnce(Return(Api::SysCallIntResult{0, 0}));
resolver_->resolve(
"localhost", DnsLookupFamily::All,
[this](DnsResolver::ResolutionStatus status, std::list<DnsResponse>&& response) {
EXPECT_EQ(status, DnsResolver::ResolutionStatus::Success);
EXPECT_TRUE(response.empty());

dispatcher_->exit();
});

dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit);
}

TEST_F(GetAddrInfoDnsImplTest, TryAgainThenCancel) {
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls_);

std::atomic<ActiveDnsQuery*> query = nullptr;

EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _))
.Times(testing::AnyNumber())
.WillOnce(Invoke([&](const char*, const char*, const addrinfo*, addrinfo**) {
query.load()->cancel(ActiveDnsQuery::CancelReason::QueryAbandoned);
dispatcher_->exit();
return Api::SysCallIntResult{EAI_AGAIN, 0};
}));
query =
resolver_->resolve("localhost", DnsLookupFamily::All,
[](DnsResolver::ResolutionStatus, std::list<DnsResponse>&&) { FAIL(); });

dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit);
resolver_.reset();
}

TEST_F(GetAddrInfoDnsImplTest, All) {
// See https://github.com/envoyproxy/envoy/issues/28504.
DISABLE_UNDER_WINDOWS;
Expand Down

0 comments on commit 057e642

Please sign in to comment.