From 057e64217017f8781a9083e824937ed39ccd4cc7 Mon Sep 17 00:00:00 2001 From: alyssawilk Date: Thu, 4 Apr 2024 08:07:29 -0400 Subject: [PATCH] getaddrinfo: retrying on eai_again (#33307) * Reapply "getaddrinfo: retrying on eai_again (#33130)" (#33159) Risk Level: low Testing: new tests Docs Changes: n/a Release Notes: inline [Optional Runtime guard:] yes Signed-off-by: Alyssa Wilk --- changelogs/current.yaml | 4 ++ source/common/runtime/runtime_features.cc | 1 + .../dns_resolver/getaddrinfo/getaddrinfo.cc | 21 ++++++++-- .../getaddrinfo/getaddrinfo_test.cc | 42 ++++++++++++++++++- 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 5d42e1c3e803..eaf79745d8fc 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -34,6 +34,10 @@ minor_behavior_changes: - area: dns change: | Allowing ` to go as low as 1s. +- area: dns + change: | + Fixing a bug in the getaddrinfo resolver where it did not reresolve on EAI_AGAIN. This behavioral change can be temporarily + reverted by setting ``envoy.reloadable_features.dns_reresolve_on_eai_again`` to false. - area: upstream change: | Upstream now excludes hosts set to ``DRAINING`` state via EDS from load balancing and panic routing diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 2fceb79fe5da..13f7b55aac7b 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -40,6 +40,7 @@ RUNTIME_GUARD(envoy_reloadable_features_detect_and_raise_rst_tcp_connection); RUNTIME_GUARD(envoy_reloadable_features_dfp_mixed_scheme); RUNTIME_GUARD(envoy_reloadable_features_disallow_quic_client_udp_mmsg); RUNTIME_GUARD(envoy_reloadable_features_dns_cache_set_first_resolve_complete); +RUNTIME_GUARD(envoy_reloadable_features_dns_reresolve_on_eai_again); RUNTIME_GUARD(envoy_reloadable_features_edf_lb_host_scheduler_init_fix); RUNTIME_GUARD(envoy_reloadable_features_edf_lb_locality_scheduler_init_fix); RUNTIME_GUARD(envoy_reloadable_features_enable_compression_bomb_protection); diff --git a/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc b/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc index 30d2646d87c2..59df5b7db7d4 100644 --- a/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc +++ b/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc @@ -22,6 +22,7 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggablejoin(); @@ -31,8 +32,8 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable(dns_name, dns_lookup_family, callback); absl::MutexLock guard(&mutex_); + auto new_query = std::make_unique(dns_name, dns_lookup_family, callback, mutex_); pending_queries_.emplace_back(std::move(new_query)); return pending_queries_.back().get(); } @@ -42,14 +43,18 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggablecancelled_) { + continue; + } } ENVOY_LOG(debug, "popped pending query [{}]", next_query->dns_name_); @@ -178,6 +188,11 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggabledns_name_); + absl::MutexLock guard(&mutex_); + pending_queries_.push_back(next_query); + continue; } else { // TODO(mattklein123): Handle some errors differently such as `EAI_NODATA`. ENVOY_LOG(debug, "getaddrinfo failed with rc={} errno={}", gai_strerror(rc.return_value_), diff --git a/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc b/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc index 7c219f11433f..c6e6a2f6156a 100644 --- a/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc +++ b/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc @@ -140,7 +140,7 @@ TEST_F(GetAddrInfoDnsImplTest, Failure) { TestThreadsafeSingletonInjector os_calls(&os_sys_calls_); EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _)) - .WillOnce(Return(Api::SysCallIntResult{EAI_AGAIN, 0})); + .WillOnce(Return(Api::SysCallIntResult{EAI_FAIL, 0})); resolver_->resolve( "localhost", DnsLookupFamily::All, [this](DnsResolver::ResolutionStatus status, std::list&& response) { @@ -153,6 +153,46 @@ TEST_F(GetAddrInfoDnsImplTest, Failure) { dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); } +TEST_F(GetAddrInfoDnsImplTest, TryAgainAndSuccess) { + TestThreadsafeSingletonInjector os_calls(&os_sys_calls_); + + // 2 calls - one EAGAIN, one success. + EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _)) + .Times(2) + .WillOnce(Return(Api::SysCallIntResult{EAI_AGAIN, 0})) + .WillOnce(Return(Api::SysCallIntResult{0, 0})); + resolver_->resolve( + "localhost", DnsLookupFamily::All, + [this](DnsResolver::ResolutionStatus status, std::list&& response) { + EXPECT_EQ(status, DnsResolver::ResolutionStatus::Success); + EXPECT_TRUE(response.empty()); + + dispatcher_->exit(); + }); + + dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); +} + +TEST_F(GetAddrInfoDnsImplTest, TryAgainThenCancel) { + TestThreadsafeSingletonInjector os_calls(&os_sys_calls_); + + std::atomic query = nullptr; + + EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _)) + .Times(testing::AnyNumber()) + .WillOnce(Invoke([&](const char*, const char*, const addrinfo*, addrinfo**) { + query.load()->cancel(ActiveDnsQuery::CancelReason::QueryAbandoned); + dispatcher_->exit(); + return Api::SysCallIntResult{EAI_AGAIN, 0}; + })); + query = + resolver_->resolve("localhost", DnsLookupFamily::All, + [](DnsResolver::ResolutionStatus, std::list&&) { FAIL(); }); + + dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); + resolver_.reset(); +} + TEST_F(GetAddrInfoDnsImplTest, All) { // See https://github.com/envoyproxy/envoy/issues/28504. DISABLE_UNDER_WINDOWS;