From 2cb2617ccaff233dd6383cd0e939d9858792a0ef Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 12 Feb 2024 22:45:29 -0500 Subject: [PATCH 01/39] add streaming variant of the admin API Signed-off-by: Joshua Marantz --- envoy/server/admin.h | 7 ++ source/exe/main_common.cc | 92 +++++++++++++++++++++++++ source/exe/main_common.h | 17 +++++ source/server/admin/admin.h | 7 +- source/server/config_validation/admin.h | 3 + source/server/server.h | 5 +- test/mocks/server/admin.h | 2 + 7 files changed, 125 insertions(+), 8 deletions(-) diff --git a/envoy/server/admin.h b/envoy/server/admin.h index 251b0cf47b44..4188b40b8c8d 100644 --- a/envoy/server/admin.h +++ b/envoy/server/admin.h @@ -281,6 +281,13 @@ class Admin { * Closes the listening socket for the admin. */ virtual void closeSocket() PURE; + + virtual GenRequestFn createRequestFunction() const PURE; + + /** + * Creates a Request from the request in the admin stream. + */ + virtual RequestPtr makeRequest(AdminStream& admin_stream) const PURE; }; } // namespace Server diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index d18a575f2dd0..b62fd344a566 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -89,6 +89,98 @@ void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string handler(*response_headers, body); }); } + +namespace { +class AdminResponseImpl : public MainCommonBase::AdminResponse { +public: + AdminResponseImpl(Server::Instance& server, absl::string_view path, absl::string_view method) + : server_(server), opt_admin_(server.admin()) { + request_headers_->setMethod(method); + request_headers_->setPath(path); + } + + void getHeaders(HeadersFn fn) override { + if (!opt_admin_) { + fn(Http::Code::InternalServerError, *response_headers_); + return; + } + + server_.dispatcher().post([this, fn]() { + if (!sent_headers_.exchange(true)) { + Server::AdminFilter filter(opt_admin_->createRequestFunction()); + filter.decodeHeaders(*request_headers_, false); + request_ = opt_admin_->makeRequest(filter); + code_ = request_->start(*response_headers_); + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + fn(code_, *response_headers_); + } + }); + if (server_.isShutdown()) { + if (!sent_headers_.exchange(true)) { + fn(Http::Code::InternalServerError, *response_headers_); + } + } + } + + void nextChunk(BodyFn fn) override { + server_.dispatcher().post([this, fn]() { + while (response_.length() == 0 && more_data_) { + more_data_ = request_->nextChunk(response_); + } + /* Buffer::SliceDataPtr slice; + absl::string_view chunk; + if (response_.length() != 0) { + slice = response_.extractMutableFrontSlice(); + absl::Span data = slice->getMutableData(); + more_data_ |= response_.length() != 0; + chunk = absl::string_view( + reinterpret_cast(data.data()), data.size()); + } else { + more_data_ = false; + } + sendChunk(fn, chunk, more_data_); + */ + sendChunk(fn, response_, more_data_); + }); + + // We do not know if the post () worked. + if (server_.isShutdown()) { + Buffer::OwnedImpl error; + error.add("server was shut down"); + sendChunk(fn, error, false); + } + } + + void sendChunk(BodyFn fn, Buffer::Instance& chunk, bool more_data) { + { + absl::MutexLock lock(&mutex_); + if (sent_end_stream_) { + return; + } + sent_end_stream_ = !more_data; + } + fn(chunk, more_data); + } + +private: + Server::Instance& server_; + OptRef opt_admin_; + Buffer::OwnedImpl response_; + Http::Code code_; + Server::Admin::RequestPtr request_; + Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; + Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; + bool more_data_{true}; + std::atomic sent_headers_{false}; + bool sent_end_stream_{false}; + absl::Mutex mutex_; +}; +} // namespace + +MainCommonBase::AdminResponsePtr MainCommonBase::adminRequest(absl::string_view path_and_query, + absl::string_view method) { + return std::make_unique(*server(), path_and_query, method); +} #endif MainCommon::MainCommon(const std::vector& args) diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 349decdec0cc..dd18ed4d7022 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -34,6 +34,18 @@ class MainCommonBase : public StrippedMainBase { bool run(); #ifdef ENVOY_ADMIN_FUNCTIONALITY + class AdminResponse { + public: + virtual ~AdminResponse() = default; + + using HeadersFn = std::function; + virtual void getHeaders(HeadersFn fn) PURE; + + using BodyFn = std::function; + virtual void nextChunk(BodyFn fn) PURE; + }; + using AdminResponsePtr = std::unique_ptr; + using AdminRequestFn = std::function; @@ -51,6 +63,7 @@ class MainCommonBase : public StrippedMainBase { // semantics, rather than a handler callback. void adminRequest(absl::string_view path_and_query, absl::string_view method, const AdminRequestFn& handler); + AdminResponsePtr adminRequest(absl::string_view path_and_query, absl::string_view method); #endif }; @@ -82,6 +95,10 @@ class MainCommon { const MainCommonBase::AdminRequestFn& handler) { base_.adminRequest(path_and_query, method, handler); } + MainCommonBase::AdminResponsePtr adminRequest(absl::string_view path_and_query, + absl::string_view method) { + return base_.adminRequest(path_and_query, method); + } #endif static std::string hotRestartVersion(bool hot_restart_enabled); diff --git a/source/server/admin/admin.h b/source/server/admin/admin.h index c77be2b2b92b..b191e4932ff4 100644 --- a/source/server/admin/admin.h +++ b/source/server/admin/admin.h @@ -216,7 +216,7 @@ class AdminImpl : public Admin, void closeSocket() override; void addListenerToHandler(Network::ConnectionHandler* handler) override; - GenRequestFn createRequestFunction() const { + GenRequestFn createRequestFunction() const override { return [this](AdminStream& admin_stream) -> RequestPtr { return makeRequest(admin_stream); }; } uint64_t maxRequestsPerConnection() const override { return 0; } @@ -245,10 +245,7 @@ class AdminImpl : public Admin, ::Envoy::Http::HeaderValidatorStats& getHeaderValidatorStats(Http::Protocol protocol); #endif - /** - * Creates a Request from the request in the admin stream. - */ - RequestPtr makeRequest(AdminStream& admin_stream) const; + RequestPtr makeRequest(AdminStream& admin_stream) const override; /** * Creates a UrlHandler structure from a non-chunked callback. diff --git a/source/server/config_validation/admin.h b/source/server/config_validation/admin.h index 173afb29d355..f57470e6e358 100644 --- a/source/server/config_validation/admin.h +++ b/source/server/config_validation/admin.h @@ -40,6 +40,9 @@ class ValidationAdmin : public Admin { uint32_t concurrency() const override { return 1; } void closeSocket() override {} + GenRequestFn createRequestFunction() const override { return nullptr; } + RequestPtr makeRequest(AdminStream&) const override { return nullptr; } + private: ConfigTrackerImpl config_tracker_; Network::SocketSharedPtr socket_; diff --git a/source/server/server.h b/source/server/server.h index dcfd5be7e5a9..3f4e4617d9df 100644 --- a/source/server/server.h +++ b/source/server/server.h @@ -332,8 +332,7 @@ class InstanceBase : Logger::Loggable, void loadServerFlags(const absl::optional& flags_path); void startWorkers(); void terminate(); - void notifyCallbacksForStage( - Stage stage, std::function completion_cb = [] {}); + void notifyCallbacksForStage(Stage stage, std::function completion_cb = [] {}); void onRuntimeReady(); void onClusterManagerPrimaryInitializationComplete(); @@ -352,7 +351,7 @@ class InstanceBase : Logger::Loggable, std::unique_ptr secret_manager_; bool workers_started_{false}; std::atomic live_; - bool shutdown_{false}; + std::atomic shutdown_{false}; const Options& options_; ProtobufMessage::ProdValidationContextImpl validation_context_; TimeSource& time_source_; diff --git a/test/mocks/server/admin.h b/test/mocks/server/admin.h index c7308d6a044e..9c8b1875a018 100644 --- a/test/mocks/server/admin.h +++ b/test/mocks/server/admin.h @@ -40,6 +40,8 @@ class MockAdmin : public Admin { MOCK_METHOD(void, addListenerToHandler, (Network::ConnectionHandler * handler)); MOCK_METHOD(uint32_t, concurrency, (), (const)); MOCK_METHOD(void, closeSocket, ()); + MOCK_METHOD(GenRequestFn, createRequestFunction, (), (const)); + MOCK_METHOD(RequestPtr, makeRequest, (AdminStream & admin_stream), (const)); NiceMock config_tracker_; NiceMock socket_; From d2412c6289c7a205d27034b406c4c32136bdb292 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 12 Feb 2024 22:48:19 -0500 Subject: [PATCH 02/39] remove superfluous diff Signed-off-by: Joshua Marantz --- source/server/server.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/server/server.h b/source/server/server.h index 3f4e4617d9df..dcfd5be7e5a9 100644 --- a/source/server/server.h +++ b/source/server/server.h @@ -332,7 +332,8 @@ class InstanceBase : Logger::Loggable, void loadServerFlags(const absl::optional& flags_path); void startWorkers(); void terminate(); - void notifyCallbacksForStage(Stage stage, std::function completion_cb = [] {}); + void notifyCallbacksForStage( + Stage stage, std::function completion_cb = [] {}); void onRuntimeReady(); void onClusterManagerPrimaryInitializationComplete(); @@ -351,7 +352,7 @@ class InstanceBase : Logger::Loggable, std::unique_ptr secret_manager_; bool workers_started_{false}; std::atomic live_; - std::atomic shutdown_{false}; + bool shutdown_{false}; const Options& options_; ProtobufMessage::ProdValidationContextImpl validation_context_; TimeSource& time_source_; From 48f114d6797a94201f1acfd2b7cbfb397a5c28b1 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 13 Feb 2024 14:03:15 -0500 Subject: [PATCH 03/39] add streaming test Signed-off-by: Joshua Marantz --- test/exe/main_common_test.cc | 69 ++++++++++++++++++++++ test/integration/admin_html/test_server.cc | 3 +- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 68d142e5b3b8..22ba68ca508c 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -45,6 +45,23 @@ const std::string& outOfMemoryPattern() { } #endif +class StreamingAdminRequest : public Envoy::Server::Admin::Request { +public: + static constexpr uint64_t NumChunks = 10; + static constexpr uint64_t BytesPerChunk = 10000; + + StreamingAdminRequest() : chunk_(BytesPerChunk, 'a') {} + Http::Code start(Http::ResponseHeaderMap&) override { return Http::Code::OK; } + bool nextChunk(Buffer::Instance& response) override { + response.add(chunk_); + return --chunks_remaining_ > 0; + } + +private: + const std::string chunk_; + uint64_t chunks_remaining_{NumChunks}; +}; + } // namespace /** @@ -287,6 +304,44 @@ class AdminRequestTest : public MainCommonTest { return out; } + void setupStreamingRequest(absl::string_view prefix) { + Server::Admin& admin = *main_common_->server()->admin(); + admin.addStreamingHandler( + std::string(prefix), "streaming api", + [](Server::AdminStream&) -> Server::Admin::RequestPtr { + return std::make_unique(); + }, + true, false); + } + + using ChunksBytes = std::pair; + ChunksBytes adminStreamingRequest(absl::string_view path, absl::string_view method) { + absl::Notification done; + std::vector out; + MainCommonBase::AdminResponsePtr response = main_common_->adminRequest(path, method); + absl::Notification headers_notify; + response->getHeaders( + [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); + headers_notify.WaitForNotification(); + bool cont = true; + uint64_t num_chunks = 0; + uint64_t num_bytes = 0; + while (cont) { + absl::Notification chunk_notify; + response->nextChunk( + [&chunk_notify, &num_chunks, &num_bytes, &cont](Buffer::Instance& chunk, bool more) { + cont = more; + num_bytes += chunk.length(); + chunk.drain(chunk.length()); + ++num_chunks; + chunk_notify.Notify(); + }); + chunk_notify.WaitForNotification(); + } + + return ChunksBytes(num_chunks, num_bytes); + } + // Initiates Envoy running in its own thread. void startEnvoy() { envoy_thread_ = Thread::threadFactoryForTest().createThread([this]() { @@ -371,6 +426,20 @@ TEST_P(AdminRequestTest, AdminRequestGetStatsAndQuit) { EXPECT_TRUE(waitForEnvoyToExit()); } +TEST_P(AdminRequestTest, AdminStreamingRequestGetStatsAndQuit) { + startEnvoy(); + started_.WaitForNotification(); + + setupStreamingRequest("/stream"); + ChunksBytes chunks_bytes = adminStreamingRequest("/stream", "GET"); + EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); + EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); + + adminRequest("/quitquitquit", "POST"); + EXPECT_TRUE(waitForEnvoyToExit()); +} + // no signals on Windows -- could probably make this work with GenerateConsoleCtrlEvent #ifndef WIN32 // This test is identical to the above one, except that instead of using an admin /quitquitquit, diff --git a/test/integration/admin_html/test_server.cc b/test/integration/admin_html/test_server.cc index 4a53d7661bb9..d371d7b22108 100644 --- a/test/integration/admin_html/test_server.cc +++ b/test/integration/admin_html/test_server.cc @@ -14,7 +14,8 @@ namespace { * a query param but it could not be found. * * This test-server is only for testing; it potentially makes the - * entire file-system avail + * entire file-system available to HTTP clients, so this should not + * be used for production systems. */ Http::Code testCallback(Http::ResponseHeaderMap& response_headers, Buffer::Instance& response, Server::AdminStream& admin_stream) { From d890eb3ef256530bfb86c8f8fcfae50c9223d912 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 18 Feb 2024 15:19:45 -0500 Subject: [PATCH 04/39] add cancellation semantics and tests. Signed-off-by: Joshua Marantz --- envoy/server/admin.h | 2 - source/exe/main_common.cc | 246 ++++++++++++++++++------ source/exe/main_common.h | 71 ++++++- source/server/admin/admin.cc | 5 +- source/server/admin/admin.h | 3 - source/server/admin/admin_filter.cc | 5 +- source/server/admin/admin_filter.h | 10 +- source/server/config_validation/admin.h | 2 - test/exe/main_common_test.cc | 82 +++++++- test/mocks/server/admin.h | 1 - test/server/admin/admin_filter_test.cc | 11 +- test/server/admin/admin_instance.cc | 2 +- 12 files changed, 353 insertions(+), 87 deletions(-) diff --git a/envoy/server/admin.h b/envoy/server/admin.h index 4188b40b8c8d..10c4e35d47fb 100644 --- a/envoy/server/admin.h +++ b/envoy/server/admin.h @@ -282,8 +282,6 @@ class Admin { */ virtual void closeSocket() PURE; - virtual GenRequestFn createRequestFunction() const PURE; - /** * Creates a Request from the request in the admin stream. */ diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index b62fd344a566..ea9111721db9 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -10,6 +10,7 @@ #include "source/common/common/compiler_requirements.h" #include "source/common/common/logger.h" #include "source/common/common/perf_annotation.h" +#include "source/common/common/thread.h" #include "source/common/network/utility.h" #include "source/common/stats/thread_local_store.h" #include "source/exe/platform_impl.h" @@ -59,23 +60,34 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem std::move(process_context), createFunction()) {} bool MainCommonBase::run() { + // Avoid returning from cases due to gcc warnings and uncovered lines. + bool ret = true; + switch (options_.mode()) { case Server::Mode::Serve: runServer(); - return true; + terminateAdminRequests(); + break; case Server::Mode::Validate: - return Server::validateConfig( + ret = Server::validateConfig( options_, Network::Utility::getLocalAddress(options_.localAddressIpVersion()), component_factory_, platform_impl_->threadFactory(), platform_impl_->fileSystem(), process_context_ ? ProcessContextOptRef(std::ref(*process_context_)) : absl::nullopt); + break; case Server::Mode::InitOnly: PERF_DUMP(); - return true; + break; + default: + ret = false; } - return false; // for gcc. + return ret; } #ifdef ENVOY_ADMIN_FUNCTIONALITY + +// This request variant buffers the entire response in one string. New uses +// should opt for the streaming version below, where an AdminResponse object +// is created and used to stream data with flow-control. void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method, const AdminRequestFn& handler) { std::string path_and_query_buf = std::string(path_and_query); @@ -93,93 +105,215 @@ void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string namespace { class AdminResponseImpl : public MainCommonBase::AdminResponse { public: - AdminResponseImpl(Server::Instance& server, absl::string_view path, absl::string_view method) - : server_(server), opt_admin_(server.admin()) { + using CleanupFn = std::function; + + AdminResponseImpl(Server::Instance& server, absl::string_view path, absl::string_view method, + CleanupFn cleanup) + : server_(server), opt_admin_(server.admin()), cleanup_(cleanup) { request_headers_->setMethod(method); request_headers_->setPath(path); } - void getHeaders(HeadersFn fn) override { - if (!opt_admin_) { - fn(Http::Code::InternalServerError, *response_headers_); - return; + ~AdminResponseImpl() { + bool call_cleanup = false; + { + absl::MutexLock lock(&mutex_); + if (!terminated_) { + call_cleanup = true; + } + } + if (call_cleanup) { + cleanup_(this); } + } - server_.dispatcher().post([this, fn]() { - if (!sent_headers_.exchange(true)) { - Server::AdminFilter filter(opt_admin_->createRequestFunction()); - filter.decodeHeaders(*request_headers_, false); - request_ = opt_admin_->makeRequest(filter); - code_ = request_->start(*response_headers_); - Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); - fn(code_, *response_headers_); + void getHeaders(HeadersFn fn) override { + { + absl::MutexLock lock(&mutex_); + ASSERT(headers_fn_ == nullptr); + if (cancelled_) { + return; } - }); - if (server_.isShutdown()) { - if (!sent_headers_.exchange(true)) { - fn(Http::Code::InternalServerError, *response_headers_); + headers_fn_ = fn; + if (terminated_ || !opt_admin_) { + sendErrorLockHeld(); + return; } } + server_.dispatcher().post([this, response = shared_from_this()]() { requestHeaders(); }); } void nextChunk(BodyFn fn) override { - server_.dispatcher().post([this, fn]() { - while (response_.length() == 0 && more_data_) { - more_data_ = request_->nextChunk(response_); + // Note the caller may race a call to nextChunk with the server being + // terminated. + { + absl::MutexLock lock(&mutex_); + ASSERT(body_fn_ == nullptr); + body_fn_ = fn; + if (terminated_ || cancelled_ || !opt_admin_) { + sendAbortChunkLockHeld(); + return; } - /* Buffer::SliceDataPtr slice; - absl::string_view chunk; - if (response_.length() != 0) { - slice = response_.extractMutableFrontSlice(); - absl::Span data = slice->getMutableData(); - more_data_ |= response_.length() != 0; - chunk = absl::string_view( - reinterpret_cast(data.data()), data.size()); - } else { - more_data_ = false; + } + + // Note that nextChunk may be called from any thread -- it's the callers choice, + // including the Envoy main thread, which would occur if the caller initiates + // the request of a chunk upon receipt of the previous chunk. + // + // In that case it may race against the AdminResponse object being deleted, + // in which case the callbacks, held in a shared_ptr, will be cancelled + // from the destructor. If that happens *before* we post to the main thread, + // we will just skip and never call fn. + server_.dispatcher().post([this, response = shared_from_this()]() { RequestNextChunk(); }); + } + + // Called by the user if it is not longer interested in the result of the + // admin request. After calling cancel() the caller must not call nextChunk or + // getHeaders. + void cancel() override { + absl::MutexLock lock(&mutex_); + cancelled_ = true; + headers_fn_ = nullptr; + body_fn_ = nullptr; + } + + // Called from MainCommonBase::terminateAdminRequests when the Envoy server + // terminates. After this is called, the caller may need to complete the + // admin response, and so calls to getHeader and nextChunk remain valid, + // resulting in 503 and an empty body. + void terminate() override { + absl::MutexLock lock(&mutex_); + terminated_ = true; + sendErrorLockHeld(); + sendAbortChunkLockHeld(); + } + + /*void handleBody(Buffer::Instance& chunk, bool more) { + // This always gets called from the main thread since that's + // where admin runs. But the caller may delete the Response + // from any thread. + absl::MutexLock lock(&mutex_); + if (!cancelled_ && body_fn_ != nullptr) { + if (more || !sent_end_stream_) { + sent_end_stream_ = !more; + body_fn_(chunk, more); + } + body_fn_ = nullptr; + } + }*/ + +private: + void requestHeaders() { + { + absl::MutexLock lock(&mutex_); + if (cancelled_ || terminated_) { + return; + } + } + Server::AdminFilter filter(*opt_admin_); + filter.decodeHeaders(*request_headers_, false); + request_ = opt_admin_->makeRequest(filter); + code_ = request_->start(*response_headers_); + { + absl::MutexLock lock(&mutex_); + if (headers_fn_ != nullptr && !cancelled_) { + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + headers_fn_(code_, *response_headers_); + headers_fn_ = nullptr; } - sendChunk(fn, chunk, more_data_); - */ - sendChunk(fn, response_, more_data_); - }); - - // We do not know if the post () worked. - if (server_.isShutdown()) { - Buffer::OwnedImpl error; - error.add("server was shut down"); - sendChunk(fn, error, false); } } - void sendChunk(BodyFn fn, Buffer::Instance& chunk, bool more_data) { + void RequestNextChunk() { { absl::MutexLock lock(&mutex_); - if (sent_end_stream_) { + if (cancelled_ || terminated_) { return; } - sent_end_stream_ = !more_data; } - fn(chunk, more_data); + while (response_.length() == 0 && more_data_) { + more_data_ = request_->nextChunk(response_); + } + { + absl::MutexLock lock(&mutex_); + if (sent_end_stream_ || cancelled_ || terminated_) { + return; + } + sent_end_stream_ = !more_data_; + body_fn_(response_, more_data_); + ASSERT(response_.length() == 0); + body_fn_ = nullptr; + } + } + + void sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + if (!sent_end_stream_ && body_fn_ != nullptr) { + response_.drain(response_.length()); + body_fn_(response_, false); + sent_end_stream_ = true; + } + body_fn_ = nullptr; + } + + void sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + if (headers_fn_ != nullptr) { + code_ = Http::Code::InternalServerError; + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + headers_fn_(code_, *response_headers_); + headers_fn_ = nullptr; + } } -private: Server::Instance& server_; OptRef opt_admin_; Buffer::OwnedImpl response_; Http::Code code_; Server::Admin::RequestPtr request_; + CleanupFn cleanup_; Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; - bool more_data_{true}; - std::atomic sent_headers_{false}; - bool sent_end_stream_{false}; + bool more_data_ = true; + + // True if cancel() was explicitly called by the user; headers and body + // callbacks are never called after cancel(). + bool cancelled_ ABSL_GUARDED_BY(mutex_) = false; + + // True if the Envoy server has stopped running its main loop. Headers + // and boxy callbacks are called after this. + bool terminated_ ABSL_GUARDED_BY(mutex_) = false; + + // bool sent_headers_ ABSL_GUARDED_BY(mutex_) = false; + bool sent_end_stream_ ABSL_GUARDED_BY(mutex_) = false; + HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); + BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); absl::Mutex mutex_; }; } // namespace -MainCommonBase::AdminResponsePtr MainCommonBase::adminRequest(absl::string_view path_and_query, - absl::string_view method) { - return std::make_unique(*server(), path_and_query, method); +void MainCommonBase::terminateAdminRequests() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + absl::flat_hash_set response_set; + absl::MutexLock lock(&mutex_); + while (!response_set_.empty()) { + auto iter = response_set_.begin(); + (*iter)->terminate(); + response_set_.erase(iter); + } +} + +void MainCommonBase::detachResponse(AdminResponse* response) { + absl::MutexLock lock(&mutex_); + response_set_.erase(response); +} + +MainCommonBase::AdminResponseSharedPtr +MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method) { + auto response = std::make_unique( + *server(), path_and_query, method, + [this](AdminResponse* response) { detachResponse(response); }); + absl::MutexLock lock(&mutex_); + response_set_.insert(response.get()); + return response; } #endif diff --git a/source/exe/main_common.h b/source/exe/main_common.h index dd18ed4d7022..a5839515093d 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -34,17 +34,65 @@ class MainCommonBase : public StrippedMainBase { bool run(); #ifdef ENVOY_ADMIN_FUNCTIONALITY - class AdminResponse { + class AdminResponse : public std::enable_shared_from_this { public: virtual ~AdminResponse() = default; + /** + * Requests the headers for the response. This can be called from any + * thread, and HeaderFn may also be called from any thread. + * + * HeadersFn will not be called after cancel(). It is invalid to + * to call nextChunk from within HeadersFn -- the caller must trigger + * such a call on another thread, after HeadersFn returns. Calling + * nextChunk from HeadersFn may deadlock. + * + * If the server is shut down during the operation, headersFn will + * be called with a 503. + * + * It is a programming error to call getHeaders after calling cancel. + * + * @param fn The function to be called with the headers and status code. + */ using HeadersFn = std::function; virtual void getHeaders(HeadersFn fn) PURE; + /** + * Requests a new chunk. This can be called from any thread, and the BodyFn + * callback may also be called from any thread. BodyFn will be called in a + * loop until the Buffer passed to it is fully drained. When 'false' is + * passed as the second arg to BodyFn, that signifies the end of the + * response, and nextChunk must not be called again. + * + * BodyFn will not be called after cancel(). It is invalid to + * to call nextChunk from within BodyFn -- the caller must trigger + * such a call on another thread, after BodyFn returns. Calling + * nextChunk from BodyFn may deadlock. + * + * If the server is shut down during the operation, bodyFn will + * be called with an empty body and 'false' for more_data. + * + * It is a programming error to call nextChunk after calling cancel. + * + * @param fn A function to be called on each chunk. + */ using BodyFn = std::function; virtual void nextChunk(BodyFn fn) PURE; + + /** + * Requests that any outstanding callbacks be dropped. This can be called + * when the context in which the request is made is destroyed. This can be + * useful to allow an application above to register a timeout. The Response + * itself is held as a shared_ptr as that makes it much easier to manage + * cancellation across multiple threads. + */ + virtual void cancel() PURE; + + // Called when the server is terminated. The response remains + // valid after this. + virtual void terminate() PURE; }; - using AdminResponsePtr = std::unique_ptr; + using AdminResponseSharedPtr = std::shared_ptr; using AdminRequestFn = std::function; @@ -63,7 +111,20 @@ class MainCommonBase : public StrippedMainBase { // semantics, rather than a handler callback. void adminRequest(absl::string_view path_and_query, absl::string_view method, const AdminRequestFn& handler); - AdminResponsePtr adminRequest(absl::string_view path_and_query, absl::string_view method); + AdminResponseSharedPtr adminRequest(absl::string_view path_and_query, absl::string_view method); + + // Called when a streaming response is terminated, either by completing normally + // or having the caller call cancel on it. Either way it needs to be removed from + // the set that will be used by terminateAdminRequests below. + void detachResponse(AdminResponse*); + +private: + // Called after the server run-loop finishes; any outstanding streaming admin requests + // will otherwise hang as the main-thread dispatcher loop will no longer run. + void terminateAdminRequests(); + + absl::Mutex mutex_; + absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); #endif }; @@ -95,8 +156,8 @@ class MainCommon { const MainCommonBase::AdminRequestFn& handler) { base_.adminRequest(path_and_query, method, handler); } - MainCommonBase::AdminResponsePtr adminRequest(absl::string_view path_and_query, - absl::string_view method) { + MainCommonBase::AdminResponseSharedPtr adminRequest(absl::string_view path_and_query, + absl::string_view method) { return base_.adminRequest(path_and_query, method); } #endif diff --git a/source/server/admin/admin.cc b/source/server/admin/admin.cc index 07f3b271381a..a7ffc489b97b 100644 --- a/source/server/admin/admin.cc +++ b/source/server/admin/admin.cc @@ -293,7 +293,7 @@ bool AdminImpl::createNetworkFilterChain(Network::Connection& connection, bool AdminImpl::createFilterChain(Http::FilterChainManager& manager, bool, const Http::FilterChainOptions&) const { Http::FilterFactoryCb factory = [this](Http::FilterChainFactoryCallbacks& callbacks) { - callbacks.addStreamFilter(std::make_shared(createRequestFunction())); + callbacks.addStreamFilter(std::make_shared(*this)); }; manager.applyFilterFactoryCb({}, factory); return true; @@ -494,7 +494,8 @@ bool AdminImpl::removeHandler(const std::string& prefix) { Http::Code AdminImpl::request(absl::string_view path_and_query, absl::string_view method, Http::ResponseHeaderMap& response_headers, std::string& body) { - AdminFilter filter(createRequestFunction()); + // AdminFilter filter(createRequestFunction()); + AdminFilter filter(*this); auto request_headers = Http::RequestHeaderMapImpl::create(); request_headers->setMethod(method); diff --git a/source/server/admin/admin.h b/source/server/admin/admin.h index b191e4932ff4..9ab25c0611d0 100644 --- a/source/server/admin/admin.h +++ b/source/server/admin/admin.h @@ -216,9 +216,6 @@ class AdminImpl : public Admin, void closeSocket() override; void addListenerToHandler(Network::ConnectionHandler* handler) override; - GenRequestFn createRequestFunction() const override { - return [this](AdminStream& admin_stream) -> RequestPtr { return makeRequest(admin_stream); }; - } uint64_t maxRequestsPerConnection() const override { return 0; } const HttpConnectionManagerProto::ProxyStatusConfig* proxyStatusConfig() const override { return proxy_status_config_.get(); diff --git a/source/server/admin/admin_filter.cc b/source/server/admin/admin_filter.cc index 7a11c251dc88..7de083c5d1b7 100644 --- a/source/server/admin/admin_filter.cc +++ b/source/server/admin/admin_filter.cc @@ -5,8 +5,7 @@ namespace Envoy { namespace Server { -AdminFilter::AdminFilter(Admin::GenRequestFn admin_handler_fn) - : admin_handler_fn_(admin_handler_fn) {} +AdminFilter::AdminFilter(const Admin& admin) : admin_(admin) {} Http::FilterHeadersStatus AdminFilter::decodeHeaders(Http::RequestHeaderMap& headers, bool end_stream) { @@ -87,7 +86,7 @@ void AdminFilter::onComplete() { auto header_map = Http::ResponseHeaderMapImpl::create(); RELEASE_ASSERT(request_headers_, ""); - Admin::RequestPtr handler = admin_handler_fn_(*this); + Admin::RequestPtr handler = admin_.makeRequest(*this); Http::Code code = handler->start(*header_map); Utility::populateFallbackResponseHeaders(code, *header_map); decoder_callbacks_->encodeHeaders(std::move(header_map), false, diff --git a/source/server/admin/admin_filter.h b/source/server/admin/admin_filter.h index e163029b2283..2dbe6d01df94 100644 --- a/source/server/admin/admin_filter.h +++ b/source/server/admin/admin_filter.h @@ -28,7 +28,13 @@ class AdminFilter : public Http::PassThroughFilter, absl::string_view path_and_query, Http::ResponseHeaderMap& response_headers, Buffer::OwnedImpl& response, AdminFilter& filter)>; - AdminFilter(Admin::GenRequestFn admin_handler_func); + /** + * Instantiates an AdminFilter. + * + * @param admin the admin context from which to create the filter. This is used + * to create a request object based on the path. + */ + AdminFilter(const Admin& admin); // Http::StreamFilterBase // Handlers relying on the reference should use addOnDestroyCallback() @@ -58,7 +64,7 @@ class AdminFilter : public Http::PassThroughFilter, * Called when an admin request has been completely received. */ void onComplete(); - Admin::GenRequestFn admin_handler_fn_; + const Admin& admin_; Http::RequestHeaderMap* request_headers_{}; std::list> on_destroy_callbacks_; bool end_stream_on_complete_ = true; diff --git a/source/server/config_validation/admin.h b/source/server/config_validation/admin.h index f57470e6e358..8e75607e3f8f 100644 --- a/source/server/config_validation/admin.h +++ b/source/server/config_validation/admin.h @@ -39,8 +39,6 @@ class ValidationAdmin : public Admin { void addListenerToHandler(Network::ConnectionHandler* handler) override; uint32_t concurrency() const override { return 1; } void closeSocket() override {} - - GenRequestFn createRequestFunction() const override { return nullptr; } RequestPtr makeRequest(AdminStream&) const override { return nullptr; } private: diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 22ba68ca508c..8e89bb3a8154 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -304,7 +304,7 @@ class AdminRequestTest : public MainCommonTest { return out; } - void setupStreamingRequest(absl::string_view prefix) { + MainCommonBase::AdminResponseSharedPtr setupStreamingRequest(absl::string_view prefix) { Server::Admin& admin = *main_common_->server()->admin(); admin.addStreamingHandler( std::string(prefix), "streaming api", @@ -312,13 +312,14 @@ class AdminRequestTest : public MainCommonTest { return std::make_unique(); }, true, false); + return main_common_->adminRequest("/stream", "GET"); } using ChunksBytes = std::pair; - ChunksBytes adminStreamingRequest(absl::string_view path, absl::string_view method) { + ChunksBytes runStreamingRequest(MainCommonBase::AdminResponseSharedPtr response, + std::function chunk_hook = nullptr) { absl::Notification done; std::vector out; - MainCommonBase::AdminResponsePtr response = main_common_->adminRequest(path, method); absl::Notification headers_notify; response->getHeaders( [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); @@ -337,6 +338,9 @@ class AdminRequestTest : public MainCommonTest { chunk_notify.Notify(); }); chunk_notify.WaitForNotification(); + if (chunk_hook != nullptr) { + chunk_hook(); + } } return ChunksBytes(num_chunks, num_bytes); @@ -430,8 +434,8 @@ TEST_P(AdminRequestTest, AdminStreamingRequestGetStatsAndQuit) { startEnvoy(); started_.WaitForNotification(); - setupStreamingRequest("/stream"); - ChunksBytes chunks_bytes = adminStreamingRequest("/stream", "GET"); + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + ChunksBytes chunks_bytes = runStreamingRequest(response); EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, chunks_bytes.second); @@ -440,6 +444,74 @@ TEST_P(AdminRequestTest, AdminStreamingRequestGetStatsAndQuit) { EXPECT_TRUE(waitForEnvoyToExit()); } +TEST_P(AdminRequestTest, AdminStreamingQuitDuringChunks) { + startEnvoy(); + started_.WaitForNotification(); + + setupStreamingRequest("/stream"); + int quit_counter = 0; + static constexpr int chunks_to_send_before_quitting = 3; + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { + if (++quit_counter == chunks_to_send_before_quitting) { + adminRequest("/quitquitquit", "POST"); + EXPECT_TRUE(waitForEnvoyToExit()); + } + }); + EXPECT_EQ(4, chunks_bytes.first); + EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); +} + +TEST_P(AdminRequestTest, AdminStreamingCancelDuringChunks) { + startEnvoy(); + started_.WaitForNotification(); + + setupStreamingRequest("/stream"); + int quit_counter = 0; + static constexpr int chunks_to_send_before_quitting = 3; + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { + if (++quit_counter == chunks_to_send_before_quitting) { + response->cancel(); + } + }); + EXPECT_EQ(4, chunks_bytes.first); + EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); + adminRequest("/quitquitquit", "POST"); + EXPECT_TRUE(waitForEnvoyToExit()); +} + +TEST_P(AdminRequestTest, AdminStreamingCancelBeforeAskingForHeader) { + startEnvoy(); + started_.WaitForNotification(); + + setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + response->cancel(); + int header_calls = 0; + + // After 'cancel', the headers function will not be called. + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + adminRequest("/quitquitquit", "POST"); + EXPECT_TRUE(waitForEnvoyToExit()); + EXPECT_EQ(0, header_calls); +} + +TEST_P(AdminRequestTest, AdminStreamingQuitBeforeHeaders) { + startEnvoy(); + started_.WaitForNotification(); + + setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + adminRequest("/quitquitquit", "POST"); + EXPECT_TRUE(waitForEnvoyToExit()); + ChunksBytes chunks_bytes = runStreamingRequest(response); + EXPECT_EQ(1, chunks_bytes.first); + EXPECT_EQ(0, chunks_bytes.second); +} + // no signals on Windows -- could probably make this work with GenerateConsoleCtrlEvent #ifndef WIN32 // This test is identical to the above one, except that instead of using an admin /quitquitquit, diff --git a/test/mocks/server/admin.h b/test/mocks/server/admin.h index 9c8b1875a018..a5be3ab379a0 100644 --- a/test/mocks/server/admin.h +++ b/test/mocks/server/admin.h @@ -40,7 +40,6 @@ class MockAdmin : public Admin { MOCK_METHOD(void, addListenerToHandler, (Network::ConnectionHandler * handler)); MOCK_METHOD(uint32_t, concurrency, (), (const)); MOCK_METHOD(void, closeSocket, ()); - MOCK_METHOD(GenRequestFn, createRequestFunction, (), (const)); MOCK_METHOD(RequestPtr, makeRequest, (AdminStream & admin_stream), (const)); NiceMock config_tracker_; diff --git a/test/server/admin/admin_filter_test.cc b/test/server/admin/admin_filter_test.cc index 9627cfd1ca31..fbf6c93cd72d 100644 --- a/test/server/admin/admin_filter_test.cc +++ b/test/server/admin/admin_filter_test.cc @@ -7,27 +7,28 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +using testing::ByMove; using testing::InSequence; using testing::NiceMock; +using testing::Return; namespace Envoy { namespace Server { class AdminFilterTest : public testing::TestWithParam { public: - AdminFilterTest() : filter_(adminHandlerCallback), request_headers_{{":path", "/"}} { + AdminFilterTest() : filter_(admin_), request_headers_{{":path", "/"}} { + EXPECT_CALL(admin_, makeRequest(_)).WillOnce(Return(ByMove(adminHandlerCallback()))); filter_.setDecoderFilterCallbacks(callbacks_); } - NiceMock server_; + NiceMock admin_; Stats::IsolatedStoreImpl listener_scope_; AdminFilter filter_; NiceMock callbacks_; Http::TestRequestHeaderMapImpl request_headers_; - static Admin::RequestPtr adminHandlerCallback(AdminStream& admin_stream) { - // silence compiler warnings for unused params - UNREFERENCED_PARAMETER(admin_stream); + static Admin::RequestPtr adminHandlerCallback() { return AdminImpl::makeStaticTextRequest("OK\n", Http::Code::OK); } }; diff --git a/test/server/admin/admin_instance.cc b/test/server/admin/admin_instance.cc index 7b18c448e18f..a2b4d2f15d17 100644 --- a/test/server/admin/admin_instance.cc +++ b/test/server/admin/admin_instance.cc @@ -10,7 +10,7 @@ namespace Server { AdminInstanceTest::AdminInstanceTest() : cpu_profile_path_(TestEnvironment::temporaryPath("envoy.prof")), admin_(cpu_profile_path_, server_, false), request_headers_{{":path", "/"}}, - admin_filter_(admin_.createRequestFunction()) { + admin_filter_(admin_) { std::list access_logs; Filesystem::FilePathAndType file_info{Filesystem::DestinationType::File, "/dev/null"}; access_logs.emplace_back(new Extensions::AccessLoggers::File::FileAccessLog( From b56ecb4c01729427a937cf1bc94d26dc147bbc28 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 18 Feb 2024 17:48:46 -0500 Subject: [PATCH 05/39] try to fix coverage and compiling without admin. Signed-off-by: Joshua Marantz --- envoy/server/admin.h | 2 +- source/exe/main_common.cc | 5 ++++- test/server/config_validation/BUILD | 1 + test/server/config_validation/server_test.cc | 3 +++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/envoy/server/admin.h b/envoy/server/admin.h index 10c4e35d47fb..48c1629fef44 100644 --- a/envoy/server/admin.h +++ b/envoy/server/admin.h @@ -283,7 +283,7 @@ class Admin { virtual void closeSocket() PURE; /** - * Creates a Request from the request in the admin stream. + * Creates a streaming request context from the url path in the admin stream. */ virtual RequestPtr makeRequest(AdminStream& admin_stream) const PURE; }; diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index ea9111721db9..a2bcadba254e 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -60,13 +60,16 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem std::move(process_context), createFunction()) {} bool MainCommonBase::run() { - // Avoid returning from cases due to gcc warnings and uncovered lines. + // Avoid returning from inside switch cases to minimize uncovered lines + // while avoid gcc warnings by hitting the final return. bool ret = true; switch (options_.mode()) { case Server::Mode::Serve: runServer(); +#ifdef ENVOY_ADMIN_FUNCTIONALITY terminateAdminRequests(); +#endif break; case Server::Mode::Validate: ret = Server::validateConfig( diff --git a/test/server/config_validation/BUILD b/test/server/config_validation/BUILD index 22184df662cc..a81fe4773d4e 100644 --- a/test/server/config_validation/BUILD +++ b/test/server/config_validation/BUILD @@ -59,6 +59,7 @@ envoy_cc_test( "//source/extensions/filters/network/http_connection_manager:config", "//source/extensions/listener_managers/validation_listener_manager:validation_listener_manager_lib", "//source/extensions/transport_sockets/tls:config", + "//source/server/admin:admin_filter_lib", "//source/server/config_validation:server_lib", "//test/integration:integration_lib", "//test/mocks/network:network_mocks", diff --git a/test/server/config_validation/server_test.cc b/test/server/config_validation/server_test.cc index 8102b84b01d4..6462503b2cc8 100644 --- a/test/server/config_validation/server_test.cc +++ b/test/server/config_validation/server_test.cc @@ -4,6 +4,7 @@ #include "envoy/server/filter_config.h" #include "source/extensions/listener_managers/validation_listener_manager/validation_listener_manager.h" +#include "source/server/admin/admin_filter.h" #include "source/server/config_validation/server.h" #include "source/server/process_context_impl.h" @@ -207,6 +208,8 @@ TEST_P(ValidationServerTest, DummyMethodsTest) { server.admin()->addListenerToHandler(nullptr); server.admin()->closeSocket(); server.admin()->startHttpListener({}, nullptr, nullptr); + AdminFilter filter(*server.admin()); + EXPECT_TRUE(server.admin()->makeRequest(filter) == nullptr); Network::MockTcpListenerCallbacks listener_callbacks; Network::MockListenerConfig listener_config; From aed8263c49f5f94f76a62af0053d7aa0da7f1cc7 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sun, 18 Feb 2024 23:19:03 -0500 Subject: [PATCH 06/39] checkpoint Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 67 +++++++++++++++++++++++++++------------ source/exe/main_common.h | 47 +++++++++++++++++++-------- 2 files changed, 80 insertions(+), 34 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index a2bcadba254e..b6094837b946 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -106,6 +106,7 @@ void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string } namespace { + class AdminResponseImpl : public MainCommonBase::AdminResponse { public: using CleanupFn = std::function; @@ -118,6 +119,29 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { } ~AdminResponseImpl() { + // MainCommonBase::response_set_ holds a raw pointer to all outstanding + // responses (not a shared_ptr). So when destructing the response + // we must call cleanup on MainCommonBase if this has not already + // occurred. + // + // Note it's also possible for MainCommonBase to be deleted before + // AdminResponseImpl, in which case it will use its resposne_set_ + // to call terminate, so we'll track that here and skip calling + // cleanup_ in that case. + // + // We must consider a race between MainCommonBase::terminateAdminRequests + // and the this destructor. Note that terminateAdminRequests takes + // FIX ME + + if (!terminated_) { + cleanup_(this); + } + + + // cleanup_(this); + // cleanup_ = null; + // } + /* bool call_cleanup = false; { absl::MutexLock lock(&mutex_); @@ -128,6 +152,10 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { if (call_cleanup) { cleanup_(this); } + */ + //if (main_common_ != nullptr) { + // main_common_->detachResponse(this); + //] } } void getHeaders(HeadersFn fn) override { @@ -185,26 +213,14 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { // admin response, and so calls to getHeader and nextChunk remain valid, // resulting in 503 and an empty body. void terminate() override { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + absl::MutexLock lock(&mutex_); terminated_ = true; sendErrorLockHeld(); sendAbortChunkLockHeld(); } - /*void handleBody(Buffer::Instance& chunk, bool more) { - // This always gets called from the main thread since that's - // where admin runs. But the caller may delete the Response - // from any thread. - absl::MutexLock lock(&mutex_); - if (!cancelled_ && body_fn_ != nullptr) { - if (more || !sent_end_stream_) { - sent_end_stream_ = !more; - body_fn_(chunk, more); - } - body_fn_ = nullptr; - } - }*/ - private: void requestHeaders() { { @@ -291,16 +307,25 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); absl::Mutex mutex_; }; + } // namespace void MainCommonBase::terminateAdminRequests() { ASSERT_IS_MAIN_OR_TEST_THREAD(); - absl::flat_hash_set response_set; - absl::MutexLock lock(&mutex_); - while (!response_set_.empty()) { - auto iter = response_set_.begin(); - (*iter)->terminate(); - response_set_.erase(iter); + + // Move the response-set into a local so we can terminate the + // responses without holding our mutex, as the response-termination + // can call back into MainCommonBase without deadlocking. + //absl::flat_hash_set response_set; + { + absl::MutexLock lock(&mutex_); + //response_set = std::move(response_set_); + //response_set_.clear(); + // } + + for (AdminResponse* response : response_set_) { + response->terminate(); + } } } @@ -311,7 +336,7 @@ void MainCommonBase::detachResponse(AdminResponse* response) { MainCommonBase::AdminResponseSharedPtr MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method) { - auto response = std::make_unique( + auto response = std::make_shared( *server(), path_and_query, method, [this](AdminResponse* response) { detachResponse(response); }); absl::MutexLock lock(&mutex_); diff --git a/source/exe/main_common.h b/source/exe/main_common.h index a5839515093d..b822e39b23c1 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -34,6 +34,23 @@ class MainCommonBase : public StrippedMainBase { bool run(); #ifdef ENVOY_ADMIN_FUNCTIONALITY + // Holds context for a streaming response from the admin system, enablimg + // flow-control into another system. This is particularly important when the + // generated response is very large, such that holding it in memory may cause + // fragmention or out-of-memory failures. It is possible to interleave xDS + // response handling, overload management, and other admin requests during the + // streaming of a long admin response. + // + // There can be be multiple AdminResponses at a time; each are separately + // managed. However they will obtain their data from Envoy functions that + // run on the main thread. + // + // Responses may still be active after the server has shut down, and is no + // longer running its main thread dispatcher. In this state, the callbacks + // will be called with appropriate error codes. + // + // Requests can also be cancelled explicitly by calling cancel(). After + // cancel() is called, no further callbacks will be called by the response. class AdminResponse : public std::enable_shared_from_this { public: virtual ~AdminResponse() = default; @@ -47,10 +64,8 @@ class MainCommonBase : public StrippedMainBase { * such a call on another thread, after HeadersFn returns. Calling * nextChunk from HeadersFn may deadlock. * - * If the server is shut down during the operation, headersFn will - * be called with a 503. - * - * It is a programming error to call getHeaders after calling cancel. + * If the server is shut down during the operation, headersFn may + * be called with a 503, if it has not already been called. * * @param fn The function to be called with the headers and status code. */ @@ -70,9 +85,8 @@ class MainCommonBase : public StrippedMainBase { * nextChunk from BodyFn may deadlock. * * If the server is shut down during the operation, bodyFn will - * be called with an empty body and 'false' for more_data. - * - * It is a programming error to call nextChunk after calling cancel. + * be called with an empty body and 'false' for more_data, if + * this has not already occurred. * * @param fn A function to be called on each chunk. */ @@ -81,15 +95,22 @@ class MainCommonBase : public StrippedMainBase { /** * Requests that any outstanding callbacks be dropped. This can be called - * when the context in which the request is made is destroyed. This can be - * useful to allow an application above to register a timeout. The Response - * itself is held as a shared_ptr as that makes it much easier to manage - * cancellation across multiple threads. + * when the context in which the request is made is destroyed. This enables + * an application to implement a. The Response itself is held as a + * shared_ptr as that makes it much easier to manage cancellation across + * multiple threads. */ virtual void cancel() PURE; - // Called when the server is terminated. The response remains - // valid after this. + private: + friend class MainCommonBase; + + /** + * Called when the server is terminated. This calls any outstanding + * callbacks to be called. If nextChunk is called after termination, + * its callback is called false for the second arg, indicating + * end of stream. + */ virtual void terminate() PURE; }; using AdminResponseSharedPtr = std::shared_ptr; From 3973c923759894a30236d30846359320649a4799 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 19 Feb 2024 00:25:12 -0500 Subject: [PATCH 07/39] comment and cleanup Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 73 +++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 42 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index b6094837b946..e840407c11d8 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -125,44 +125,29 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { // occurred. // // Note it's also possible for MainCommonBase to be deleted before - // AdminResponseImpl, in which case it will use its resposne_set_ + // AdminResponseImpl, in which case it will use its response_set_ // to call terminate, so we'll track that here and skip calling // cleanup_ in that case. - // - // We must consider a race between MainCommonBase::terminateAdminRequests - // and the this destructor. Note that terminateAdminRequests takes - // FIX ME - if (!terminated_) { - cleanup_(this); - } - - - // cleanup_(this); - // cleanup_ = null; - // } - /* - bool call_cleanup = false; - { - absl::MutexLock lock(&mutex_); - if (!terminated_) { - call_cleanup = true; - } - } - if (call_cleanup) { - cleanup_(this); + detaching_ = true; + + // If there is a terminate/destruct race, calling cleanup_ below + // will lock MainCommonBase::mutex_, which is being held by + // terminateAdminRequests, so will block here until all the + // terminate calls are made. Once terminateAdminRequests + // release its lock, cleanup_ will return and the object + // can be safely destructed. + cleanup_(this); // lock in MainCommonBase } - */ - //if (main_common_ != nullptr) { - // main_common_->detachResponse(this); - //] } } void getHeaders(HeadersFn fn) override { + // First check for cancelling or termination. { absl::MutexLock lock(&mutex_); ASSERT(headers_fn_ == nullptr); if (cancelled_) { + // We do not call callbacks after cancel(). return; } headers_fn_ = fn; @@ -195,7 +180,7 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { // in which case the callbacks, held in a shared_ptr, will be cancelled // from the destructor. If that happens *before* we post to the main thread, // we will just skip and never call fn. - server_.dispatcher().post([this, response = shared_from_this()]() { RequestNextChunk(); }); + server_.dispatcher().post([this, response = shared_from_this()]() { requestNextChunk(); }); } // Called by the user if it is not longer interested in the result of the @@ -216,6 +201,13 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { ASSERT_IS_MAIN_OR_TEST_THREAD(); absl::MutexLock lock(&mutex_); + + // If the Envoy server termination races with destruction, we set detaching_ + // first in the destructor and check here to avoid writing to the object as + // it is being destroyed. + if (detaching_) { + return; + } terminated_ = true; sendErrorLockHeld(); sendAbortChunkLockHeld(); @@ -223,6 +215,7 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { private: void requestHeaders() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); { absl::MutexLock lock(&mutex_); if (cancelled_ || terminated_) { @@ -243,7 +236,8 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { } } - void RequestNextChunk() { + void requestNextChunk() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); { absl::MutexLock lock(&mutex_); if (cancelled_ || terminated_) { @@ -300,6 +294,7 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { // True if the Envoy server has stopped running its main loop. Headers // and boxy callbacks are called after this. bool terminated_ ABSL_GUARDED_BY(mutex_) = false; + bool detaching_ ABSL_GUARDED_BY(mutex_) = false; // bool sent_headers_ ABSL_GUARDED_BY(mutex_) = false; bool sent_end_stream_ ABSL_GUARDED_BY(mutex_) = false; @@ -313,19 +308,13 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { void MainCommonBase::terminateAdminRequests() { ASSERT_IS_MAIN_OR_TEST_THREAD(); - // Move the response-set into a local so we can terminate the - // responses without holding our mutex, as the response-termination - // can call back into MainCommonBase without deadlocking. - //absl::flat_hash_set response_set; - { - absl::MutexLock lock(&mutex_); - //response_set = std::move(response_set_); - //response_set_.clear(); - // } - - for (AdminResponse* response : response_set_) { - response->terminate(); - } + absl::MutexLock lock(&mutex_); + for (AdminResponse* response : response_set_) { + // Consider the possibility of response being deleted due to its creator + // dropping its last reference right here. From its destructor it will call + // detachResponse(), which is mutex-ed against this loop, so before the + // memory becomes invalid, the call to terminate will complete. + response->terminate(); } } From 9237ff9d21473e2893d6c4a1ae0450f0ed6be1e9 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 19 Feb 2024 08:49:41 -0500 Subject: [PATCH 08/39] format Signed-off-by: Joshua Marantz --- source/exe/main_common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/exe/main_common.h b/source/exe/main_common.h index b822e39b23c1..05b61ed19729 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -34,10 +34,10 @@ class MainCommonBase : public StrippedMainBase { bool run(); #ifdef ENVOY_ADMIN_FUNCTIONALITY - // Holds context for a streaming response from the admin system, enablimg + // Holds context for a streaming response from the admin system, enabling // flow-control into another system. This is particularly important when the // generated response is very large, such that holding it in memory may cause - // fragmention or out-of-memory failures. It is possible to interleave xDS + // fragmentation or out-of-memory failures. It is possible to interleave xDS // response handling, overload management, and other admin requests during the // streaming of a long admin response. // @@ -102,7 +102,7 @@ class MainCommonBase : public StrippedMainBase { */ virtual void cancel() PURE; - private: + private: friend class MainCommonBase; /** From 895e552b391fe4d8594f34329a5d1d2055968895 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 19 Feb 2024 09:55:38 -0500 Subject: [PATCH 09/39] add asserts, refactor tests, and add quit/cancel race. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 28 +++- source/exe/main_common.h | 1 + test/exe/main_common_test.cc | 301 +++++++++++++++++------------------ 3 files changed, 172 insertions(+), 158 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index e840407c11d8..56f60d55d154 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -200,11 +200,16 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { void terminate() override { ASSERT_IS_MAIN_OR_TEST_THREAD(); + // To handle a potential race of Envoy terminate() destruction, we set + // detaching_ first in the destructor and check here to avoid writing to the + // object as it is being destroyed. If terminate() locks first, the + // destructor will block on it and then early exit. If the destructor hits + // during the loop in terminateAdminRequests, it will first set + // detaching_=true, and then call MainCommonBase::detachResponse which will + // acquire MainCommonBase::mutex_, held by terminateAdminRequests. This + // will allow the loop to complete cleanly before detachResponse attempts + // to mutate the set. absl::MutexLock lock(&mutex_); - - // If the Envoy server termination races with destruction, we set detaching_ - // first in the destructor and check here to avoid writing to the object as - // it is being destroyed. if (detaching_) { return; } @@ -309,6 +314,7 @@ void MainCommonBase::terminateAdminRequests() { ASSERT_IS_MAIN_OR_TEST_THREAD(); absl::MutexLock lock(&mutex_); + accepting_admin_requests_ = false; for (AdminResponse* response : response_set_) { // Consider the possibility of response being deleted due to its creator // dropping its last reference right here. From its destructor it will call @@ -316,11 +322,17 @@ void MainCommonBase::terminateAdminRequests() { // memory becomes invalid, the call to terminate will complete. response->terminate(); } + response_set_.clear(); } void MainCommonBase::detachResponse(AdminResponse* response) { absl::MutexLock lock(&mutex_); - response_set_.erase(response); + + // In a race between ~AdminResponseImpl and terminateAdminRequests, + // the response_set_ may already have been cleared and the erasure + // below will not have any effect; this is OK. + int erased = response_set_.erase(response); + ASSERT(erased == 1 || !accepting_admin_requests_); } MainCommonBase::AdminResponseSharedPtr @@ -329,7 +341,11 @@ MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view *server(), path_and_query, method, [this](AdminResponse* response) { detachResponse(response); }); absl::MutexLock lock(&mutex_); - response_set_.insert(response.get()); + if (accepting_admin_requests_) { + response_set_.insert(response.get()); + } else { + response->terminate(); + } return response; } #endif diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 05b61ed19729..17b7a1666d3a 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -146,6 +146,7 @@ class MainCommonBase : public StrippedMainBase { absl::Mutex mutex_; absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); + bool accepting_admin_requests_ ABSL_GUARDED_BY(mutex_) = true; #endif }; diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 8e89bb3a8154..b332f012c151 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -45,23 +45,6 @@ const std::string& outOfMemoryPattern() { } #endif -class StreamingAdminRequest : public Envoy::Server::Admin::Request { -public: - static constexpr uint64_t NumChunks = 10; - static constexpr uint64_t BytesPerChunk = 10000; - - StreamingAdminRequest() : chunk_(BytesPerChunk, 'a') {} - Http::Code start(Http::ResponseHeaderMap&) override { return Http::Code::OK; } - bool nextChunk(Buffer::Instance& response) override { - response.add(chunk_); - return --chunks_remaining_ > 0; - } - -private: - const std::string chunk_; - uint64_t chunks_remaining_{NumChunks}; -}; - } // namespace /** @@ -240,6 +223,16 @@ TEST_P(MainCommonTest, RetryDynamicBaseIdFails) { #endif } +// Verifies that the Logger::Registry is usable after constructing and +// destructing MainCommon. +TEST_P(MainCommonTest, ConstructDestructLogger) { + VERBOSE_EXPECT_NO_THROW(MainCommon main_common(argc(), argv())); + + const std::string logger_name = "logger"; + spdlog::details::log_msg log_msg(logger_name, spdlog::level::level_enum::err, "error"); + Logger::Registry::getSink()->log(log_msg); +} + // Test that std::set_new_handler() was called and the callback functions as expected. // This test fails under TSAN and ASAN, so don't run it in that build: // [ DEATH ] ==845==ERROR: ThreadSanitizer: requested allocation size 0x3e800000000 @@ -304,48 +297,6 @@ class AdminRequestTest : public MainCommonTest { return out; } - MainCommonBase::AdminResponseSharedPtr setupStreamingRequest(absl::string_view prefix) { - Server::Admin& admin = *main_common_->server()->admin(); - admin.addStreamingHandler( - std::string(prefix), "streaming api", - [](Server::AdminStream&) -> Server::Admin::RequestPtr { - return std::make_unique(); - }, - true, false); - return main_common_->adminRequest("/stream", "GET"); - } - - using ChunksBytes = std::pair; - ChunksBytes runStreamingRequest(MainCommonBase::AdminResponseSharedPtr response, - std::function chunk_hook = nullptr) { - absl::Notification done; - std::vector out; - absl::Notification headers_notify; - response->getHeaders( - [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); - headers_notify.WaitForNotification(); - bool cont = true; - uint64_t num_chunks = 0; - uint64_t num_bytes = 0; - while (cont) { - absl::Notification chunk_notify; - response->nextChunk( - [&chunk_notify, &num_chunks, &num_bytes, &cont](Buffer::Instance& chunk, bool more) { - cont = more; - num_bytes += chunk.length(); - chunk.drain(chunk.length()); - ++num_chunks; - chunk_notify.Notify(); - }); - chunk_notify.WaitForNotification(); - if (chunk_hook != nullptr) { - chunk_hook(); - } - } - - return ChunksBytes(num_chunks, num_bytes); - } - // Initiates Envoy running in its own thread. void startEnvoy() { envoy_thread_ = Thread::threadFactoryForTest().createThread([this]() { @@ -405,6 +356,11 @@ class AdminRequestTest : public MainCommonTest { return envoy_return_; } + void quitAndWait() { + adminRequest("/quitquitquit", "POST"); + EXPECT_TRUE(waitForEnvoyToExit()); + } + Stats::IsolatedStoreImpl stats_store_; std::unique_ptr envoy_thread_; std::unique_ptr main_common_; @@ -426,90 +382,7 @@ TEST_P(AdminRequestTest, AdminRequestGetStatsAndQuit) { startEnvoy(); started_.WaitForNotification(); EXPECT_THAT(adminRequest("/stats", "GET"), HasSubstr("filesystem.reopen_failed")); - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); -} - -TEST_P(AdminRequestTest, AdminStreamingRequestGetStatsAndQuit) { - startEnvoy(); - started_.WaitForNotification(); - - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); - EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); - - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); -} - -TEST_P(AdminRequestTest, AdminStreamingQuitDuringChunks) { - startEnvoy(); - started_.WaitForNotification(); - - setupStreamingRequest("/stream"); - int quit_counter = 0; - static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); - ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { - if (++quit_counter == chunks_to_send_before_quitting) { - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); - } - }); - EXPECT_EQ(4, chunks_bytes.first); - EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); -} - -TEST_P(AdminRequestTest, AdminStreamingCancelDuringChunks) { - startEnvoy(); - started_.WaitForNotification(); - - setupStreamingRequest("/stream"); - int quit_counter = 0; - static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); - ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { - if (++quit_counter == chunks_to_send_before_quitting) { - response->cancel(); - } - }); - EXPECT_EQ(4, chunks_bytes.first); - EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); -} - -TEST_P(AdminRequestTest, AdminStreamingCancelBeforeAskingForHeader) { - startEnvoy(); - started_.WaitForNotification(); - - setupStreamingRequest("/stream"); - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); - response->cancel(); - int header_calls = 0; - - // After 'cancel', the headers function will not be called. - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); - EXPECT_EQ(0, header_calls); -} - -TEST_P(AdminRequestTest, AdminStreamingQuitBeforeHeaders) { - startEnvoy(); - started_.WaitForNotification(); - - setupStreamingRequest("/stream"); - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(1, chunks_bytes.first); - EXPECT_EQ(0, chunks_bytes.second); + quitAndWait(); } // no signals on Windows -- could probably make this work with GenerateConsoleCtrlEvent @@ -600,8 +473,7 @@ TEST_P(AdminRequestTest, AdminRequestBeforeRun) { // We don't get a notification when run(), so it's not safe to check whether the // admin handler is called until after we quit. - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); + quitAndWait(); EXPECT_TRUE(admin_handler_was_called); // This just checks that some stat output was reported. We could pick any stat. @@ -656,14 +528,139 @@ TEST_P(AdminRequestTest, AdminRequestAfterRun) { EXPECT_EQ(1, lambda_destroy_count); } -// Verifies that the Logger::Registry is usable after constructing and -// destructing MainCommon. -TEST_P(MainCommonTest, ConstructDestructLogger) { - VERBOSE_EXPECT_NO_THROW(MainCommon main_common(argc(), argv())); +class AdminStreamingTest : public AdminRequestTest { +protected: + class StreamingAdminRequest : public Envoy::Server::Admin::Request { + public: + static constexpr uint64_t NumChunks = 10; + static constexpr uint64_t BytesPerChunk = 10000; + + StreamingAdminRequest() : chunk_(BytesPerChunk, 'a') {} + Http::Code start(Http::ResponseHeaderMap&) override { return Http::Code::OK; } + bool nextChunk(Buffer::Instance& response) override { + response.add(chunk_); + return --chunks_remaining_ > 0; + } - const std::string logger_name = "logger"; - spdlog::details::log_msg log_msg(logger_name, spdlog::level::level_enum::err, "error"); - Logger::Registry::getSink()->log(log_msg); + private: + const std::string chunk_; + uint64_t chunks_remaining_{NumChunks}; + }; + + MainCommonBase::AdminResponseSharedPtr setupStreamingRequest(absl::string_view prefix) { + startEnvoy(); + started_.WaitForNotification(); + Server::Admin& admin = *main_common_->server()->admin(); + admin.addStreamingHandler( + std::string(prefix), "streaming api", + [](Server::AdminStream&) -> Server::Admin::RequestPtr { + return std::make_unique(); + }, + true, false); + return main_common_->adminRequest(prefix, "GET"); + } + + using ChunksBytes = std::pair; + ChunksBytes runStreamingRequest(MainCommonBase::AdminResponseSharedPtr response, + std::function chunk_hook = nullptr) { + absl::Notification done; + std::vector out; + absl::Notification headers_notify; + response->getHeaders( + [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); + headers_notify.WaitForNotification(); + bool cont = true; + uint64_t num_chunks = 0; + uint64_t num_bytes = 0; + while (cont) { + absl::Notification chunk_notify; + response->nextChunk( + [&chunk_notify, &num_chunks, &num_bytes, &cont](Buffer::Instance& chunk, bool more) { + cont = more; + num_bytes += chunk.length(); + chunk.drain(chunk.length()); + ++num_chunks; + chunk_notify.Notify(); + }); + chunk_notify.WaitForNotification(); + if (chunk_hook != nullptr) { + chunk_hook(); + } + } + + return ChunksBytes(num_chunks, num_bytes); + } +}; + +INSTANTIATE_TEST_SUITE_P(IpVersions, AdminStreamingTest, + testing::ValuesIn(TestEnvironment::getIpVersionsForTest()), + TestUtility::ipTestParamsToString); + +TEST_P(AdminStreamingTest, AdminStreamingRequestGetStatsAndQuit) { + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + ChunksBytes chunks_bytes = runStreamingRequest(response); + EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); + EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); + quitAndWait(); +} + +TEST_P(AdminStreamingTest, AdminStreamingQuitDuringChunks) { + int quit_counter = 0; + static constexpr int chunks_to_send_before_quitting = 3; + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { + if (++quit_counter == chunks_to_send_before_quitting) { + quitAndWait(); + } + }); + EXPECT_EQ(4, chunks_bytes.first); + EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); +} + +TEST_P(AdminStreamingTest, AdminStreamingCancelDuringChunks) { + int quit_counter = 0; + static constexpr int chunks_to_send_before_quitting = 3; + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { + if (++quit_counter == chunks_to_send_before_quitting) { + response->cancel(); + } + }); + EXPECT_EQ(4, chunks_bytes.first); + EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); + quitAndWait(); +} + +TEST_P(AdminStreamingTest, AdminStreamingCancelBeforeAskingForHeader) { + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + response->cancel(); + int header_calls = 0; + + // After 'cancel', the headers function will not be called. + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + quitAndWait(); + EXPECT_EQ(0, header_calls); +} + +TEST_P(AdminStreamingTest, AdminStreamingQuitBeforeHeaders) { + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + quitAndWait(); + ChunksBytes chunks_bytes = runStreamingRequest(response); + EXPECT_EQ(1, chunks_bytes.first); + EXPECT_EQ(0, chunks_bytes.second); +} + +TEST_P(AdminStreamingTest, QuitTerminateRace) { + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + // Initiates a streaming quit on the main thread, but do not wait for it. + MainCommonBase::AdminResponseSharedPtr quit_response = + main_common_->adminRequest("/quitquitquit", "POST"); + quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); + response.reset(); // Races with the quitquitquit + EXPECT_TRUE(waitForEnvoyToExit()); } } // namespace Envoy From 4784a23d2d0288bfa8ccf2251fcd17371922744e Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 19 Feb 2024 10:43:40 -0500 Subject: [PATCH 10/39] add cancel/quit race test Signed-off-by: Joshua Marantz --- test/exe/main_common_test.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index b332f012c151..1b359e60852b 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -653,7 +653,7 @@ TEST_P(AdminStreamingTest, AdminStreamingQuitBeforeHeaders) { EXPECT_EQ(0, chunks_bytes.second); } -TEST_P(AdminStreamingTest, QuitTerminateRace) { +TEST_P(AdminStreamingTest, QuitDeleteRace) { MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); // Initiates a streaming quit on the main thread, but do not wait for it. MainCommonBase::AdminResponseSharedPtr quit_response = @@ -663,4 +663,14 @@ TEST_P(AdminStreamingTest, QuitTerminateRace) { EXPECT_TRUE(waitForEnvoyToExit()); } +TEST_P(AdminStreamingTest, QuitCancelRace) { + MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + // Initiates a streaming quit on the main thread, but do not wait for it. + MainCommonBase::AdminResponseSharedPtr quit_response = + main_common_->adminRequest("/quitquitquit", "POST"); + quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); + response->cancel(); // Races with the quitquitquit + EXPECT_TRUE(waitForEnvoyToExit()); +} + } // namespace Envoy From d2526595148d224fec6a8f611f06b64388cda1d1 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 19 Feb 2024 15:08:33 -0500 Subject: [PATCH 11/39] tighten up logic and asserts. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 41 +++++++++++++------------------- test/exe/main_common_test.cc | 46 ++++++++++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 34 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 56f60d55d154..39b866668dcf 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -129,8 +129,6 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { // to call terminate, so we'll track that here and skip calling // cleanup_ in that case. if (!terminated_) { - detaching_ = true; - // If there is a terminate/destruct race, calling cleanup_ below // will lock MainCommonBase::mutex_, which is being held by // terminateAdminRequests, so will block here until all the @@ -199,20 +197,7 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { // resulting in 503 and an empty body. void terminate() override { ASSERT_IS_MAIN_OR_TEST_THREAD(); - - // To handle a potential race of Envoy terminate() destruction, we set - // detaching_ first in the destructor and check here to avoid writing to the - // object as it is being destroyed. If terminate() locks first, the - // destructor will block on it and then early exit. If the destructor hits - // during the loop in terminateAdminRequests, it will first set - // detaching_=true, and then call MainCommonBase::detachResponse which will - // acquire MainCommonBase::mutex_, held by terminateAdminRequests. This - // will allow the loop to complete cleanly before detachResponse attempts - // to mutate the set. absl::MutexLock lock(&mutex_); - if (detaching_) { - return; - } terminated_ = true; sendErrorLockHeld(); sendAbortChunkLockHeld(); @@ -296,13 +281,19 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { // callbacks are never called after cancel(). bool cancelled_ ABSL_GUARDED_BY(mutex_) = false; - // True if the Envoy server has stopped running its main loop. Headers - // and boxy callbacks are called after this. + // True if the Envoy server has stopped running its main loop. Headers and + // body requests can be initiated and called back are called after terminate, + // so callers do not have to special case this -- the request will simply fail + // with an empty response. bool terminated_ ABSL_GUARDED_BY(mutex_) = false; - bool detaching_ ABSL_GUARDED_BY(mutex_) = false; - // bool sent_headers_ ABSL_GUARDED_BY(mutex_) = false; + // Used to indicate whether the body function has been called with false + // as its second argument. That must always happen at most once, even + // if terminate races with the normal end-of-stream marker. more=false + // may never be sent if the request is cancelled, nor deleted prior to + // it being requested. bool sent_end_stream_ ABSL_GUARDED_BY(mutex_) = false; + HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); absl::Mutex mutex_; @@ -327,12 +318,14 @@ void MainCommonBase::terminateAdminRequests() { void MainCommonBase::detachResponse(AdminResponse* response) { absl::MutexLock lock(&mutex_); - - // In a race between ~AdminResponseImpl and terminateAdminRequests, - // the response_set_ may already have been cleared and the erasure - // below will not have any effect; this is OK. int erased = response_set_.erase(response); - ASSERT(erased == 1 || !accepting_admin_requests_); + ASSERT(erased == 1); + + // We cannot be detaching a response after we've stopped + // admitting new requests. Once we have terminated the + // active requests, they won't call back to detachResponse. + // See the terminated_ check in the destructor. + ASSERT(accepting_admin_requests_); } MainCommonBase::AdminResponseSharedPtr diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 1b359e60852b..6e0fefa34ccf 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -530,6 +530,8 @@ TEST_P(AdminRequestTest, AdminRequestAfterRun) { class AdminStreamingTest : public AdminRequestTest { protected: + static constexpr absl::string_view StreamingEndpoint = "/stream"; + class StreamingAdminRequest : public Envoy::Server::Admin::Request { public: static constexpr uint64_t NumChunks = 10; @@ -547,17 +549,16 @@ class AdminStreamingTest : public AdminRequestTest { uint64_t chunks_remaining_{NumChunks}; }; - MainCommonBase::AdminResponseSharedPtr setupStreamingRequest(absl::string_view prefix) { + AdminStreamingTest() { startEnvoy(); started_.WaitForNotification(); Server::Admin& admin = *main_common_->server()->admin(); admin.addStreamingHandler( - std::string(prefix), "streaming api", + std::string(StreamingEndpoint), "streaming api", [](Server::AdminStream&) -> Server::Admin::RequestPtr { return std::make_unique(); }, true, false); - return main_common_->adminRequest(prefix, "GET"); } using ChunksBytes = std::pair; @@ -597,7 +598,8 @@ INSTANTIATE_TEST_SUITE_P(IpVersions, AdminStreamingTest, TestUtility::ipTestParamsToString); TEST_P(AdminStreamingTest, AdminStreamingRequestGetStatsAndQuit) { - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); ChunksBytes chunks_bytes = runStreamingRequest(response); EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, @@ -608,7 +610,8 @@ TEST_P(AdminStreamingTest, AdminStreamingRequestGetStatsAndQuit) { TEST_P(AdminStreamingTest, AdminStreamingQuitDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { if (++quit_counter == chunks_to_send_before_quitting) { quitAndWait(); @@ -622,7 +625,8 @@ TEST_P(AdminStreamingTest, AdminStreamingQuitDuringChunks) { TEST_P(AdminStreamingTest, AdminStreamingCancelDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { if (++quit_counter == chunks_to_send_before_quitting) { response->cancel(); @@ -635,7 +639,8 @@ TEST_P(AdminStreamingTest, AdminStreamingCancelDuringChunks) { } TEST_P(AdminStreamingTest, AdminStreamingCancelBeforeAskingForHeader) { - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); response->cancel(); int header_calls = 0; @@ -646,7 +651,8 @@ TEST_P(AdminStreamingTest, AdminStreamingCancelBeforeAskingForHeader) { } TEST_P(AdminStreamingTest, AdminStreamingQuitBeforeHeaders) { - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); quitAndWait(); ChunksBytes chunks_bytes = runStreamingRequest(response); EXPECT_EQ(1, chunks_bytes.first); @@ -654,7 +660,8 @@ TEST_P(AdminStreamingTest, AdminStreamingQuitBeforeHeaders) { } TEST_P(AdminStreamingTest, QuitDeleteRace) { - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); // Initiates a streaming quit on the main thread, but do not wait for it. MainCommonBase::AdminResponseSharedPtr quit_response = main_common_->adminRequest("/quitquitquit", "POST"); @@ -664,7 +671,8 @@ TEST_P(AdminStreamingTest, QuitDeleteRace) { } TEST_P(AdminStreamingTest, QuitCancelRace) { - MainCommonBase::AdminResponseSharedPtr response = setupStreamingRequest("/stream"); + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); // Initiates a streaming quit on the main thread, but do not wait for it. MainCommonBase::AdminResponseSharedPtr quit_response = main_common_->adminRequest("/quitquitquit", "POST"); @@ -673,4 +681,22 @@ TEST_P(AdminStreamingTest, QuitCancelRace) { EXPECT_TRUE(waitForEnvoyToExit()); } +TEST_P(AdminStreamingTest, QuitBeforeCreatingResponse) { + // Initiates a streaming quit on the main thread, and wait for headers, which + // will trigger the termination of the event loop, and subsequent nulling of + // main_common_. However we can pause the test infrastructure after the quit + // takes hold leaving main_common_ in tact, to reproduce a potential race. + pause_after_run_ = true; + adminRequest("/quitquitquit", "POST"); + pause_point_.WaitForNotification(); // run() finished, but main_common_ still exists. + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); + ChunksBytes chunks_bytes = runStreamingRequest(response); + EXPECT_EQ(1, chunks_bytes.first); + EXPECT_EQ(0, chunks_bytes.second); + resume_.Notify(); + EXPECT_TRUE(waitForEnvoyToExit()); + response.reset(); +} + } // namespace Envoy From 8ba301d7b37b600f7d8f5744504b68d0bba4fdae Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 19 Feb 2024 21:25:42 -0500 Subject: [PATCH 12/39] Cover a few missing lines. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 6 ++-- test/exe/main_common_test.cc | 56 ++++++++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 39b866668dcf..21d09fc423ed 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -62,7 +62,7 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem bool MainCommonBase::run() { // Avoid returning from inside switch cases to minimize uncovered lines // while avoid gcc warnings by hitting the final return. - bool ret = true; + bool ret = false; switch (options_.mode()) { case Server::Mode::Serve: @@ -70,6 +70,7 @@ bool MainCommonBase::run() { #ifdef ENVOY_ADMIN_FUNCTIONALITY terminateAdminRequests(); #endif + ret = true; break; case Server::Mode::Validate: ret = Server::validateConfig( @@ -79,9 +80,8 @@ bool MainCommonBase::run() { break; case Server::Mode::InitOnly: PERF_DUMP(); + ret = true; break; - default: - ret = false; } return ret; } diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 6e0fefa34ccf..cd3c1cfd15ef 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -591,13 +591,31 @@ class AdminStreamingTest : public AdminRequestTest { return ChunksBytes(num_chunks, num_bytes); } + + /** + * In order to trigger certain early-exit critiera in a test, we can exploit + * the fact that all the admin responses are delivered on the main thread. + * So we can pause those by blocking the main thread indefinitely. + * + * To resume the main thread, call resume_.Notify(); + * + * @param url the stats endpoint to initiate. + */ + void blockMainThreadUntilResume(absl::string_view url, absl::string_view method) { + MainCommonBase::AdminResponseSharedPtr blocked_response = + main_common_->adminRequest(url, method); + absl::Notification block_main_thread; + blocked_response->getHeaders([this](Http::Code, Http::ResponseHeaderMap&) { + resume_.WaitForNotification(); + }); + } }; INSTANTIATE_TEST_SUITE_P(IpVersions, AdminStreamingTest, testing::ValuesIn(TestEnvironment::getIpVersionsForTest()), TestUtility::ipTestParamsToString); -TEST_P(AdminStreamingTest, AdminStreamingRequestGetStatsAndQuit) { +TEST_P(AdminStreamingTest, RequestGetStatsAndQuit) { MainCommonBase::AdminResponseSharedPtr response = main_common_->adminRequest(StreamingEndpoint, "GET"); ChunksBytes chunks_bytes = runStreamingRequest(response); @@ -607,7 +625,7 @@ TEST_P(AdminStreamingTest, AdminStreamingRequestGetStatsAndQuit) { quitAndWait(); } -TEST_P(AdminStreamingTest, AdminStreamingQuitDuringChunks) { +TEST_P(AdminStreamingTest, QuitDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; MainCommonBase::AdminResponseSharedPtr response = @@ -622,7 +640,7 @@ TEST_P(AdminStreamingTest, AdminStreamingQuitDuringChunks) { chunks_bytes.second); } -TEST_P(AdminStreamingTest, AdminStreamingCancelDuringChunks) { +TEST_P(AdminStreamingTest, CancelDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; MainCommonBase::AdminResponseSharedPtr response = @@ -638,7 +656,7 @@ TEST_P(AdminStreamingTest, AdminStreamingCancelDuringChunks) { quitAndWait(); } -TEST_P(AdminStreamingTest, AdminStreamingCancelBeforeAskingForHeader) { +TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader) { MainCommonBase::AdminResponseSharedPtr response = main_common_->adminRequest(StreamingEndpoint, "GET"); response->cancel(); @@ -650,7 +668,35 @@ TEST_P(AdminStreamingTest, AdminStreamingCancelBeforeAskingForHeader) { EXPECT_EQ(0, header_calls); } -TEST_P(AdminStreamingTest, AdminStreamingQuitBeforeHeaders) { +TEST_P(AdminStreamingTest, CancelAfterAskingForHeader) { + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); + int header_calls = 0; + blockMainThreadUntilResume(StreamingEndpoint, "GET"); + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + response->cancel(); + resume_.Notify(); + quitAndWait(); + EXPECT_EQ(0, header_calls); +} + +TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); + absl::Notification headers_notify; + response->getHeaders([&headers_notify](Http::Code, Http::ResponseHeaderMap&) { + headers_notify.Notify(); }); + headers_notify.WaitForNotification(); + blockMainThreadUntilResume(StreamingEndpoint, "GET"); + int chunk_calls = 0; + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + response->cancel(); + resume_.Notify(); + quitAndWait(); + EXPECT_EQ(0, chunk_calls); +} + +TEST_P(AdminStreamingTest, QuitBeforeHeaders) { MainCommonBase::AdminResponseSharedPtr response = main_common_->adminRequest(StreamingEndpoint, "GET"); quitAndWait(); From e6a10f30c7cfb7514328cd000b697faae5020486 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 19 Feb 2024 21:37:33 -0500 Subject: [PATCH 13/39] format Signed-off-by: Joshua Marantz --- test/exe/main_common_test.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index cd3c1cfd15ef..ac66040016c4 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -593,7 +593,7 @@ class AdminStreamingTest : public AdminRequestTest { } /** - * In order to trigger certain early-exit critiera in a test, we can exploit + * In order to trigger certain early-exit criteria in a test, we can exploit * the fact that all the admin responses are delivered on the main thread. * So we can pause those by blocking the main thread indefinitely. * @@ -605,9 +605,8 @@ class AdminStreamingTest : public AdminRequestTest { MainCommonBase::AdminResponseSharedPtr blocked_response = main_common_->adminRequest(url, method); absl::Notification block_main_thread; - blocked_response->getHeaders([this](Http::Code, Http::ResponseHeaderMap&) { - resume_.WaitForNotification(); - }); + blocked_response->getHeaders( + [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); } }; @@ -684,8 +683,8 @@ TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { MainCommonBase::AdminResponseSharedPtr response = main_common_->adminRequest(StreamingEndpoint, "GET"); absl::Notification headers_notify; - response->getHeaders([&headers_notify](Http::Code, Http::ResponseHeaderMap&) { - headers_notify.Notify(); }); + response->getHeaders( + [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); headers_notify.WaitForNotification(); blockMainThreadUntilResume(StreamingEndpoint, "GET"); int chunk_calls = 0; From 05c1127c3861607980667b1a8924e3e796ad0861 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 20 Feb 2024 07:25:40 -0500 Subject: [PATCH 14/39] hit one more early-exit line in main_common with a contrived test. Signed-off-by: Joshua Marantz --- test/exe/main_common_test.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index ac66040016c4..8422a66785b8 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -537,9 +537,14 @@ class AdminStreamingTest : public AdminRequestTest { static constexpr uint64_t NumChunks = 10; static constexpr uint64_t BytesPerChunk = 10000; - StreamingAdminRequest() : chunk_(BytesPerChunk, 'a') {} + /*StreamingAdminRequest(OptRef& pause_chunk) + : chunk_(BytesPerChunk, 'a'), + pause_chunk_(pause_chunk) {}*/ + StreamingAdminRequest(std::function& next_chunk_hook) + : chunk_(BytesPerChunk, 'a'), next_chunk_hook_(next_chunk_hook) {} Http::Code start(Http::ResponseHeaderMap&) override { return Http::Code::OK; } bool nextChunk(Buffer::Instance& response) override { + next_chunk_hook_(); response.add(chunk_); return --chunks_remaining_ > 0; } @@ -547,6 +552,7 @@ class AdminStreamingTest : public AdminRequestTest { private: const std::string chunk_; uint64_t chunks_remaining_{NumChunks}; + std::function& next_chunk_hook_; }; AdminStreamingTest() { @@ -555,8 +561,8 @@ class AdminStreamingTest : public AdminRequestTest { Server::Admin& admin = *main_common_->server()->admin(); admin.addStreamingHandler( std::string(StreamingEndpoint), "streaming api", - [](Server::AdminStream&) -> Server::Admin::RequestPtr { - return std::make_unique(); + [this](Server::AdminStream&) -> Server::Admin::RequestPtr { + return std::make_unique(next_chunk_hook_); }, true, false); } @@ -608,6 +614,8 @@ class AdminStreamingTest : public AdminRequestTest { blocked_response->getHeaders( [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); } + + std::function next_chunk_hook_ = []() {}; }; INSTANTIATE_TEST_SUITE_P(IpVersions, AdminStreamingTest, @@ -688,8 +696,11 @@ TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { headers_notify.WaitForNotification(); blockMainThreadUntilResume(StreamingEndpoint, "GET"); int chunk_calls = 0; + + // Cause the /streaming handler to pause while yielding the next chunk, to hit + // an early exit in MainCommonBase::requestNextChunk. + next_chunk_hook_ = [response]() { response->cancel(); }; response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); - response->cancel(); resume_.Notify(); quitAndWait(); EXPECT_EQ(0, chunk_calls); From c40e30a19fca78bfe8fb71246cc7df77e1bf0c0a Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 20 Feb 2024 13:15:01 -0500 Subject: [PATCH 15/39] improve consitency and handling of cancel function. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 13 ++++++++++--- source/exe/main_common.h | 5 +++++ test/exe/main_common_test.cc | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 21d09fc423ed..50602f59f6f6 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -145,7 +145,6 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { absl::MutexLock lock(&mutex_); ASSERT(headers_fn_ == nullptr); if (cancelled_) { - // We do not call callbacks after cancel(). return; } headers_fn_ = fn; @@ -164,7 +163,10 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { absl::MutexLock lock(&mutex_); ASSERT(body_fn_ == nullptr); body_fn_ = fn; - if (terminated_ || cancelled_ || !opt_admin_) { + if (cancelled_) { + return; + } + if (terminated_ || !opt_admin_) { sendAbortChunkLockHeld(); return; } @@ -191,6 +193,11 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { body_fn_ = nullptr; } + bool cancelled() const override { + absl::MutexLock lock(&mutex_); + return cancelled_; + } + // Called from MainCommonBase::terminateAdminRequests when the Envoy server // terminates. After this is called, the caller may need to complete the // admin response, and so calls to getHeader and nextChunk remain valid, @@ -296,7 +303,7 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); - absl::Mutex mutex_; + mutable absl::Mutex mutex_; }; } // namespace diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 17b7a1666d3a..a365c6871c84 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -102,6 +102,11 @@ class MainCommonBase : public StrippedMainBase { */ virtual void cancel() PURE; + /** + * @return whether the request was cancelled. + */ + virtual bool cancelled() const PURE; + private: friend class MainCommonBase; diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 8422a66785b8..34770a2a9c13 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -579,7 +579,7 @@ class AdminStreamingTest : public AdminRequestTest { bool cont = true; uint64_t num_chunks = 0; uint64_t num_bytes = 0; - while (cont) { + while (cont && !response->cancelled()) { absl::Notification chunk_notify; response->nextChunk( [&chunk_notify, &num_chunks, &num_bytes, &cont](Buffer::Instance& chunk, bool more) { @@ -657,16 +657,18 @@ TEST_P(AdminStreamingTest, CancelDuringChunks) { response->cancel(); } }); - EXPECT_EQ(4, chunks_bytes.first); + EXPECT_EQ(3, chunks_bytes.first); // no final call to the chunk handler after cancel. EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, chunks_bytes.second); quitAndWait(); } TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader) { + blockMainThreadUntilResume(StreamingEndpoint, "GET"); MainCommonBase::AdminResponseSharedPtr response = main_common_->adminRequest(StreamingEndpoint, "GET"); response->cancel(); + resume_.Notify(); int header_calls = 0; // After 'cancel', the headers function will not be called. @@ -687,6 +689,34 @@ TEST_P(AdminStreamingTest, CancelAfterAskingForHeader) { EXPECT_EQ(0, header_calls); } +TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk) { + MainCommonBase::AdminResponseSharedPtr response = + main_common_->adminRequest(StreamingEndpoint, "GET"); + absl::Notification headers_notify; + response->getHeaders( + [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); + headers_notify.WaitForNotification(); + + // To hit an early exit in AdminResponseImpl::requestNextChunk we need to + // allow its posting to the main thread, but not let it run. To do that we + // queue up a main-thread callback that blocks on a notification, thus pausing + // Envoy's main thread. + absl::Notification block_cancel; + main_common_->dispatcherForTest().post([response, &block_cancel] { + block_cancel.WaitForNotification(); + response->cancel(); + }); + + int chunk_calls = 0; + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + + // Now that we have issued the request for the nextChunk call, we can unblock + // our callback, which will hit the early exit in the chunk handler. + block_cancel.Notify(); + quitAndWait(); + EXPECT_EQ(0, chunk_calls); +} + TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { MainCommonBase::AdminResponseSharedPtr response = main_common_->adminRequest(StreamingEndpoint, "GET"); From 8cfed123bbbc7f5a4ecfb0211c875bb394560434 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 20 Feb 2024 17:27:19 -0500 Subject: [PATCH 16/39] add early exit in handleHeaders to force it to be tested, and refactor tests. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 13 ++-- test/exe/main_common_test.cc | 126 ++++++++++++++++++++--------------- 2 files changed, 80 insertions(+), 59 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 50602f59f6f6..462cebdb9973 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -162,10 +162,10 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { { absl::MutexLock lock(&mutex_); ASSERT(body_fn_ == nullptr); - body_fn_ = fn; if (cancelled_) { return; } + body_fn_ = fn; if (terminated_ || !opt_admin_) { sendAbortChunkLockHeld(); return; @@ -225,11 +225,12 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { code_ = request_->start(*response_headers_); { absl::MutexLock lock(&mutex_); - if (headers_fn_ != nullptr && !cancelled_) { - Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); - headers_fn_(code_, *response_headers_); - headers_fn_ = nullptr; + if (headers_fn_ == nullptr || cancelled_) { + return; } + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + headers_fn_(code_, *response_headers_); + headers_fn_ = nullptr; } } @@ -246,7 +247,7 @@ class AdminResponseImpl : public MainCommonBase::AdminResponse { } { absl::MutexLock lock(&mutex_); - if (sent_end_stream_ || cancelled_ || terminated_) { + if (sent_end_stream_ || cancelled_) { return; } sent_end_stream_ = !more_data_; diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 34770a2a9c13..114f8303d867 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -615,6 +615,41 @@ class AdminStreamingTest : public AdminRequestTest { [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); } + /** + * To hit an early exit after the second lock in + * AdminResponseImpl::requestNextChunk and AdminResponseImpl::requestHeaders + * we, need to post a cancel request to the main thread, but block that on the + * resume_ notification. This allows a subsequent test call to getHeaders or + * nextChunk initiate a post to main thread, but runs the cancel call on the + * response before headers_fn_ or body_fn_ runs. + * + * @param response the response to cancel after resume_. + */ + void + blockMainThreadAndCancelResponseAfterResume(MainCommonBase::AdminResponseSharedPtr response) { + main_common_->dispatcherForTest().post([response, this] { + resume_.WaitForNotification(); + response->cancel(); + }); + } + + MainCommonBase::AdminResponseSharedPtr streamingResponse() { + return main_common_->adminRequest(StreamingEndpoint, "GET"); + } + + void waitForHeaders(MainCommonBase::AdminResponseSharedPtr response) { + absl::Notification headers_notify; + response->getHeaders( + [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); + headers_notify.WaitForNotification(); + } + + void quitAndRequestHeaders() { + MainCommonBase::AdminResponseSharedPtr quit_response = + main_common_->adminRequest("/quitquitquit", "POST"); + quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); + } + std::function next_chunk_hook_ = []() {}; }; @@ -623,8 +658,7 @@ INSTANTIATE_TEST_SUITE_P(IpVersions, AdminStreamingTest, TestUtility::ipTestParamsToString); TEST_P(AdminStreamingTest, RequestGetStatsAndQuit) { - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); ChunksBytes chunks_bytes = runStreamingRequest(response); EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, @@ -635,8 +669,7 @@ TEST_P(AdminStreamingTest, RequestGetStatsAndQuit) { TEST_P(AdminStreamingTest, QuitDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { if (++quit_counter == chunks_to_send_before_quitting) { quitAndWait(); @@ -650,8 +683,7 @@ TEST_P(AdminStreamingTest, QuitDuringChunks) { TEST_P(AdminStreamingTest, CancelDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { if (++quit_counter == chunks_to_send_before_quitting) { response->cancel(); @@ -663,10 +695,9 @@ TEST_P(AdminStreamingTest, CancelDuringChunks) { quitAndWait(); } -TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader) { +TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader1) { blockMainThreadUntilResume(StreamingEndpoint, "GET"); - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); response->cancel(); resume_.Notify(); int header_calls = 0; @@ -677,9 +708,18 @@ TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader) { EXPECT_EQ(0, header_calls); } +TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader2) { + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + blockMainThreadAndCancelResponseAfterResume(response); + int header_calls = 0; + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + resume_.Notify(); + quitAndWait(); + EXPECT_EQ(0, header_calls); +} + TEST_P(AdminStreamingTest, CancelAfterAskingForHeader) { - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); int header_calls = 0; blockMainThreadUntilResume(StreamingEndpoint, "GET"); response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); @@ -689,41 +729,30 @@ TEST_P(AdminStreamingTest, CancelAfterAskingForHeader) { EXPECT_EQ(0, header_calls); } -TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk) { - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); - absl::Notification headers_notify; - response->getHeaders( - [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); - headers_notify.WaitForNotification(); - - // To hit an early exit in AdminResponseImpl::requestNextChunk we need to - // allow its posting to the main thread, but not let it run. To do that we - // queue up a main-thread callback that blocks on a notification, thus pausing - // Envoy's main thread. - absl::Notification block_cancel; - main_common_->dispatcherForTest().post([response, &block_cancel] { - block_cancel.WaitForNotification(); - response->cancel(); - }); - +TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk1) { + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + waitForHeaders(response); + response->cancel(); int chunk_calls = 0; response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + quitAndWait(); + EXPECT_EQ(0, chunk_calls); +} - // Now that we have issued the request for the nextChunk call, we can unblock - // our callback, which will hit the early exit in the chunk handler. - block_cancel.Notify(); +TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk2) { + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + waitForHeaders(response); + blockMainThreadAndCancelResponseAfterResume(response); + int chunk_calls = 0; + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + resume_.Notify(); quitAndWait(); EXPECT_EQ(0, chunk_calls); } TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); - absl::Notification headers_notify; - response->getHeaders( - [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); - headers_notify.WaitForNotification(); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + waitForHeaders(response); blockMainThreadUntilResume(StreamingEndpoint, "GET"); int chunk_calls = 0; @@ -737,8 +766,7 @@ TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { } TEST_P(AdminStreamingTest, QuitBeforeHeaders) { - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); quitAndWait(); ChunksBytes chunks_bytes = runStreamingRequest(response); EXPECT_EQ(1, chunks_bytes.first); @@ -746,23 +774,16 @@ TEST_P(AdminStreamingTest, QuitBeforeHeaders) { } TEST_P(AdminStreamingTest, QuitDeleteRace) { - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); // Initiates a streaming quit on the main thread, but do not wait for it. - MainCommonBase::AdminResponseSharedPtr quit_response = - main_common_->adminRequest("/quitquitquit", "POST"); - quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); + quitAndRequestHeaders(); response.reset(); // Races with the quitquitquit EXPECT_TRUE(waitForEnvoyToExit()); } TEST_P(AdminStreamingTest, QuitCancelRace) { - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); - // Initiates a streaming quit on the main thread, but do not wait for it. - MainCommonBase::AdminResponseSharedPtr quit_response = - main_common_->adminRequest("/quitquitquit", "POST"); - quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + quitAndRequestHeaders(); response->cancel(); // Races with the quitquitquit EXPECT_TRUE(waitForEnvoyToExit()); } @@ -775,8 +796,7 @@ TEST_P(AdminStreamingTest, QuitBeforeCreatingResponse) { pause_after_run_ = true; adminRequest("/quitquitquit", "POST"); pause_point_.WaitForNotification(); // run() finished, but main_common_ still exists. - MainCommonBase::AdminResponseSharedPtr response = - main_common_->adminRequest(StreamingEndpoint, "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); ChunksBytes chunks_bytes = runStreamingRequest(response); EXPECT_EQ(1, chunks_bytes.first); EXPECT_EQ(0, chunks_bytes.second); From ce301183ef0c20379a6ac4d1c5da3509fc7b0596 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 20 Feb 2024 17:33:45 -0500 Subject: [PATCH 17/39] add comments. Signed-off-by: Joshua Marantz --- test/exe/main_common_test.cc | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 114f8303d867..f91d2f029247 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -598,6 +598,13 @@ class AdminStreamingTest : public AdminRequestTest { return ChunksBytes(num_chunks, num_bytes); } + /** + * @return a streaming response to a GET of StreamingEndpoint. + */ + MainCommonBase::AdminResponseSharedPtr streamingResponse() { + return main_common_->adminRequest(StreamingEndpoint, "GET"); + } + /** * In order to trigger certain early-exit criteria in a test, we can exploit * the fact that all the admin responses are delivered on the main thread. @@ -633,10 +640,11 @@ class AdminStreamingTest : public AdminRequestTest { }); } - MainCommonBase::AdminResponseSharedPtr streamingResponse() { - return main_common_->adminRequest(StreamingEndpoint, "GET"); - } - + /** + * Requests the headers and waits until the headers have been sent. + * + * @param response the response from which to get headers. + */ void waitForHeaders(MainCommonBase::AdminResponseSharedPtr response) { absl::Notification headers_notify; response->getHeaders( @@ -644,12 +652,21 @@ class AdminStreamingTest : public AdminRequestTest { headers_notify.WaitForNotification(); } + /** + * Initiates a '/quitquitquit' call and requests the headers for that call, + * but does not wait for the call to complete. We avoid waiting in order to + * trigger a potential race to ensure that MainCommon handles it properly. + */ void quitAndRequestHeaders() { MainCommonBase::AdminResponseSharedPtr quit_response = main_common_->adminRequest("/quitquitquit", "POST"); quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); } + // This variable provides a hook to allow a test method to specify a hook to + // run when nextChunk() is called. This is currently used only for one test, + // CancelAfterAskingForChunk, that initiates a cancel() from within the chunk + // handler. std::function next_chunk_hook_ = []() {}; }; From cd88721a776c52b93bebfefb4f2fd7b30a24791a Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 20 Feb 2024 20:54:57 -0500 Subject: [PATCH 18/39] cover more lines. Signed-off-by: Joshua Marantz --- test/exe/main_common_test.cc | 39 +++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index f91d2f029247..f9d1c126c275 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -537,12 +537,14 @@ class AdminStreamingTest : public AdminRequestTest { static constexpr uint64_t NumChunks = 10; static constexpr uint64_t BytesPerChunk = 10000; - /*StreamingAdminRequest(OptRef& pause_chunk) - : chunk_(BytesPerChunk, 'a'), - pause_chunk_(pause_chunk) {}*/ - StreamingAdminRequest(std::function& next_chunk_hook) - : chunk_(BytesPerChunk, 'a'), next_chunk_hook_(next_chunk_hook) {} - Http::Code start(Http::ResponseHeaderMap&) override { return Http::Code::OK; } + StreamingAdminRequest(std::function& get_headers_hook, + std::function& next_chunk_hook) + : chunk_(BytesPerChunk, 'a'), get_headers_hook_(get_headers_hook), + next_chunk_hook_(next_chunk_hook) {} + Http::Code start(Http::ResponseHeaderMap&) override { + get_headers_hook_(); + return Http::Code::OK; + } bool nextChunk(Buffer::Instance& response) override { next_chunk_hook_(); response.add(chunk_); @@ -552,6 +554,7 @@ class AdminStreamingTest : public AdminRequestTest { private: const std::string chunk_; uint64_t chunks_remaining_{NumChunks}; + std::function& get_headers_hook_; std::function& next_chunk_hook_; }; @@ -562,7 +565,7 @@ class AdminStreamingTest : public AdminRequestTest { admin.addStreamingHandler( std::string(StreamingEndpoint), "streaming api", [this](Server::AdminStream&) -> Server::Admin::RequestPtr { - return std::make_unique(next_chunk_hook_); + return std::make_unique(get_headers_hook_, next_chunk_hook_); }, true, false); } @@ -667,6 +670,7 @@ class AdminStreamingTest : public AdminRequestTest { // run when nextChunk() is called. This is currently used only for one test, // CancelAfterAskingForChunk, that initiates a cancel() from within the chunk // handler. + std::function get_headers_hook_ = []() {}; std::function next_chunk_hook_ = []() {}; }; @@ -713,7 +717,7 @@ TEST_P(AdminStreamingTest, CancelDuringChunks) { } TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader1) { - blockMainThreadUntilResume(StreamingEndpoint, "GET"); + blockMainThreadUntilResume("/ready", "GET"); MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); response->cancel(); resume_.Notify(); @@ -735,10 +739,10 @@ TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader2) { EXPECT_EQ(0, header_calls); } -TEST_P(AdminStreamingTest, CancelAfterAskingForHeader) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); +TEST_P(AdminStreamingTest, CancelAfterAskingForHeader1) { int header_calls = 0; - blockMainThreadUntilResume(StreamingEndpoint, "GET"); + blockMainThreadUntilResume("/ready", "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); response->cancel(); resume_.Notify(); @@ -746,6 +750,17 @@ TEST_P(AdminStreamingTest, CancelAfterAskingForHeader) { EXPECT_EQ(0, header_calls); } +TEST_P(AdminStreamingTest, CancelAfterAskingForHeader2) { + int header_calls = 0; + // blockMainThreadUntilResume("/ready", "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + get_headers_hook_ = [&response]() { response->cancel(); }; + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + // resume_.Notify(); + quitAndWait(); + EXPECT_EQ(0, header_calls); +} + TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk1) { MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); waitForHeaders(response); @@ -770,7 +785,7 @@ TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk2) { TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); waitForHeaders(response); - blockMainThreadUntilResume(StreamingEndpoint, "GET"); + blockMainThreadUntilResume("/ready", "GET"); int chunk_calls = 0; // Cause the /streaming handler to pause while yielding the next chunk, to hit From 4262d8d0c72cc4f0005ffaef63e8b55bc4e9a740 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 20 Feb 2024 22:20:14 -0500 Subject: [PATCH 19/39] tighten up coverage Signed-off-by: Joshua Marantz --- test/per_file_coverage.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/per_file_coverage.sh b/test/per_file_coverage.sh index 9759ec2c2658..0dc1af3afc39 100755 --- a/test/per_file_coverage.sh +++ b/test/per_file_coverage.sh @@ -21,7 +21,7 @@ declare -a KNOWN_LOW_COVERAGE=( "source/common/signal:87.2" # Death tests don't report LCOV "source/common/thread:0.0" # Death tests don't report LCOV "source/common/watchdog:58.6" # Death tests don't report LCOV -"source/exe:90.3" +"source/exe:94.0" # increased by #32346, need coverage for terminate_handler and hot restart failures "source/extensions/clusters/common:91.5" # This can be increased again once `#24903` lands "source/extensions/common:93.0" #flaky: be careful adjusting "source/extensions/common/proxy_protocol:93.8" # Adjusted for security patch From 47a51cad8b7fc47db7b805fe7eeb9dbfba5ace0e Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 22 Feb 2024 16:54:21 -0500 Subject: [PATCH 20/39] mutex-protect destructor accesss to AdminResponse::terminated_. Make AdminResponse be non-virtual. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 318 +++++++++++++++++--------------------- source/exe/main_common.h | 52 ++++++- 2 files changed, 185 insertions(+), 185 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 462cebdb9973..cf94869efb5d 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -61,7 +61,7 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem bool MainCommonBase::run() { // Avoid returning from inside switch cases to minimize uncovered lines - // while avoid gcc warnings by hitting the final return. + // while avoiding gcc warnings by hitting the final return. bool ret = false; switch (options_.mode()) { @@ -105,209 +105,169 @@ void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string }); } -namespace { - -class AdminResponseImpl : public MainCommonBase::AdminResponse { -public: - using CleanupFn = std::function; +MainCommonBase::AdminResponse::AdminResponse(Server::Instance& server, absl::string_view path, + absl::string_view method, CleanupFn cleanup) + : server_(server), opt_admin_(server.admin()), cleanup_(cleanup) { + request_headers_->setMethod(method); + request_headers_->setPath(path); +} - AdminResponseImpl(Server::Instance& server, absl::string_view path, absl::string_view method, - CleanupFn cleanup) - : server_(server), opt_admin_(server.admin()), cleanup_(cleanup) { - request_headers_->setMethod(method); - request_headers_->setPath(path); +MainCommonBase::AdminResponse::~AdminResponse() { + // MainCommonBase::response_set_ holds a raw pointer to all outstanding + // responses (not a shared_ptr). So when destructing the response + // we must call cleanup on MainCommonBase if this has not already + // occurred. + // + // Note it's also possible for MainCommonBase to be deleted before + // AdminResponse, in which case it will use its response_set_ + // to call terminate, so we'll track that here and skip calling + // cleanup_ in that case. + absl::MutexLock lock(&mutex_); + if (!terminated_) { + // If there is a terminate/destruct race, calling cleanup_ below + // will lock MainCommonBase::mutex_, which is being held by + // terminateAdminRequests, so will block here until all the + // terminate calls are made. Once terminateAdminRequests + // release its lock, cleanup_ will return and the object + // can be safely destructed. + cleanup_(this); // lock in MainCommonBase } +} - ~AdminResponseImpl() { - // MainCommonBase::response_set_ holds a raw pointer to all outstanding - // responses (not a shared_ptr). So when destructing the response - // we must call cleanup on MainCommonBase if this has not already - // occurred. - // - // Note it's also possible for MainCommonBase to be deleted before - // AdminResponseImpl, in which case it will use its response_set_ - // to call terminate, so we'll track that here and skip calling - // cleanup_ in that case. - if (!terminated_) { - // If there is a terminate/destruct race, calling cleanup_ below - // will lock MainCommonBase::mutex_, which is being held by - // terminateAdminRequests, so will block here until all the - // terminate calls are made. Once terminateAdminRequests - // release its lock, cleanup_ will return and the object - // can be safely destructed. - cleanup_(this); // lock in MainCommonBase +void MainCommonBase::AdminResponse::getHeaders(HeadersFn fn) { + // First check for cancelling or termination. + { + absl::MutexLock lock(&mutex_); + ASSERT(headers_fn_ == nullptr); + if (cancelled_) { + return; + } + headers_fn_ = fn; + if (terminated_ || !opt_admin_) { + sendErrorLockHeld(); + return; } } + server_.dispatcher().post([this, response = shared_from_this()]() { requestHeaders(); }); +} - void getHeaders(HeadersFn fn) override { - // First check for cancelling or termination. - { - absl::MutexLock lock(&mutex_); - ASSERT(headers_fn_ == nullptr); - if (cancelled_) { - return; - } - headers_fn_ = fn; - if (terminated_ || !opt_admin_) { - sendErrorLockHeld(); - return; - } +void MainCommonBase::AdminResponse::nextChunk(BodyFn fn) { + // Note the caller may race a call to nextChunk with the server being + // terminated. + { + absl::MutexLock lock(&mutex_); + ASSERT(body_fn_ == nullptr); + if (cancelled_) { + return; + } + body_fn_ = fn; + if (terminated_ || !opt_admin_) { + sendAbortChunkLockHeld(); + return; } - server_.dispatcher().post([this, response = shared_from_this()]() { requestHeaders(); }); } - void nextChunk(BodyFn fn) override { - // Note the caller may race a call to nextChunk with the server being - // terminated. - { - absl::MutexLock lock(&mutex_); - ASSERT(body_fn_ == nullptr); - if (cancelled_) { - return; - } - body_fn_ = fn; - if (terminated_ || !opt_admin_) { - sendAbortChunkLockHeld(); - return; - } - } + // Note that nextChunk may be called from any thread -- it's the callers choice, + // including the Envoy main thread, which would occur if the caller initiates + // the request of a chunk upon receipt of the previous chunk. + // + // In that case it may race against the AdminResponse object being deleted, + // in which case the callbacks, held in a shared_ptr, will be cancelled + // from the destructor. If that happens *before* we post to the main thread, + // we will just skip and never call fn. + server_.dispatcher().post([this, response = shared_from_this()]() { requestNextChunk(); }); +} - // Note that nextChunk may be called from any thread -- it's the callers choice, - // including the Envoy main thread, which would occur if the caller initiates - // the request of a chunk upon receipt of the previous chunk. - // - // In that case it may race against the AdminResponse object being deleted, - // in which case the callbacks, held in a shared_ptr, will be cancelled - // from the destructor. If that happens *before* we post to the main thread, - // we will just skip and never call fn. - server_.dispatcher().post([this, response = shared_from_this()]() { requestNextChunk(); }); - } +// Called by the user if it is not longer interested in the result of the +// admin request. After calling cancel() the caller must not call nextChunk or +// getHeaders. +void MainCommonBase::AdminResponse::cancel() { + absl::MutexLock lock(&mutex_); + cancelled_ = true; + headers_fn_ = nullptr; + body_fn_ = nullptr; +} - // Called by the user if it is not longer interested in the result of the - // admin request. After calling cancel() the caller must not call nextChunk or - // getHeaders. - void cancel() override { - absl::MutexLock lock(&mutex_); - cancelled_ = true; - headers_fn_ = nullptr; - body_fn_ = nullptr; - } +bool MainCommonBase::AdminResponse::cancelled() const { + absl::MutexLock lock(&mutex_); + return cancelled_; +} - bool cancelled() const override { - absl::MutexLock lock(&mutex_); - return cancelled_; - } +// Called from MainCommonBase::terminateAdminRequests when the Envoy server +// terminates. After this is called, the caller may need to complete the +// admin response, and so calls to getHeader and nextChunk remain valid, +// resulting in 503 and an empty body. +void MainCommonBase::AdminResponse::terminate() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + absl::MutexLock lock(&mutex_); + terminated_ = true; + sendErrorLockHeld(); + sendAbortChunkLockHeld(); +} - // Called from MainCommonBase::terminateAdminRequests when the Envoy server - // terminates. After this is called, the caller may need to complete the - // admin response, and so calls to getHeader and nextChunk remain valid, - // resulting in 503 and an empty body. - void terminate() override { - ASSERT_IS_MAIN_OR_TEST_THREAD(); +void MainCommonBase::AdminResponse::requestHeaders() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + { absl::MutexLock lock(&mutex_); - terminated_ = true; - sendErrorLockHeld(); - sendAbortChunkLockHeld(); - } - -private: - void requestHeaders() { - ASSERT_IS_MAIN_OR_TEST_THREAD(); - { - absl::MutexLock lock(&mutex_); - if (cancelled_ || terminated_) { - return; - } + if (cancelled_ || terminated_) { + return; } - Server::AdminFilter filter(*opt_admin_); - filter.decodeHeaders(*request_headers_, false); - request_ = opt_admin_->makeRequest(filter); - code_ = request_->start(*response_headers_); - { - absl::MutexLock lock(&mutex_); - if (headers_fn_ == nullptr || cancelled_) { - return; - } - Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); - headers_fn_(code_, *response_headers_); - headers_fn_ = nullptr; + } + Server::AdminFilter filter(*opt_admin_); + filter.decodeHeaders(*request_headers_, false); + request_ = opt_admin_->makeRequest(filter); + code_ = request_->start(*response_headers_); + { + absl::MutexLock lock(&mutex_); + if (headers_fn_ == nullptr || cancelled_) { + return; } + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + headers_fn_(code_, *response_headers_); + headers_fn_ = nullptr; } +} - void requestNextChunk() { - ASSERT_IS_MAIN_OR_TEST_THREAD(); - { - absl::MutexLock lock(&mutex_); - if (cancelled_ || terminated_) { - return; - } - } - while (response_.length() == 0 && more_data_) { - more_data_ = request_->nextChunk(response_); - } - { - absl::MutexLock lock(&mutex_); - if (sent_end_stream_ || cancelled_) { - return; - } - sent_end_stream_ = !more_data_; - body_fn_(response_, more_data_); - ASSERT(response_.length() == 0); - body_fn_ = nullptr; +void MainCommonBase::AdminResponse::requestNextChunk() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + { + absl::MutexLock lock(&mutex_); + if (cancelled_ || terminated_) { + return; } } - - void sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { - if (!sent_end_stream_ && body_fn_ != nullptr) { - response_.drain(response_.length()); - body_fn_(response_, false); - sent_end_stream_ = true; + while (response_.length() == 0 && more_data_) { + more_data_ = request_->nextChunk(response_); + } + { + absl::MutexLock lock(&mutex_); + if (sent_end_stream_ || cancelled_) { + return; } + sent_end_stream_ = !more_data_; + body_fn_(response_, more_data_); + ASSERT(response_.length() == 0); body_fn_ = nullptr; } +} - void sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { - if (headers_fn_ != nullptr) { - code_ = Http::Code::InternalServerError; - Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); - headers_fn_(code_, *response_headers_); - headers_fn_ = nullptr; - } +void MainCommonBase::AdminResponse::sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + if (!sent_end_stream_ && body_fn_ != nullptr) { + response_.drain(response_.length()); + body_fn_(response_, false); + sent_end_stream_ = true; } + body_fn_ = nullptr; +} - Server::Instance& server_; - OptRef opt_admin_; - Buffer::OwnedImpl response_; - Http::Code code_; - Server::Admin::RequestPtr request_; - CleanupFn cleanup_; - Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; - Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; - bool more_data_ = true; - - // True if cancel() was explicitly called by the user; headers and body - // callbacks are never called after cancel(). - bool cancelled_ ABSL_GUARDED_BY(mutex_) = false; - - // True if the Envoy server has stopped running its main loop. Headers and - // body requests can be initiated and called back are called after terminate, - // so callers do not have to special case this -- the request will simply fail - // with an empty response. - bool terminated_ ABSL_GUARDED_BY(mutex_) = false; - - // Used to indicate whether the body function has been called with false - // as its second argument. That must always happen at most once, even - // if terminate races with the normal end-of-stream marker. more=false - // may never be sent if the request is cancelled, nor deleted prior to - // it being requested. - bool sent_end_stream_ ABSL_GUARDED_BY(mutex_) = false; - - HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); - BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); - mutable absl::Mutex mutex_; -}; - -} // namespace +void MainCommonBase::AdminResponse::sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + if (headers_fn_ != nullptr) { + code_ = Http::Code::InternalServerError; + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + headers_fn_(code_, *response_headers_); + headers_fn_ = nullptr; + } +} void MainCommonBase::terminateAdminRequests() { ASSERT_IS_MAIN_OR_TEST_THREAD(); @@ -338,7 +298,7 @@ void MainCommonBase::detachResponse(AdminResponse* response) { MainCommonBase::AdminResponseSharedPtr MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method) { - auto response = std::make_shared( + auto response = std::make_shared( *server(), path_and_query, method, [this](AdminResponse* response) { detachResponse(response); }); absl::MutexLock lock(&mutex_); diff --git a/source/exe/main_common.h b/source/exe/main_common.h index a365c6871c84..4d4111ed531b 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -53,7 +53,11 @@ class MainCommonBase : public StrippedMainBase { // cancel() is called, no further callbacks will be called by the response. class AdminResponse : public std::enable_shared_from_this { public: - virtual ~AdminResponse() = default; + using CleanupFn = std::function; + + AdminResponse(Server::Instance& server, absl::string_view path, absl::string_view method, + CleanupFn cleanup); + ~AdminResponse(); /** * Requests the headers for the response. This can be called from any @@ -70,7 +74,7 @@ class MainCommonBase : public StrippedMainBase { * @param fn The function to be called with the headers and status code. */ using HeadersFn = std::function; - virtual void getHeaders(HeadersFn fn) PURE; + void getHeaders(HeadersFn fn); /** * Requests a new chunk. This can be called from any thread, and the BodyFn @@ -91,7 +95,7 @@ class MainCommonBase : public StrippedMainBase { * @param fn A function to be called on each chunk. */ using BodyFn = std::function; - virtual void nextChunk(BodyFn fn) PURE; + void nextChunk(BodyFn fn); /** * Requests that any outstanding callbacks be dropped. This can be called @@ -100,12 +104,12 @@ class MainCommonBase : public StrippedMainBase { * shared_ptr as that makes it much easier to manage cancellation across * multiple threads. */ - virtual void cancel() PURE; + void cancel(); /** * @return whether the request was cancelled. */ - virtual bool cancelled() const PURE; + bool cancelled() const; private: friend class MainCommonBase; @@ -116,7 +120,43 @@ class MainCommonBase : public StrippedMainBase { * its callback is called false for the second arg, indicating * end of stream. */ - virtual void terminate() PURE; + void terminate(); + + void requestHeaders(); + void requestNextChunk(); + void sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + Server::Instance& server_; + OptRef opt_admin_; + Buffer::OwnedImpl response_; + Http::Code code_; + Server::Admin::RequestPtr request_; + CleanupFn cleanup_; + Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; + Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; + bool more_data_ = true; + + // True if cancel() was explicitly called by the user; headers and body + // callbacks are never called after cancel(). + bool cancelled_ ABSL_GUARDED_BY(mutex_) = false; + + // True if the Envoy server has stopped running its main loop. Headers and + // body requests can be initiated and called back are called after terminate, + // so callers do not have to special case this -- the request will simply fail + // with an empty response. + bool terminated_ ABSL_GUARDED_BY(mutex_) = false; + + // Used to indicate whether the body function has been called with false + // as its second argument. That must always happen at most once, even + // if terminate races with the normal end-of-stream marker. more=false + // may never be sent if the request is cancelled, nor deleted prior to + // it being requested. + bool sent_end_stream_ ABSL_GUARDED_BY(mutex_) = false; + + HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); + BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); + mutable absl::Mutex mutex_; }; using AdminResponseSharedPtr = std::shared_ptr; From f6edee6e8b688b61c8c13ac86eb176b8df99168b Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 22 Feb 2024 21:14:04 -0500 Subject: [PATCH 21/39] add a TerminateNotifier to provide a context with appropriate lifetime to help deal with any order of termination of MainContextBase and AdminResponse. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 73 ++++++++++++++---------------------- source/exe/main_common.h | 45 ++++++++++++++++------ test/exe/main_common_test.cc | 7 ++++ 3 files changed, 69 insertions(+), 56 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index cf94869efb5d..295dc3ba96db 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -57,7 +57,8 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem std::unique_ptr process_context) : StrippedMainBase(options, time_system, listener_hooks, component_factory, std::move(platform_impl), std::move(random_generator), - std::move(process_context), createFunction()) {} + std::move(process_context), createFunction()), + terminate_notifier_(std::make_shared()) {} bool MainCommonBase::run() { // Avoid returning from inside switch cases to minimize uncovered lines @@ -68,7 +69,7 @@ bool MainCommonBase::run() { case Server::Mode::Serve: runServer(); #ifdef ENVOY_ADMIN_FUNCTIONALITY - terminateAdminRequests(); + terminate_notifier_->terminateAdminRequests(); #endif ret = true; break; @@ -106,32 +107,16 @@ void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string } MainCommonBase::AdminResponse::AdminResponse(Server::Instance& server, absl::string_view path, - absl::string_view method, CleanupFn cleanup) - : server_(server), opt_admin_(server.admin()), cleanup_(cleanup) { + absl::string_view method, + TerminateNotifierSharedPtr terminate_notifier) + : server_(server), opt_admin_(server.admin()), terminate_notifier_(terminate_notifier) { request_headers_->setMethod(method); request_headers_->setPath(path); } MainCommonBase::AdminResponse::~AdminResponse() { - // MainCommonBase::response_set_ holds a raw pointer to all outstanding - // responses (not a shared_ptr). So when destructing the response - // we must call cleanup on MainCommonBase if this has not already - // occurred. - // - // Note it's also possible for MainCommonBase to be deleted before - // AdminResponse, in which case it will use its response_set_ - // to call terminate, so we'll track that here and skip calling - // cleanup_ in that case. - absl::MutexLock lock(&mutex_); - if (!terminated_) { - // If there is a terminate/destruct race, calling cleanup_ below - // will lock MainCommonBase::mutex_, which is being held by - // terminateAdminRequests, so will block here until all the - // terminate calls are made. Once terminateAdminRequests - // release its lock, cleanup_ will return and the object - // can be safely destructed. - cleanup_(this); // lock in MainCommonBase - } + terminate(); + terminate_notifier_->detachResponse(this); } void MainCommonBase::AdminResponse::getHeaders(HeadersFn fn) { @@ -200,9 +185,11 @@ bool MainCommonBase::AdminResponse::cancelled() const { void MainCommonBase::AdminResponse::terminate() { ASSERT_IS_MAIN_OR_TEST_THREAD(); absl::MutexLock lock(&mutex_); - terminated_ = true; - sendErrorLockHeld(); - sendAbortChunkLockHeld(); + if (!terminated_) { + terminated_ = true; + sendErrorLockHeld(); + sendAbortChunkLockHeld(); + } } void MainCommonBase::AdminResponse::requestHeaders() { @@ -269,7 +256,7 @@ void MainCommonBase::AdminResponse::sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQ } } -void MainCommonBase::terminateAdminRequests() { +void MainCommonBase::TerminateNotifier::terminateAdminRequests() { ASSERT_IS_MAIN_OR_TEST_THREAD(); absl::MutexLock lock(&mutex_); @@ -284,29 +271,25 @@ void MainCommonBase::terminateAdminRequests() { response_set_.clear(); } -void MainCommonBase::detachResponse(AdminResponse* response) { - absl::MutexLock lock(&mutex_); - int erased = response_set_.erase(response); - ASSERT(erased == 1); - - // We cannot be detaching a response after we've stopped - // admitting new requests. Once we have terminated the - // active requests, they won't call back to detachResponse. - // See the terminated_ check in the destructor. - ASSERT(accepting_admin_requests_); -} - -MainCommonBase::AdminResponseSharedPtr -MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method) { - auto response = std::make_shared( - *server(), path_and_query, method, - [this](AdminResponse* response) { detachResponse(response); }); +void MainCommonBase::TerminateNotifier::attachResponse(AdminResponse* response) { absl::MutexLock lock(&mutex_); if (accepting_admin_requests_) { - response_set_.insert(response.get()); + response_set_.insert(response); } else { response->terminate(); } +} + +void MainCommonBase::TerminateNotifier::detachResponse(AdminResponse* response) { + absl::MutexLock lock(&mutex_); + response_set_.erase(response); +} + +MainCommonBase::AdminResponseSharedPtr +MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method) { + auto response = + std::make_shared(*server(), path_and_query, method, terminate_notifier_); + terminate_notifier_->attachResponse(response.get()); return response; } #endif diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 4d4111ed531b..83977f8eea9f 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -34,6 +34,35 @@ class MainCommonBase : public StrippedMainBase { bool run(); #ifdef ENVOY_ADMIN_FUNCTIONALITY + class AdminResponse; + + // AdminResponse can outlive MainCommonBase. But AdminResponse needs a + // reliable way of knowing whether MainCommonBase is alive, so we do this with + // TerminateNotifier, which is held by MainCommonBase and all the active + // AdminResponses via shared_ptr. This gives MainCommonBase a reliable way of + // notifying all active responses that it is being shut down, and thus all + // responses need to be terminated. And it gives a reliable way for + // AdminResponse to detach itself, even if MainCommonBase is already deleted. + class TerminateNotifier { + public: + bool alive() const { + absl::MutexLock lock(&mutex_); + return accepting_admin_requests_; + } + + void detachResponse(AdminResponse*); + void attachResponse(AdminResponse*); + + // Called after the server run-loop finishes; any outstanding streaming admin requests + // will otherwise hang as the main-thread dispatcher loop will no longer run. + void terminateAdminRequests(); + + mutable absl::Mutex mutex_; + absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); + bool accepting_admin_requests_ ABSL_GUARDED_BY(mutex_) = true; + }; + using TerminateNotifierSharedPtr = std::shared_ptr; + // Holds context for a streaming response from the admin system, enabling // flow-control into another system. This is particularly important when the // generated response is very large, such that holding it in memory may cause @@ -53,10 +82,8 @@ class MainCommonBase : public StrippedMainBase { // cancel() is called, no further callbacks will be called by the response. class AdminResponse : public std::enable_shared_from_this { public: - using CleanupFn = std::function; - AdminResponse(Server::Instance& server, absl::string_view path, absl::string_view method, - CleanupFn cleanup); + TerminateNotifierSharedPtr terminate_notifier); ~AdminResponse(); /** @@ -132,7 +159,7 @@ class MainCommonBase : public StrippedMainBase { Buffer::OwnedImpl response_; Http::Code code_; Server::Admin::RequestPtr request_; - CleanupFn cleanup_; + // CleanupFn cleanup_; Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; bool more_data_ = true; @@ -157,6 +184,8 @@ class MainCommonBase : public StrippedMainBase { HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); mutable absl::Mutex mutex_; + + TerminateNotifierSharedPtr terminate_notifier_; }; using AdminResponseSharedPtr = std::shared_ptr; @@ -185,13 +214,7 @@ class MainCommonBase : public StrippedMainBase { void detachResponse(AdminResponse*); private: - // Called after the server run-loop finishes; any outstanding streaming admin requests - // will otherwise hang as the main-thread dispatcher loop will no longer run. - void terminateAdminRequests(); - - absl::Mutex mutex_; - absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); - bool accepting_admin_requests_ ABSL_GUARDED_BY(mutex_) = true; + TerminateNotifierSharedPtr terminate_notifier_; #endif }; diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index f9d1c126c275..54cd51ed61a9 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -837,4 +837,11 @@ TEST_P(AdminStreamingTest, QuitBeforeCreatingResponse) { response.reset(); } +TEST_P(AdminStreamingTest, QuitTerminateRace) { + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + adminRequest("/quitquitquit", "POST"); + response.reset(); + EXPECT_TRUE(waitForEnvoyToExit()); +} + } // namespace Envoy From eda68343c11420631b2cfe1d888c2845b56c2659 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 22 Feb 2024 22:26:49 -0500 Subject: [PATCH 22/39] remove superfluous function, fix compile-time issue with admin disabled. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 9 +++++++-- source/exe/main_common.h | 5 ----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 295dc3ba96db..286e1b928b3b 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -57,8 +57,13 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem std::unique_ptr process_context) : StrippedMainBase(options, time_system, listener_hooks, component_factory, std::move(platform_impl), std::move(random_generator), - std::move(process_context), createFunction()), - terminate_notifier_(std::make_shared()) {} + std::move(process_context), createFunction()) +#ifdef ENVOY_ADMIN_FUNCTIONALITY + , + terminate_notifier_(std::make_shared()) +#endif +{ +} bool MainCommonBase::run() { // Avoid returning from inside switch cases to minimize uncovered lines diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 83977f8eea9f..6692b6e6081d 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -45,11 +45,6 @@ class MainCommonBase : public StrippedMainBase { // AdminResponse to detach itself, even if MainCommonBase is already deleted. class TerminateNotifier { public: - bool alive() const { - absl::MutexLock lock(&mutex_); - return accepting_admin_requests_; - } - void detachResponse(AdminResponse*); void attachResponse(AdminResponse*); From 7239d03c43b5e6d9606b408a6f90216a9a9b9a9f Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Fri, 23 Feb 2024 10:07:22 -0500 Subject: [PATCH 23/39] add more tests and cleanup Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 22 ++++++++-------------- source/exe/main_common.h | 2 +- test/exe/main_common_test.cc | 22 ++++++++++++++++++++-- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 286e1b928b3b..278543815bcb 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -57,14 +57,7 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem std::unique_ptr process_context) : StrippedMainBase(options, time_system, listener_hooks, component_factory, std::move(platform_impl), std::move(random_generator), - std::move(process_context), createFunction()) -#ifdef ENVOY_ADMIN_FUNCTIONALITY - , - terminate_notifier_(std::make_shared()) -#endif -{ -} - + std::move(process_context), createFunction()) {} bool MainCommonBase::run() { // Avoid returning from inside switch cases to minimize uncovered lines // while avoiding gcc warnings by hitting the final return. @@ -119,12 +112,11 @@ MainCommonBase::AdminResponse::AdminResponse(Server::Instance& server, absl::str request_headers_->setPath(path); } -MainCommonBase::AdminResponse::~AdminResponse() { - terminate(); - terminate_notifier_->detachResponse(this); -} +MainCommonBase::AdminResponse::~AdminResponse() { terminate_notifier_->detachResponse(this); } void MainCommonBase::AdminResponse::getHeaders(HeadersFn fn) { + auto request_headers = [response = shared_from_this()]() { response->requestHeaders(); }; + // First check for cancelling or termination. { absl::MutexLock lock(&mutex_); @@ -138,10 +130,12 @@ void MainCommonBase::AdminResponse::getHeaders(HeadersFn fn) { return; } } - server_.dispatcher().post([this, response = shared_from_this()]() { requestHeaders(); }); + server_.dispatcher().post(request_headers); } void MainCommonBase::AdminResponse::nextChunk(BodyFn fn) { + auto request_next_chunk = [response = shared_from_this()]() { response->requestNextChunk(); }; + // Note the caller may race a call to nextChunk with the server being // terminated. { @@ -165,7 +159,7 @@ void MainCommonBase::AdminResponse::nextChunk(BodyFn fn) { // in which case the callbacks, held in a shared_ptr, will be cancelled // from the destructor. If that happens *before* we post to the main thread, // we will just skip and never call fn. - server_.dispatcher().post([this, response = shared_from_this()]() { requestNextChunk(); }); + server_.dispatcher().post(request_next_chunk); } // Called by the user if it is not longer interested in the result of the diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 6692b6e6081d..d53ab5532d4c 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -180,7 +180,7 @@ class MainCommonBase : public StrippedMainBase { BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); mutable absl::Mutex mutex_; - TerminateNotifierSharedPtr terminate_notifier_; + TerminateNotifierSharedPtr terminate_notifier_{std::make_shared()}; }; using AdminResponseSharedPtr = std::shared_ptr; diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 54cd51ed61a9..3443f9a42160 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -752,11 +752,29 @@ TEST_P(AdminStreamingTest, CancelAfterAskingForHeader1) { TEST_P(AdminStreamingTest, CancelAfterAskingForHeader2) { int header_calls = 0; - // blockMainThreadUntilResume("/ready", "GET"); MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); get_headers_hook_ = [&response]() { response->cancel(); }; response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - // resume_.Notify(); + quitAndWait(); + EXPECT_EQ(0, header_calls); +} + +TEST_P(AdminStreamingTest, DeleteAfterAskingForHeader1) { + int header_calls = 0; + blockMainThreadUntilResume("/ready", "GET"); + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + response.reset(); + resume_.Notify(); + quitAndWait(); + EXPECT_EQ(0, header_calls); +} + +TEST_P(AdminStreamingTest, DeleteAfterAskingForHeader2) { + int header_calls = 0; + MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); + get_headers_hook_ = [&response]() { response.reset(); }; + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); quitAndWait(); EXPECT_EQ(0, header_calls); } From 7fe67f3c3efaa5398b9af1245ef48faebc202c10 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Fri, 23 Feb 2024 16:17:45 -0500 Subject: [PATCH 24/39] fix initialization and expected results. Signed-off-by: Joshua Marantz --- source/exe/main_common.cc | 14 ++++++++++++-- source/exe/main_common.h | 2 +- test/exe/main_common_test.cc | 4 ++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 278543815bcb..201c18e9eb8f 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -57,7 +57,14 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem std::unique_ptr process_context) : StrippedMainBase(options, time_system, listener_hooks, component_factory, std::move(platform_impl), std::move(random_generator), - std::move(process_context), createFunction()) {} + std::move(process_context), createFunction()) +#ifdef ENVOY_ADMIN_FUNCTIONALITY + , + terminate_notifier_(std::make_shared()) +#endif +{ +} + bool MainCommonBase::run() { // Avoid returning from inside switch cases to minimize uncovered lines // while avoiding gcc warnings by hitting the final return. @@ -112,7 +119,10 @@ MainCommonBase::AdminResponse::AdminResponse(Server::Instance& server, absl::str request_headers_->setPath(path); } -MainCommonBase::AdminResponse::~AdminResponse() { terminate_notifier_->detachResponse(this); } +MainCommonBase::AdminResponse::~AdminResponse() { + cancel(); + terminate_notifier_->detachResponse(this); +} void MainCommonBase::AdminResponse::getHeaders(HeadersFn fn) { auto request_headers = [response = shared_from_this()]() { response->requestHeaders(); }; diff --git a/source/exe/main_common.h b/source/exe/main_common.h index d53ab5532d4c..6692b6e6081d 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -180,7 +180,7 @@ class MainCommonBase : public StrippedMainBase { BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); mutable absl::Mutex mutex_; - TerminateNotifierSharedPtr terminate_notifier_{std::make_shared()}; + TerminateNotifierSharedPtr terminate_notifier_; }; using AdminResponseSharedPtr = std::shared_ptr; diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 3443f9a42160..e1025a17eecd 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -767,7 +767,7 @@ TEST_P(AdminStreamingTest, DeleteAfterAskingForHeader1) { response.reset(); resume_.Notify(); quitAndWait(); - EXPECT_EQ(0, header_calls); + EXPECT_EQ(1, header_calls); } TEST_P(AdminStreamingTest, DeleteAfterAskingForHeader2) { @@ -776,7 +776,7 @@ TEST_P(AdminStreamingTest, DeleteAfterAskingForHeader2) { get_headers_hook_ = [&response]() { response.reset(); }; response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); quitAndWait(); - EXPECT_EQ(0, header_calls); + EXPECT_EQ(1, header_calls); } TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk1) { From aa70f3393fa4b5c5f92ffd3f51afbf0180665abd Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 26 Feb 2024 20:35:00 -0500 Subject: [PATCH 25/39] Split out AdminContext into its own source, hdr, test. Signed-off-by: Joshua Marantz --- source/exe/BUILD | 14 + source/exe/README.md | 10 + source/exe/admin_response.cc | 193 +++++++++++++ source/exe/admin_response.h | 166 +++++++++++ source/exe/main_common.cc | 187 +----------- source/exe/main_common.h | 154 +--------- source/server/admin/BUILD | 1 + test/exe/BUILD | 40 +++ test/exe/admin_response_test.cc | 334 +++++++++++++++++++++ test/exe/main_common_test.cc | 466 +----------------------------- test/exe/main_common_test_base.cc | 117 ++++++++ test/exe/main_common_test_base.h | 77 +++++ 12 files changed, 965 insertions(+), 794 deletions(-) create mode 100644 source/exe/README.md create mode 100644 source/exe/admin_response.cc create mode 100644 source/exe/admin_response.h create mode 100644 test/exe/admin_response_test.cc create mode 100644 test/exe/main_common_test_base.cc create mode 100644 test/exe/main_common_test_base.h diff --git a/source/exe/BUILD b/source/exe/BUILD index a4651aae9b4f..f3250d4b7c67 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -103,6 +103,7 @@ envoy_cc_library( "main_common.h", ], deps = [ + ":admin_response_lib", ":platform_impl_lib", ":process_wide_lib", ":stripped_main_base_lib", @@ -118,6 +119,19 @@ envoy_cc_library( ], ) +envoy_cc_library( + name = "admin_response_lib", + srcs = ["admin_response.cc"], + hdrs = ["admin_response.h"], + deps = [ + "//source/common/buffer:buffer_lib", + "//source/common/http:header_map_lib", + "//source/server:server_lib", + "//source/server/admin:admin_lib", + "//source/server/admin:utils_lib", + ], +) + envoy_cc_library( name = "main_common_with_all_extensions_lib", deps = [ diff --git a/source/exe/README.md b/source/exe/README.md new file mode 100644 index 000000000000..bb3beb5ba8ef --- /dev/null +++ b/source/exe/README.md @@ -0,0 +1,10 @@ +State Action Next State Side Effects + +Ground adminRequest(u,m) ResponseInitial +ResponseInitial getHeaders HeadersWait post to main thread to get headers +HeadersWait postComplete HeadersSent call HeadersFn +HeadersSent + + +ResponseInitial cancel ResponseCancelled +Terminated adminRequest(u,m) ResponseTerminated diff --git a/source/exe/admin_response.cc b/source/exe/admin_response.cc new file mode 100644 index 000000000000..42ddb0f40be5 --- /dev/null +++ b/source/exe/admin_response.cc @@ -0,0 +1,193 @@ +#include "source/exe/admin_response.h" + +#include "envoy/server/admin.h" + +#include "source/server/admin/admin_filter.h" +#include "source/server/admin/utils.h" + +namespace Envoy { + +AdminResponse::AdminResponse(Server::Instance& server, absl::string_view path, + absl::string_view method, + TerminateNotifierSharedPtr terminate_notifier) + : server_(server), opt_admin_(server.admin()), terminate_notifier_(terminate_notifier) { + request_headers_->setMethod(method); + request_headers_->setPath(path); +} + +AdminResponse::~AdminResponse() { + cancel(); + terminate_notifier_->detachResponse(this); +} + +void AdminResponse::getHeaders(HeadersFn fn) { + auto request_headers = [response = shared_from_this()]() { response->requestHeaders(); }; + + // First check for cancelling or termination. + { + absl::MutexLock lock(&mutex_); + ASSERT(headers_fn_ == nullptr); + if (cancelled_) { + return; + } + headers_fn_ = fn; + if (terminated_ || !opt_admin_) { + sendErrorLockHeld(); + return; + } + } + server_.dispatcher().post(request_headers); +} + +void AdminResponse::nextChunk(BodyFn fn) { + auto request_next_chunk = [response = shared_from_this()]() { response->requestNextChunk(); }; + + // Note the caller may race a call to nextChunk with the server being + // terminated. + { + absl::MutexLock lock(&mutex_); + ASSERT(body_fn_ == nullptr); + if (cancelled_) { + return; + } + body_fn_ = fn; + if (terminated_ || !opt_admin_) { + sendAbortChunkLockHeld(); + return; + } + } + + // Note that nextChunk may be called from any thread -- it's the callers choice, + // including the Envoy main thread, which would occur if the caller initiates + // the request of a chunk upon receipt of the previous chunk. + // + // In that case it may race against the AdminResponse object being deleted, + // in which case the callbacks, held in a shared_ptr, will be cancelled + // from the destructor. If that happens *before* we post to the main thread, + // we will just skip and never call fn. + server_.dispatcher().post(request_next_chunk); +} + +// Called by the user if it is not longer interested in the result of the +// admin request. After calling cancel() the caller must not call nextChunk or +// getHeaders. +void AdminResponse::cancel() { + absl::MutexLock lock(&mutex_); + cancelled_ = true; + headers_fn_ = nullptr; + body_fn_ = nullptr; +} + +bool AdminResponse::cancelled() const { + absl::MutexLock lock(&mutex_); + return cancelled_; +} + +// Called from terminateAdminRequests when the Envoy server +// terminates. After this is called, the caller may need to complete the +// admin response, and so calls to getHeader and nextChunk remain valid, +// resulting in 503 and an empty body. +void AdminResponse::terminate() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + absl::MutexLock lock(&mutex_); + if (!terminated_) { + terminated_ = true; + sendErrorLockHeld(); + sendAbortChunkLockHeld(); + } +} + +void AdminResponse::requestHeaders() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + { + absl::MutexLock lock(&mutex_); + if (cancelled_ || terminated_) { + return; + } + } + Server::AdminFilter filter(*opt_admin_); + filter.decodeHeaders(*request_headers_, false); + request_ = opt_admin_->makeRequest(filter); + code_ = request_->start(*response_headers_); + { + absl::MutexLock lock(&mutex_); + if (headers_fn_ == nullptr || cancelled_) { + return; + } + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + headers_fn_(code_, *response_headers_); + headers_fn_ = nullptr; + } +} + +void AdminResponse::requestNextChunk() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + { + absl::MutexLock lock(&mutex_); + if (cancelled_ || terminated_) { + return; + } + } + while (response_.length() == 0 && more_data_) { + more_data_ = request_->nextChunk(response_); + } + { + absl::MutexLock lock(&mutex_); + if (sent_end_stream_ || cancelled_) { + return; + } + sent_end_stream_ = !more_data_; + body_fn_(response_, more_data_); + ASSERT(response_.length() == 0); + body_fn_ = nullptr; + } +} + +void AdminResponse::sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + if (!sent_end_stream_ && body_fn_ != nullptr) { + response_.drain(response_.length()); + body_fn_(response_, false); + sent_end_stream_ = true; + } + body_fn_ = nullptr; +} + +void AdminResponse::sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { + if (headers_fn_ != nullptr) { + code_ = Http::Code::InternalServerError; + Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); + headers_fn_(code_, *response_headers_); + headers_fn_ = nullptr; + } +} + +void TerminateNotifier::terminateAdminRequests() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + + absl::MutexLock lock(&mutex_); + accepting_admin_requests_ = false; + for (AdminResponse* response : response_set_) { + // Consider the possibility of response being deleted due to its creator + // dropping its last reference right here. From its destructor it will call + // detachResponse(), which is mutex-ed against this loop, so before the + // memory becomes invalid, the call to terminate will complete. + response->terminate(); + } + response_set_.clear(); +} + +void TerminateNotifier::attachResponse(AdminResponse* response) { + absl::MutexLock lock(&mutex_); + if (accepting_admin_requests_) { + response_set_.insert(response); + } else { + response->terminate(); + } +} + +void TerminateNotifier::detachResponse(AdminResponse* response) { + absl::MutexLock lock(&mutex_); + response_set_.erase(response); +} + +} // namespace Envoy diff --git a/source/exe/admin_response.h b/source/exe/admin_response.h new file mode 100644 index 000000000000..3c8dbc811106 --- /dev/null +++ b/source/exe/admin_response.h @@ -0,0 +1,166 @@ +#pragma once + +#include + +#include "envoy/server/instance.h" + +#include "source/common/buffer/buffer_impl.h" +#include "source/common/http/header_map_impl.h" + +#include "absl/container/flat_hash_set.h" +#include "absl/synchronization/mutex.h" + +namespace Envoy { + +class AdminResponse; + +// AdminResponse can outlive MainCommonBase. But AdminResponse needs a +// reliable way of knowing whether MainCommonBase is alive, so we do this with +// TerminateNotifier, which is held by MainCommonBase and all the active +// AdminResponses via shared_ptr. This gives MainCommonBase a reliable way of +// notifying all active responses that it is being shut down, and thus all +// responses need to be terminated. And it gives a reliable way for +// AdminResponse to detach itself, even if MainCommonBase is already deleted. +class TerminateNotifier { +public: + void detachResponse(AdminResponse*); + void attachResponse(AdminResponse*); + + // Called after the server run-loop finishes; any outstanding streaming admin requests + // will otherwise hang as the main-thread dispatcher loop will no longer run. + void terminateAdminRequests(); + + mutable absl::Mutex mutex_; + absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); + bool accepting_admin_requests_ ABSL_GUARDED_BY(mutex_) = true; +}; +using TerminateNotifierSharedPtr = std::shared_ptr; + +// Holds context for a streaming response from the admin system, enabling +// flow-control into another system. This is particularly important when the +// generated response is very large, such that holding it in memory may cause +// fragmentation or out-of-memory failures. It is possible to interleave xDS +// response handling, overload management, and other admin requests during the +// streaming of a long admin response. +// +// There can be be multiple AdminResponses at a time; each are separately +// managed. However they will obtain their data from Envoy functions that +// run on the main thread. +// +// Responses may still be active after the server has shut down, and is no +// longer running its main thread dispatcher. In this state, the callbacks +// will be called with appropriate error codes. +// +// Requests can also be cancelled explicitly by calling cancel(). After +// cancel() is called, no further callbacks will be called by the response. +class AdminResponse : public std::enable_shared_from_this { +public: + AdminResponse(Server::Instance& server, absl::string_view path, absl::string_view method, + TerminateNotifierSharedPtr terminate_notifier); + ~AdminResponse(); + + /** + * Requests the headers for the response. This can be called from any + * thread, and HeaderFn may also be called from any thread. + * + * HeadersFn will not be called after cancel(). It is invalid to + * to call nextChunk from within HeadersFn -- the caller must trigger + * such a call on another thread, after HeadersFn returns. Calling + * nextChunk from HeadersFn may deadlock. + * + * If the server is shut down during the operation, headersFn may + * be called with a 503, if it has not already been called. + * + * @param fn The function to be called with the headers and status code. + */ + using HeadersFn = std::function; + void getHeaders(HeadersFn fn); + + /** + * Requests a new chunk. This can be called from any thread, and the BodyFn + * callback may also be called from any thread. BodyFn will be called in a + * loop until the Buffer passed to it is fully drained. When 'false' is + * passed as the second arg to BodyFn, that signifies the end of the + * response, and nextChunk must not be called again. + * + * BodyFn will not be called after cancel(). It is invalid to + * to call nextChunk from within BodyFn -- the caller must trigger + * such a call on another thread, after BodyFn returns. Calling + * nextChunk from BodyFn may deadlock. + * + * If the server is shut down during the operation, bodyFn will + * be called with an empty body and 'false' for more_data, if + * this has not already occurred. + * + * @param fn A function to be called on each chunk. + */ + using BodyFn = std::function; + void nextChunk(BodyFn fn); + + /** + * Requests that any outstanding callbacks be dropped. This can be called + * when the context in which the request is made is destroyed. This enables + * an application to implement a. The Response itself is held as a + * shared_ptr as that makes it much easier to manage cancellation across + * multiple threads. + */ + void cancel(); + + /** + * @return whether the request was cancelled. + */ + bool cancelled() const; + +private: + friend class MainCommonBase; + friend class TerminateNotifier; + + /** + * Called when the server is terminated. This calls any outstanding + * callbacks to be called. If nextChunk is called after termination, + * its callback is called false for the second arg, indicating + * end of stream. + */ + void terminate(); + + void requestHeaders(); + void requestNextChunk(); + void sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + Server::Instance& server_; + OptRef opt_admin_; + Buffer::OwnedImpl response_; + Http::Code code_; + Server::Admin::RequestPtr request_; + // CleanupFn cleanup_; + Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; + Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; + bool more_data_ = true; + + // True if cancel() was explicitly called by the user; headers and body + // callbacks are never called after cancel(). + bool cancelled_ ABSL_GUARDED_BY(mutex_) = false; + + // True if the Envoy server has stopped running its main loop. Headers and + // body requests can be initiated and called back are called after terminate, + // so callers do not have to special case this -- the request will simply fail + // with an empty response. + bool terminated_ ABSL_GUARDED_BY(mutex_) = false; + + // Used to indicate whether the body function has been called with false + // as its second argument. That must always happen at most once, even + // if terminate races with the normal end-of-stream marker. more=false + // may never be sent if the request is cancelled, nor deleted prior to + // it being requested. + bool sent_end_stream_ ABSL_GUARDED_BY(mutex_) = false; + + HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); + BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); + mutable absl::Mutex mutex_; + + TerminateNotifierSharedPtr terminate_notifier_; +}; +using AdminResponseSharedPtr = std::shared_ptr; + +} // namespace Envoy diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index 201c18e9eb8f..bdf26c324f58 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -111,191 +111,8 @@ void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string }); } -MainCommonBase::AdminResponse::AdminResponse(Server::Instance& server, absl::string_view path, - absl::string_view method, - TerminateNotifierSharedPtr terminate_notifier) - : server_(server), opt_admin_(server.admin()), terminate_notifier_(terminate_notifier) { - request_headers_->setMethod(method); - request_headers_->setPath(path); -} - -MainCommonBase::AdminResponse::~AdminResponse() { - cancel(); - terminate_notifier_->detachResponse(this); -} - -void MainCommonBase::AdminResponse::getHeaders(HeadersFn fn) { - auto request_headers = [response = shared_from_this()]() { response->requestHeaders(); }; - - // First check for cancelling or termination. - { - absl::MutexLock lock(&mutex_); - ASSERT(headers_fn_ == nullptr); - if (cancelled_) { - return; - } - headers_fn_ = fn; - if (terminated_ || !opt_admin_) { - sendErrorLockHeld(); - return; - } - } - server_.dispatcher().post(request_headers); -} - -void MainCommonBase::AdminResponse::nextChunk(BodyFn fn) { - auto request_next_chunk = [response = shared_from_this()]() { response->requestNextChunk(); }; - - // Note the caller may race a call to nextChunk with the server being - // terminated. - { - absl::MutexLock lock(&mutex_); - ASSERT(body_fn_ == nullptr); - if (cancelled_) { - return; - } - body_fn_ = fn; - if (terminated_ || !opt_admin_) { - sendAbortChunkLockHeld(); - return; - } - } - - // Note that nextChunk may be called from any thread -- it's the callers choice, - // including the Envoy main thread, which would occur if the caller initiates - // the request of a chunk upon receipt of the previous chunk. - // - // In that case it may race against the AdminResponse object being deleted, - // in which case the callbacks, held in a shared_ptr, will be cancelled - // from the destructor. If that happens *before* we post to the main thread, - // we will just skip and never call fn. - server_.dispatcher().post(request_next_chunk); -} - -// Called by the user if it is not longer interested in the result of the -// admin request. After calling cancel() the caller must not call nextChunk or -// getHeaders. -void MainCommonBase::AdminResponse::cancel() { - absl::MutexLock lock(&mutex_); - cancelled_ = true; - headers_fn_ = nullptr; - body_fn_ = nullptr; -} - -bool MainCommonBase::AdminResponse::cancelled() const { - absl::MutexLock lock(&mutex_); - return cancelled_; -} - -// Called from MainCommonBase::terminateAdminRequests when the Envoy server -// terminates. After this is called, the caller may need to complete the -// admin response, and so calls to getHeader and nextChunk remain valid, -// resulting in 503 and an empty body. -void MainCommonBase::AdminResponse::terminate() { - ASSERT_IS_MAIN_OR_TEST_THREAD(); - absl::MutexLock lock(&mutex_); - if (!terminated_) { - terminated_ = true; - sendErrorLockHeld(); - sendAbortChunkLockHeld(); - } -} - -void MainCommonBase::AdminResponse::requestHeaders() { - ASSERT_IS_MAIN_OR_TEST_THREAD(); - { - absl::MutexLock lock(&mutex_); - if (cancelled_ || terminated_) { - return; - } - } - Server::AdminFilter filter(*opt_admin_); - filter.decodeHeaders(*request_headers_, false); - request_ = opt_admin_->makeRequest(filter); - code_ = request_->start(*response_headers_); - { - absl::MutexLock lock(&mutex_); - if (headers_fn_ == nullptr || cancelled_) { - return; - } - Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); - headers_fn_(code_, *response_headers_); - headers_fn_ = nullptr; - } -} - -void MainCommonBase::AdminResponse::requestNextChunk() { - ASSERT_IS_MAIN_OR_TEST_THREAD(); - { - absl::MutexLock lock(&mutex_); - if (cancelled_ || terminated_) { - return; - } - } - while (response_.length() == 0 && more_data_) { - more_data_ = request_->nextChunk(response_); - } - { - absl::MutexLock lock(&mutex_); - if (sent_end_stream_ || cancelled_) { - return; - } - sent_end_stream_ = !more_data_; - body_fn_(response_, more_data_); - ASSERT(response_.length() == 0); - body_fn_ = nullptr; - } -} - -void MainCommonBase::AdminResponse::sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { - if (!sent_end_stream_ && body_fn_ != nullptr) { - response_.drain(response_.length()); - body_fn_(response_, false); - sent_end_stream_ = true; - } - body_fn_ = nullptr; -} - -void MainCommonBase::AdminResponse::sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { - if (headers_fn_ != nullptr) { - code_ = Http::Code::InternalServerError; - Server::Utility::populateFallbackResponseHeaders(code_, *response_headers_); - headers_fn_(code_, *response_headers_); - headers_fn_ = nullptr; - } -} - -void MainCommonBase::TerminateNotifier::terminateAdminRequests() { - ASSERT_IS_MAIN_OR_TEST_THREAD(); - - absl::MutexLock lock(&mutex_); - accepting_admin_requests_ = false; - for (AdminResponse* response : response_set_) { - // Consider the possibility of response being deleted due to its creator - // dropping its last reference right here. From its destructor it will call - // detachResponse(), which is mutex-ed against this loop, so before the - // memory becomes invalid, the call to terminate will complete. - response->terminate(); - } - response_set_.clear(); -} - -void MainCommonBase::TerminateNotifier::attachResponse(AdminResponse* response) { - absl::MutexLock lock(&mutex_); - if (accepting_admin_requests_) { - response_set_.insert(response); - } else { - response->terminate(); - } -} - -void MainCommonBase::TerminateNotifier::detachResponse(AdminResponse* response) { - absl::MutexLock lock(&mutex_); - response_set_.erase(response); -} - -MainCommonBase::AdminResponseSharedPtr -MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method) { +AdminResponseSharedPtr MainCommonBase::adminRequest(absl::string_view path_and_query, + absl::string_view method) { auto response = std::make_shared(*server(), path_and_query, method, terminate_notifier_); terminate_notifier_->attachResponse(response.get()); diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 6692b6e6081d..ade0915aaada 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -10,6 +10,7 @@ #include "source/common/stats/symbol_table.h" #include "source/common/stats/thread_local_store.h" #include "source/common/thread_local/thread_local_impl.h" +#include "source/exe/admin_response.h" #include "source/exe/process_wide.h" #include "source/exe/stripped_main_base.h" #include "source/server/listener_hooks.h" @@ -34,156 +35,6 @@ class MainCommonBase : public StrippedMainBase { bool run(); #ifdef ENVOY_ADMIN_FUNCTIONALITY - class AdminResponse; - - // AdminResponse can outlive MainCommonBase. But AdminResponse needs a - // reliable way of knowing whether MainCommonBase is alive, so we do this with - // TerminateNotifier, which is held by MainCommonBase and all the active - // AdminResponses via shared_ptr. This gives MainCommonBase a reliable way of - // notifying all active responses that it is being shut down, and thus all - // responses need to be terminated. And it gives a reliable way for - // AdminResponse to detach itself, even if MainCommonBase is already deleted. - class TerminateNotifier { - public: - void detachResponse(AdminResponse*); - void attachResponse(AdminResponse*); - - // Called after the server run-loop finishes; any outstanding streaming admin requests - // will otherwise hang as the main-thread dispatcher loop will no longer run. - void terminateAdminRequests(); - - mutable absl::Mutex mutex_; - absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); - bool accepting_admin_requests_ ABSL_GUARDED_BY(mutex_) = true; - }; - using TerminateNotifierSharedPtr = std::shared_ptr; - - // Holds context for a streaming response from the admin system, enabling - // flow-control into another system. This is particularly important when the - // generated response is very large, such that holding it in memory may cause - // fragmentation or out-of-memory failures. It is possible to interleave xDS - // response handling, overload management, and other admin requests during the - // streaming of a long admin response. - // - // There can be be multiple AdminResponses at a time; each are separately - // managed. However they will obtain their data from Envoy functions that - // run on the main thread. - // - // Responses may still be active after the server has shut down, and is no - // longer running its main thread dispatcher. In this state, the callbacks - // will be called with appropriate error codes. - // - // Requests can also be cancelled explicitly by calling cancel(). After - // cancel() is called, no further callbacks will be called by the response. - class AdminResponse : public std::enable_shared_from_this { - public: - AdminResponse(Server::Instance& server, absl::string_view path, absl::string_view method, - TerminateNotifierSharedPtr terminate_notifier); - ~AdminResponse(); - - /** - * Requests the headers for the response. This can be called from any - * thread, and HeaderFn may also be called from any thread. - * - * HeadersFn will not be called after cancel(). It is invalid to - * to call nextChunk from within HeadersFn -- the caller must trigger - * such a call on another thread, after HeadersFn returns. Calling - * nextChunk from HeadersFn may deadlock. - * - * If the server is shut down during the operation, headersFn may - * be called with a 503, if it has not already been called. - * - * @param fn The function to be called with the headers and status code. - */ - using HeadersFn = std::function; - void getHeaders(HeadersFn fn); - - /** - * Requests a new chunk. This can be called from any thread, and the BodyFn - * callback may also be called from any thread. BodyFn will be called in a - * loop until the Buffer passed to it is fully drained. When 'false' is - * passed as the second arg to BodyFn, that signifies the end of the - * response, and nextChunk must not be called again. - * - * BodyFn will not be called after cancel(). It is invalid to - * to call nextChunk from within BodyFn -- the caller must trigger - * such a call on another thread, after BodyFn returns. Calling - * nextChunk from BodyFn may deadlock. - * - * If the server is shut down during the operation, bodyFn will - * be called with an empty body and 'false' for more_data, if - * this has not already occurred. - * - * @param fn A function to be called on each chunk. - */ - using BodyFn = std::function; - void nextChunk(BodyFn fn); - - /** - * Requests that any outstanding callbacks be dropped. This can be called - * when the context in which the request is made is destroyed. This enables - * an application to implement a. The Response itself is held as a - * shared_ptr as that makes it much easier to manage cancellation across - * multiple threads. - */ - void cancel(); - - /** - * @return whether the request was cancelled. - */ - bool cancelled() const; - - private: - friend class MainCommonBase; - - /** - * Called when the server is terminated. This calls any outstanding - * callbacks to be called. If nextChunk is called after termination, - * its callback is called false for the second arg, indicating - * end of stream. - */ - void terminate(); - - void requestHeaders(); - void requestNextChunk(); - void sendAbortChunkLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - void sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); - - Server::Instance& server_; - OptRef opt_admin_; - Buffer::OwnedImpl response_; - Http::Code code_; - Server::Admin::RequestPtr request_; - // CleanupFn cleanup_; - Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; - Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; - bool more_data_ = true; - - // True if cancel() was explicitly called by the user; headers and body - // callbacks are never called after cancel(). - bool cancelled_ ABSL_GUARDED_BY(mutex_) = false; - - // True if the Envoy server has stopped running its main loop. Headers and - // body requests can be initiated and called back are called after terminate, - // so callers do not have to special case this -- the request will simply fail - // with an empty response. - bool terminated_ ABSL_GUARDED_BY(mutex_) = false; - - // Used to indicate whether the body function has been called with false - // as its second argument. That must always happen at most once, even - // if terminate races with the normal end-of-stream marker. more=false - // may never be sent if the request is cancelled, nor deleted prior to - // it being requested. - bool sent_end_stream_ ABSL_GUARDED_BY(mutex_) = false; - - HeadersFn headers_fn_ ABSL_GUARDED_BY(mutex_); - BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); - mutable absl::Mutex mutex_; - - TerminateNotifierSharedPtr terminate_notifier_; - }; - using AdminResponseSharedPtr = std::shared_ptr; - using AdminRequestFn = std::function; @@ -241,8 +92,7 @@ class MainCommon { const MainCommonBase::AdminRequestFn& handler) { base_.adminRequest(path_and_query, method, handler); } - MainCommonBase::AdminResponseSharedPtr adminRequest(absl::string_view path_and_query, - absl::string_view method) { + AdminResponseSharedPtr adminRequest(absl::string_view path_and_query, absl::string_view method) { return base_.adminRequest(path_and_query, method); } #endif diff --git a/source/server/admin/BUILD b/source/server/admin/BUILD index 3913191068d4..1c453e712a8e 100644 --- a/source/server/admin/BUILD +++ b/source/server/admin/BUILD @@ -388,6 +388,7 @@ envoy_cc_library( name = "utils_lib", srcs = ["utils.cc"], hdrs = ["utils.h"], + visibility = ["//visibility:public"], deps = [ "//envoy/init:manager_interface", "//source/common/common:enum_to_int", diff --git a/test/exe/BUILD b/test/exe/BUILD index 7be94770af65..33f732b37af5 100644 --- a/test/exe/BUILD +++ b/test/exe/BUILD @@ -1,6 +1,7 @@ load( "//bazel:envoy_build_system.bzl", "envoy_cc_test", + "envoy_cc_test_library", "envoy_package", "envoy_select_admin_functionality", "envoy_sh_test", @@ -60,6 +61,26 @@ envoy_sh_test( ], ) +envoy_cc_test_library( + name = "main_common_test_base_lib", + srcs = ["main_common_test_base.cc"], + hdrs = ["main_common_test_base.h"], + data = [ + "//test/config/integration:google_com_proxy_port_0", + ], + deps = [ + "//source/common/api:api_lib", + "//source/common/stats:isolated_store_lib", + "//source/exe:envoy_main_common_with_core_extensions_lib", + "//source/exe:platform_impl_lib", + "//source/extensions/clusters/logical_dns:logical_dns_cluster_lib", + "//test/mocks/runtime:runtime_mocks", + "//test/test_common:contention_lib", + "//test/test_common:environment_lib", + "//test/test_common:thread_factory_for_test_lib", + ], +) + envoy_cc_test( name = "main_common_test", srcs = envoy_select_admin_functionality(["main_common_test.cc"]), @@ -67,6 +88,25 @@ envoy_cc_test( "//test/config/integration:google_com_proxy_port_0", ], deps = [ + ":main_common_test_base_lib", + "//source/common/api:api_lib", + "//source/exe:envoy_main_common_with_core_extensions_lib", + "//source/exe:platform_impl_lib", + "//source/extensions/clusters/logical_dns:logical_dns_cluster_lib", + "//test/mocks/runtime:runtime_mocks", + "//test/test_common:contention_lib", + "//test/test_common:environment_lib", + ], +) + +envoy_cc_test( + name = "admin_response_test", + srcs = envoy_select_admin_functionality(["admin_response_test.cc"]), + data = [ + "//test/config/integration:google_com_proxy_port_0", + ], + deps = [ + ":main_common_test_base_lib", "//source/common/api:api_lib", "//source/exe:envoy_main_common_with_core_extensions_lib", "//source/exe:platform_impl_lib", diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc new file mode 100644 index 000000000000..ff4d94dbeb73 --- /dev/null +++ b/test/exe/admin_response_test.cc @@ -0,0 +1,334 @@ +#include "test/exe/main_common_test_base.h" + +#include "gtest/gtest.h" + +namespace Envoy { + +class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { +protected: + static constexpr absl::string_view StreamingEndpoint = "/stream"; + + class StreamingAdminRequest : public Envoy::Server::Admin::Request { + public: + static constexpr uint64_t NumChunks = 10; + static constexpr uint64_t BytesPerChunk = 10000; + + StreamingAdminRequest(std::function& get_headers_hook, + std::function& next_chunk_hook) + : chunk_(BytesPerChunk, 'a'), get_headers_hook_(get_headers_hook), + next_chunk_hook_(next_chunk_hook) {} + Http::Code start(Http::ResponseHeaderMap&) override { + get_headers_hook_(); + return Http::Code::OK; + } + bool nextChunk(Buffer::Instance& response) override { + next_chunk_hook_(); + response.add(chunk_); + return --chunks_remaining_ > 0; + } + + private: + const std::string chunk_; + uint64_t chunks_remaining_{NumChunks}; + std::function& get_headers_hook_; + std::function& next_chunk_hook_; + }; + + AdminStreamingTest() : AdminRequestTestBase(Network::Address::IpVersion::v4) { + startEnvoy(); + started_.WaitForNotification(); + Server::Admin& admin = *main_common_->server()->admin(); + admin.addStreamingHandler( + std::string(StreamingEndpoint), "streaming api", + [this](Server::AdminStream&) -> Server::Admin::RequestPtr { + return std::make_unique(get_headers_hook_, next_chunk_hook_); + }, + true, false); + } + + using ChunksBytes = std::pair; + ChunksBytes runStreamingRequest(AdminResponseSharedPtr response, + std::function chunk_hook = nullptr) { + absl::Notification done; + std::vector out; + absl::Notification headers_notify; + response->getHeaders( + [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); + headers_notify.WaitForNotification(); + bool cont = true; + uint64_t num_chunks = 0; + uint64_t num_bytes = 0; + while (cont && !response->cancelled()) { + absl::Notification chunk_notify; + response->nextChunk( + [&chunk_notify, &num_chunks, &num_bytes, &cont](Buffer::Instance& chunk, bool more) { + cont = more; + num_bytes += chunk.length(); + chunk.drain(chunk.length()); + ++num_chunks; + chunk_notify.Notify(); + }); + chunk_notify.WaitForNotification(); + if (chunk_hook != nullptr) { + chunk_hook(); + } + } + + return ChunksBytes(num_chunks, num_bytes); + } + + /** + * @return a streaming response to a GET of StreamingEndpoint. + */ + AdminResponseSharedPtr streamingResponse() { + return main_common_->adminRequest(StreamingEndpoint, "GET"); + } + + /** + * In order to trigger certain early-exit criteria in a test, we can exploit + * the fact that all the admin responses are delivered on the main thread. + * So we can pause those by blocking the main thread indefinitely. + * + * To resume the main thread, call resume_.Notify(); + * + * @param url the stats endpoint to initiate. + */ + void blockMainThreadUntilResume(absl::string_view url, absl::string_view method) { + AdminResponseSharedPtr blocked_response = main_common_->adminRequest(url, method); + absl::Notification block_main_thread; + blocked_response->getHeaders( + [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); + } + + /** + * To hit an early exit after the second lock in + * AdminResponseImpl::requestNextChunk and AdminResponseImpl::requestHeaders + * we, need to post a cancel request to the main thread, but block that on the + * resume_ notification. This allows a subsequent test call to getHeaders or + * nextChunk initiate a post to main thread, but runs the cancel call on the + * response before headers_fn_ or body_fn_ runs. + * + * @param response the response to cancel after resume_. + */ + void blockMainThreadAndCancelResponseAfterResume(AdminResponseSharedPtr response) { + main_common_->dispatcherForTest().post([response, this] { + resume_.WaitForNotification(); + response->cancel(); + }); + } + + /** + * Requests the headers and waits until the headers have been sent. + * + * @param response the response from which to get headers. + */ + void waitForHeaders(AdminResponseSharedPtr response) { + absl::Notification headers_notify; + response->getHeaders( + [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); + headers_notify.WaitForNotification(); + } + + /** + * Initiates a '/quitquitquit' call and requests the headers for that call, + * but does not wait for the call to complete. We avoid waiting in order to + * trigger a potential race to ensure that MainCommon handles it properly. + */ + void quitAndRequestHeaders() { + AdminResponseSharedPtr quit_response = main_common_->adminRequest("/quitquitquit", "POST"); + quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); + } + + // This variable provides a hook to allow a test method to specify a hook to + // run when nextChunk() is called. This is currently used only for one test, + // CancelAfterAskingForChunk, that initiates a cancel() from within the chunk + // handler. + std::function get_headers_hook_ = []() {}; + std::function next_chunk_hook_ = []() {}; +}; + +TEST_F(AdminStreamingTest, RequestGetStatsAndQuit) { + AdminResponseSharedPtr response = streamingResponse(); + ChunksBytes chunks_bytes = runStreamingRequest(response); + EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); + EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); + EXPECT_TRUE(quitAndWait()); +} + +TEST_F(AdminStreamingTest, QuitDuringChunks) { + int quit_counter = 0; + static constexpr int chunks_to_send_before_quitting = 3; + AdminResponseSharedPtr response = streamingResponse(); + ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { + if (++quit_counter == chunks_to_send_before_quitting) { + EXPECT_TRUE(quitAndWait()); + } + }); + EXPECT_EQ(4, chunks_bytes.first); + EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); +} + +TEST_F(AdminStreamingTest, CancelDuringChunks) { + int quit_counter = 0; + static constexpr int chunks_to_send_before_quitting = 3; + AdminResponseSharedPtr response = streamingResponse(); + ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { + if (++quit_counter == chunks_to_send_before_quitting) { + response->cancel(); + } + }); + EXPECT_EQ(3, chunks_bytes.first); // no final call to the chunk handler after cancel. + EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, + chunks_bytes.second); + EXPECT_TRUE(quitAndWait()); +} + +TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader1) { + blockMainThreadUntilResume("/ready", "GET"); + AdminResponseSharedPtr response = streamingResponse(); + response->cancel(); + resume_.Notify(); + int header_calls = 0; + + // After 'cancel', the headers function will not be called. + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(0, header_calls); +} + +TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader2) { + AdminResponseSharedPtr response = streamingResponse(); + blockMainThreadAndCancelResponseAfterResume(response); + int header_calls = 0; + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + resume_.Notify(); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(0, header_calls); +} + +TEST_F(AdminStreamingTest, CancelAfterAskingForHeader1) { + int header_calls = 0; + blockMainThreadUntilResume("/ready", "GET"); + AdminResponseSharedPtr response = streamingResponse(); + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + response->cancel(); + resume_.Notify(); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(0, header_calls); +} + +TEST_F(AdminStreamingTest, CancelAfterAskingForHeader2) { + int header_calls = 0; + AdminResponseSharedPtr response = streamingResponse(); + get_headers_hook_ = [&response]() { response->cancel(); }; + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(0, header_calls); +} + +TEST_F(AdminStreamingTest, DeleteAfterAskingForHeader1) { + int header_calls = 0; + blockMainThreadUntilResume("/ready", "GET"); + AdminResponseSharedPtr response = streamingResponse(); + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + response.reset(); + resume_.Notify(); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(1, header_calls); +} + +TEST_F(AdminStreamingTest, DeleteAfterAskingForHeader2) { + int header_calls = 0; + AdminResponseSharedPtr response = streamingResponse(); + get_headers_hook_ = [&response]() { response.reset(); }; + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(1, header_calls); +} + +TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk1) { + AdminResponseSharedPtr response = streamingResponse(); + waitForHeaders(response); + response->cancel(); + int chunk_calls = 0; + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(0, chunk_calls); +} + +TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk2) { + AdminResponseSharedPtr response = streamingResponse(); + waitForHeaders(response); + blockMainThreadAndCancelResponseAfterResume(response); + int chunk_calls = 0; + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + resume_.Notify(); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(0, chunk_calls); +} + +TEST_F(AdminStreamingTest, CancelAfterAskingForChunk) { + AdminResponseSharedPtr response = streamingResponse(); + waitForHeaders(response); + blockMainThreadUntilResume("/ready", "GET"); + int chunk_calls = 0; + + // Cause the /streaming handler to pause while yielding the next chunk, to hit + // an early exit in MainCommonBase::requestNextChunk. + next_chunk_hook_ = [response]() { response->cancel(); }; + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + resume_.Notify(); + EXPECT_TRUE(quitAndWait()); + EXPECT_EQ(0, chunk_calls); +} + +TEST_F(AdminStreamingTest, QuitBeforeHeaders) { + AdminResponseSharedPtr response = streamingResponse(); + EXPECT_TRUE(quitAndWait()); + ChunksBytes chunks_bytes = runStreamingRequest(response); + EXPECT_EQ(1, chunks_bytes.first); + EXPECT_EQ(0, chunks_bytes.second); +} + +TEST_F(AdminStreamingTest, QuitDeleteRace) { + AdminResponseSharedPtr response = streamingResponse(); + // Initiates a streaming quit on the main thread, but do not wait for it. + quitAndRequestHeaders(); + response.reset(); // Races with the quitquitquit + EXPECT_TRUE(waitForEnvoyToExit()); +} + +TEST_F(AdminStreamingTest, QuitCancelRace) { + AdminResponseSharedPtr response = streamingResponse(); + quitAndRequestHeaders(); + response->cancel(); // Races with the quitquitquit + EXPECT_TRUE(waitForEnvoyToExit()); +} + +TEST_F(AdminStreamingTest, QuitBeforeCreatingResponse) { + // Initiates a streaming quit on the main thread, and wait for headers, which + // will trigger the termination of the event loop, and subsequent nulling of + // main_common_. However we can pause the test infrastructure after the quit + // takes hold leaving main_common_ in tact, to reproduce a potential race. + pause_after_run_ = true; + adminRequest("/quitquitquit", "POST"); + pause_point_.WaitForNotification(); // run() finished, but main_common_ still exists. + AdminResponseSharedPtr response = streamingResponse(); + ChunksBytes chunks_bytes = runStreamingRequest(response); + EXPECT_EQ(1, chunks_bytes.first); + EXPECT_EQ(0, chunks_bytes.second); + resume_.Notify(); + EXPECT_TRUE(waitForEnvoyToExit()); + response.reset(); +} + +TEST_F(AdminStreamingTest, QuitTerminateRace) { + AdminResponseSharedPtr response = streamingResponse(); + adminRequest("/quitquitquit", "POST"); + response.reset(); + EXPECT_TRUE(waitForEnvoyToExit()); +} + +} // namespace Envoy diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index e1025a17eecd..541f4734587b 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -9,6 +9,7 @@ #include "source/exe/platform_impl.h" #include "source/server/options_impl.h" +#include "test/exe/main_common_test_base.h" #include "test/mocks/common.h" #include "test/test_common/contention.h" #include "test/test_common/environment.h" @@ -52,34 +53,10 @@ const std::string& outOfMemoryPattern() { * an argv array that is terminated with nullptr. Identifies the config * file relative to runfiles directory. */ -class MainCommonTest : public testing::TestWithParam { -protected: - MainCommonTest() - : config_file_(TestEnvironment::temporaryFileSubstitute( - "test/config/integration/google_com_proxy_port_0.yaml", TestEnvironment::ParamMap(), - TestEnvironment::PortMap(), GetParam())), - argv_({"envoy-static", "--use-dynamic-base-id", "-c", config_file_.c_str(), nullptr}) {} - - const char* const* argv() { return &argv_[0]; } - int argc() { return argv_.size() - 1; } - - // Adds an argument, assuring that argv remains null-terminated. - void addArg(const char* arg) { - ASSERT(!argv_.empty()); - const size_t last = argv_.size() - 1; - ASSERT(argv_[last] == nullptr); // invariant established in ctor, maintained below. - argv_[last] = arg; // guaranteed non-empty - argv_.push_back(nullptr); - } - - // Adds options to make Envoy exit immediately after initialization. - void initOnly() { - addArg("--mode"); - addArg("init_only"); - } - - std::string config_file_; - std::vector argv_; +class MainCommonTest : public MainCommonTestBase, + public testing::TestWithParam { + protected: + MainCommonTest() : MainCommonTestBase(GetParam()) {} }; INSTANTIATE_TEST_SUITE_P(IpVersions, MainCommonTest, testing::ValuesIn(TestEnvironment::getIpVersionsForTest()), @@ -278,101 +255,10 @@ TEST_P(MainCommonDeathTest, OutOfMemoryHandler) { #endif } -class AdminRequestTest : public MainCommonTest { -protected: - AdminRequestTest() { addArg("--disable-hot-restart"); } - - // Runs an admin request specified in path, blocking until completion, and - // returning the response body. - std::string adminRequest(absl::string_view path, absl::string_view method) { - absl::Notification done; - std::string out; - main_common_->adminRequest( - path, method, - [&done, &out](const Http::HeaderMap& /*response_headers*/, absl::string_view body) { - out = std::string(body); - done.Notify(); - }); - done.WaitForNotification(); - return out; - } - - // Initiates Envoy running in its own thread. - void startEnvoy() { - envoy_thread_ = Thread::threadFactoryForTest().createThread([this]() { - // Note: main_common_ is accessed in the testing thread, but - // is race-free, as MainCommon::run() does not return until - // triggered with an adminRequest POST to /quitquitquit, which - // is done in the testing thread. - main_common_ = std::make_unique(argc(), argv()); - envoy_started_ = true; - started_.Notify(); - pauseResumeInterlock(pause_before_run_); - bool status = main_common_->run(); - pauseResumeInterlock(pause_after_run_); - main_common_.reset(); - envoy_finished_ = true; - envoy_return_ = status; - finished_.Notify(); - }); - } - - // Conditionally pauses at a critical point in the Envoy thread, waiting for - // the test thread to trigger something at that exact line. The test thread - // can then call resume_.Notify() to allow the Envoy thread to resume. - void pauseResumeInterlock(bool enable) { - if (enable) { - pause_point_.Notify(); - resume_.WaitForNotification(); - } - } - - // Wait until Envoy is inside the main server run loop proper. Before entering, Envoy runs any - // pending post callbacks, so it's not reliable to use adminRequest() or post() to do this. - // Generally, tests should not depend on this for correctness, but as a result of - // https://github.com/libevent/libevent/issues/779 we need to for TSAN. This is because the entry - // to event_base_loop() is where the signal base race occurs, but once we're in that loop in - // blocking mode, we're safe to take signals. - // TODO(htuch): Remove when https://github.com/libevent/libevent/issues/779 is fixed. - void waitForEnvoyRun() { - absl::Notification done; - main_common_->dispatcherForTest().post([this, &done] { - struct Sacrifice : Event::DeferredDeletable { - Sacrifice(absl::Notification& notify) : notify_(notify) {} - ~Sacrifice() override { notify_.Notify(); } - absl::Notification& notify_; - }; - auto sacrifice = std::make_unique(done); - // Wait for a deferred delete cleanup, this only happens in the main server run loop. - main_common_->dispatcherForTest().deferredDelete(std::move(sacrifice)); - }); - done.WaitForNotification(); - } - - // Having triggered Envoy to quit (via signal or /quitquitquit), this blocks until Envoy exits. - bool waitForEnvoyToExit() { - finished_.WaitForNotification(); - envoy_thread_->join(); - return envoy_return_; - } - - void quitAndWait() { - adminRequest("/quitquitquit", "POST"); - EXPECT_TRUE(waitForEnvoyToExit()); - } - - Stats::IsolatedStoreImpl stats_store_; - std::unique_ptr envoy_thread_; - std::unique_ptr main_common_; - absl::Notification started_; - absl::Notification finished_; - absl::Notification resume_; - absl::Notification pause_point_; - bool envoy_return_{false}; - bool envoy_started_{false}; - bool envoy_finished_{false}; - bool pause_before_run_{false}; - bool pause_after_run_{false}; +class AdminRequestTest : public AdminRequestTestBase, + public testing::TestWithParam { + protected: + AdminRequestTest() : AdminRequestTestBase(GetParam()) {} }; INSTANTIATE_TEST_SUITE_P(IpVersions, AdminRequestTest, testing::ValuesIn(TestEnvironment::getIpVersionsForTest()), @@ -528,338 +414,4 @@ TEST_P(AdminRequestTest, AdminRequestAfterRun) { EXPECT_EQ(1, lambda_destroy_count); } -class AdminStreamingTest : public AdminRequestTest { -protected: - static constexpr absl::string_view StreamingEndpoint = "/stream"; - - class StreamingAdminRequest : public Envoy::Server::Admin::Request { - public: - static constexpr uint64_t NumChunks = 10; - static constexpr uint64_t BytesPerChunk = 10000; - - StreamingAdminRequest(std::function& get_headers_hook, - std::function& next_chunk_hook) - : chunk_(BytesPerChunk, 'a'), get_headers_hook_(get_headers_hook), - next_chunk_hook_(next_chunk_hook) {} - Http::Code start(Http::ResponseHeaderMap&) override { - get_headers_hook_(); - return Http::Code::OK; - } - bool nextChunk(Buffer::Instance& response) override { - next_chunk_hook_(); - response.add(chunk_); - return --chunks_remaining_ > 0; - } - - private: - const std::string chunk_; - uint64_t chunks_remaining_{NumChunks}; - std::function& get_headers_hook_; - std::function& next_chunk_hook_; - }; - - AdminStreamingTest() { - startEnvoy(); - started_.WaitForNotification(); - Server::Admin& admin = *main_common_->server()->admin(); - admin.addStreamingHandler( - std::string(StreamingEndpoint), "streaming api", - [this](Server::AdminStream&) -> Server::Admin::RequestPtr { - return std::make_unique(get_headers_hook_, next_chunk_hook_); - }, - true, false); - } - - using ChunksBytes = std::pair; - ChunksBytes runStreamingRequest(MainCommonBase::AdminResponseSharedPtr response, - std::function chunk_hook = nullptr) { - absl::Notification done; - std::vector out; - absl::Notification headers_notify; - response->getHeaders( - [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); - headers_notify.WaitForNotification(); - bool cont = true; - uint64_t num_chunks = 0; - uint64_t num_bytes = 0; - while (cont && !response->cancelled()) { - absl::Notification chunk_notify; - response->nextChunk( - [&chunk_notify, &num_chunks, &num_bytes, &cont](Buffer::Instance& chunk, bool more) { - cont = more; - num_bytes += chunk.length(); - chunk.drain(chunk.length()); - ++num_chunks; - chunk_notify.Notify(); - }); - chunk_notify.WaitForNotification(); - if (chunk_hook != nullptr) { - chunk_hook(); - } - } - - return ChunksBytes(num_chunks, num_bytes); - } - - /** - * @return a streaming response to a GET of StreamingEndpoint. - */ - MainCommonBase::AdminResponseSharedPtr streamingResponse() { - return main_common_->adminRequest(StreamingEndpoint, "GET"); - } - - /** - * In order to trigger certain early-exit criteria in a test, we can exploit - * the fact that all the admin responses are delivered on the main thread. - * So we can pause those by blocking the main thread indefinitely. - * - * To resume the main thread, call resume_.Notify(); - * - * @param url the stats endpoint to initiate. - */ - void blockMainThreadUntilResume(absl::string_view url, absl::string_view method) { - MainCommonBase::AdminResponseSharedPtr blocked_response = - main_common_->adminRequest(url, method); - absl::Notification block_main_thread; - blocked_response->getHeaders( - [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); - } - - /** - * To hit an early exit after the second lock in - * AdminResponseImpl::requestNextChunk and AdminResponseImpl::requestHeaders - * we, need to post a cancel request to the main thread, but block that on the - * resume_ notification. This allows a subsequent test call to getHeaders or - * nextChunk initiate a post to main thread, but runs the cancel call on the - * response before headers_fn_ or body_fn_ runs. - * - * @param response the response to cancel after resume_. - */ - void - blockMainThreadAndCancelResponseAfterResume(MainCommonBase::AdminResponseSharedPtr response) { - main_common_->dispatcherForTest().post([response, this] { - resume_.WaitForNotification(); - response->cancel(); - }); - } - - /** - * Requests the headers and waits until the headers have been sent. - * - * @param response the response from which to get headers. - */ - void waitForHeaders(MainCommonBase::AdminResponseSharedPtr response) { - absl::Notification headers_notify; - response->getHeaders( - [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); - headers_notify.WaitForNotification(); - } - - /** - * Initiates a '/quitquitquit' call and requests the headers for that call, - * but does not wait for the call to complete. We avoid waiting in order to - * trigger a potential race to ensure that MainCommon handles it properly. - */ - void quitAndRequestHeaders() { - MainCommonBase::AdminResponseSharedPtr quit_response = - main_common_->adminRequest("/quitquitquit", "POST"); - quit_response->getHeaders([](Http::Code, Http::ResponseHeaderMap&) {}); - } - - // This variable provides a hook to allow a test method to specify a hook to - // run when nextChunk() is called. This is currently used only for one test, - // CancelAfterAskingForChunk, that initiates a cancel() from within the chunk - // handler. - std::function get_headers_hook_ = []() {}; - std::function next_chunk_hook_ = []() {}; -}; - -INSTANTIATE_TEST_SUITE_P(IpVersions, AdminStreamingTest, - testing::ValuesIn(TestEnvironment::getIpVersionsForTest()), - TestUtility::ipTestParamsToString); - -TEST_P(AdminStreamingTest, RequestGetStatsAndQuit) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); - EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); - quitAndWait(); -} - -TEST_P(AdminStreamingTest, QuitDuringChunks) { - int quit_counter = 0; - static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { - if (++quit_counter == chunks_to_send_before_quitting) { - quitAndWait(); - } - }); - EXPECT_EQ(4, chunks_bytes.first); - EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); -} - -TEST_P(AdminStreamingTest, CancelDuringChunks) { - int quit_counter = 0; - static constexpr int chunks_to_send_before_quitting = 3; - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { - if (++quit_counter == chunks_to_send_before_quitting) { - response->cancel(); - } - }); - EXPECT_EQ(3, chunks_bytes.first); // no final call to the chunk handler after cancel. - EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); - quitAndWait(); -} - -TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader1) { - blockMainThreadUntilResume("/ready", "GET"); - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - response->cancel(); - resume_.Notify(); - int header_calls = 0; - - // After 'cancel', the headers function will not be called. - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - quitAndWait(); - EXPECT_EQ(0, header_calls); -} - -TEST_P(AdminStreamingTest, CancelBeforeAskingForHeader2) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - blockMainThreadAndCancelResponseAfterResume(response); - int header_calls = 0; - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - resume_.Notify(); - quitAndWait(); - EXPECT_EQ(0, header_calls); -} - -TEST_P(AdminStreamingTest, CancelAfterAskingForHeader1) { - int header_calls = 0; - blockMainThreadUntilResume("/ready", "GET"); - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - response->cancel(); - resume_.Notify(); - quitAndWait(); - EXPECT_EQ(0, header_calls); -} - -TEST_P(AdminStreamingTest, CancelAfterAskingForHeader2) { - int header_calls = 0; - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - get_headers_hook_ = [&response]() { response->cancel(); }; - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - quitAndWait(); - EXPECT_EQ(0, header_calls); -} - -TEST_P(AdminStreamingTest, DeleteAfterAskingForHeader1) { - int header_calls = 0; - blockMainThreadUntilResume("/ready", "GET"); - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - response.reset(); - resume_.Notify(); - quitAndWait(); - EXPECT_EQ(1, header_calls); -} - -TEST_P(AdminStreamingTest, DeleteAfterAskingForHeader2) { - int header_calls = 0; - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - get_headers_hook_ = [&response]() { response.reset(); }; - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - quitAndWait(); - EXPECT_EQ(1, header_calls); -} - -TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk1) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - waitForHeaders(response); - response->cancel(); - int chunk_calls = 0; - response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); - quitAndWait(); - EXPECT_EQ(0, chunk_calls); -} - -TEST_P(AdminStreamingTest, CancelBeforeAskingForChunk2) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - waitForHeaders(response); - blockMainThreadAndCancelResponseAfterResume(response); - int chunk_calls = 0; - response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); - resume_.Notify(); - quitAndWait(); - EXPECT_EQ(0, chunk_calls); -} - -TEST_P(AdminStreamingTest, CancelAfterAskingForChunk) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - waitForHeaders(response); - blockMainThreadUntilResume("/ready", "GET"); - int chunk_calls = 0; - - // Cause the /streaming handler to pause while yielding the next chunk, to hit - // an early exit in MainCommonBase::requestNextChunk. - next_chunk_hook_ = [response]() { response->cancel(); }; - response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); - resume_.Notify(); - quitAndWait(); - EXPECT_EQ(0, chunk_calls); -} - -TEST_P(AdminStreamingTest, QuitBeforeHeaders) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - quitAndWait(); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(1, chunks_bytes.first); - EXPECT_EQ(0, chunks_bytes.second); -} - -TEST_P(AdminStreamingTest, QuitDeleteRace) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - // Initiates a streaming quit on the main thread, but do not wait for it. - quitAndRequestHeaders(); - response.reset(); // Races with the quitquitquit - EXPECT_TRUE(waitForEnvoyToExit()); -} - -TEST_P(AdminStreamingTest, QuitCancelRace) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - quitAndRequestHeaders(); - response->cancel(); // Races with the quitquitquit - EXPECT_TRUE(waitForEnvoyToExit()); -} - -TEST_P(AdminStreamingTest, QuitBeforeCreatingResponse) { - // Initiates a streaming quit on the main thread, and wait for headers, which - // will trigger the termination of the event loop, and subsequent nulling of - // main_common_. However we can pause the test infrastructure after the quit - // takes hold leaving main_common_ in tact, to reproduce a potential race. - pause_after_run_ = true; - adminRequest("/quitquitquit", "POST"); - pause_point_.WaitForNotification(); // run() finished, but main_common_ still exists. - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(1, chunks_bytes.first); - EXPECT_EQ(0, chunks_bytes.second); - resume_.Notify(); - EXPECT_TRUE(waitForEnvoyToExit()); - response.reset(); -} - -TEST_P(AdminStreamingTest, QuitTerminateRace) { - MainCommonBase::AdminResponseSharedPtr response = streamingResponse(); - adminRequest("/quitquitquit", "POST"); - response.reset(); - EXPECT_TRUE(waitForEnvoyToExit()); -} - } // namespace Envoy diff --git a/test/exe/main_common_test_base.cc b/test/exe/main_common_test_base.cc new file mode 100644 index 000000000000..d0f3960b4148 --- /dev/null +++ b/test/exe/main_common_test_base.cc @@ -0,0 +1,117 @@ +#include "test/exe/main_common_test_base.h" + +#include "source/common/common/thread.h" + +#include "test/test_common/thread_factory_for_test.h" + +namespace Envoy { + +MainCommonTestBase::MainCommonTestBase(Network::Address::IpVersion version) + : config_file_(TestEnvironment::temporaryFileSubstitute( + "test/config/integration/google_com_proxy_port_0.yaml", TestEnvironment::ParamMap(), + TestEnvironment::PortMap(), version)), + argv_({"envoy-static", "--use-dynamic-base-id", "-c", config_file_.c_str(), nullptr}) {} + +const char* const* MainCommonTestBase::argv() { return &argv_[0]; } +int MainCommonTestBase::argc() { return argv_.size() - 1; } + +// Adds an argument, assuring that argv remains null-terminated. +void MainCommonTestBase::addArg(const char* arg) { + ASSERT(!argv_.empty()); + const size_t last = argv_.size() - 1; + ASSERT(argv_[last] == nullptr); // invariant established in ctor, maintained below. + argv_[last] = arg; // guaranteed non-empty + argv_.push_back(nullptr); +} + +// Adds options to make Envoy exit immediately after initialization. +void MainCommonTestBase::initOnly() { + addArg("--mode"); + addArg("init_only"); +} + +AdminRequestTestBase::AdminRequestTestBase(Network::Address::IpVersion version) + : MainCommonTestBase(version) { + addArg("--disable-hot-restart"); +} + +// Runs an admin request specified in path, blocking until completion, and +// returning the response body. +std::string AdminRequestTestBase::adminRequest(absl::string_view path, absl::string_view method) { + absl::Notification done; + std::string out; + main_common_->adminRequest( + path, method, + [&done, &out](const Http::HeaderMap& /*response_headers*/, absl::string_view body) { + out = std::string(body); + done.Notify(); + }); + done.WaitForNotification(); + return out; +} + +// Initiates Envoy running in its own thread. +void AdminRequestTestBase::startEnvoy() { + envoy_thread_ = Thread::threadFactoryForTest().createThread([this]() { + // Note: main_common_ is accessed in the testing thread, but + // is race-free, as MainCommon::run() does not return until + // triggered with an adminRequest POST to /quitquitquit, which + // is done in the testing thread. + main_common_ = std::make_unique(argc(), argv()); + envoy_started_ = true; + started_.Notify(); + pauseResumeInterlock(pause_before_run_); + bool status = main_common_->run(); + pauseResumeInterlock(pause_after_run_); + main_common_.reset(); + envoy_finished_ = true; + envoy_return_ = status; + finished_.Notify(); + }); +} + +// Conditionally pauses at a critical point in the Envoy thread, waiting for +// the test thread to trigger something at that exact line. The test thread +// can then call resume_.Notify() to allow the Envoy thread to resume. +void AdminRequestTestBase::pauseResumeInterlock(bool enable) { + if (enable) { + pause_point_.Notify(); + resume_.WaitForNotification(); + } +} + +// Wait until Envoy is inside the main server run loop proper. Before entering, Envoy runs any +// pending post callbacks, so it's not reliable to use adminRequest() or post() to do this. +// Generally, tests should not depend on this for correctness, but as a result of +// https://github.com/libevent/libevent/issues/779 we need to for TSAN. This is because the entry +// to event_base_loop() is where the signal base race occurs, but once we're in that loop in +// blocking mode, we're safe to take signals. +// TODO(htuch): Remove when https://github.com/libevent/libevent/issues/779 is fixed. +void AdminRequestTestBase::waitForEnvoyRun() { + absl::Notification done; + main_common_->dispatcherForTest().post([this, &done] { + struct Sacrifice : Event::DeferredDeletable { + Sacrifice(absl::Notification& notify) : notify_(notify) {} + ~Sacrifice() override { notify_.Notify(); } + absl::Notification& notify_; + }; + auto sacrifice = std::make_unique(done); + // Wait for a deferred delete cleanup, this only happens in the main server run loop. + main_common_->dispatcherForTest().deferredDelete(std::move(sacrifice)); + }); + done.WaitForNotification(); +} + +// Having triggered Envoy to quit (via signal or /quitquitquit), this blocks until Envoy exits. +bool AdminRequestTestBase::waitForEnvoyToExit() { + finished_.WaitForNotification(); + envoy_thread_->join(); + return envoy_return_; +} + +bool AdminRequestTestBase::quitAndWait() { + adminRequest("/quitquitquit", "POST"); + return waitForEnvoyToExit(); +} + +} // namespace Envoy diff --git a/test/exe/main_common_test_base.h b/test/exe/main_common_test_base.h new file mode 100644 index 000000000000..91ae895dd69f --- /dev/null +++ b/test/exe/main_common_test_base.h @@ -0,0 +1,77 @@ +#pragma once + +#include +#include + +#include "source/common/stats/isolated_store_impl.h" +#include "source/exe/main_common.h" + +#include "test/test_common/environment.h" + +#include "absl/synchronization/notification.h" + +namespace Envoy { + +class MainCommonTestBase { +protected: + MainCommonTestBase(Network::Address::IpVersion version); + const char* const* argv(); + int argc(); + + // Adds an argument, assuring that argv remains null-terminated. + void addArg(const char* arg); + + // Adds options to make Envoy exit immediately after initialization. + void initOnly(); + + std::string config_file_; + std::vector argv_; +}; + +class AdminRequestTestBase : public MainCommonTestBase { +protected: + AdminRequestTestBase(Network::Address::IpVersion version); + + // Runs an admin request specified in path, blocking until completion, and + // returning the response body. + std::string adminRequest(absl::string_view path, absl::string_view method); + + // Initiates Envoy running in its own thread. + void startEnvoy(); + + // Conditionally pauses at a critical point in the Envoy thread, waiting for + // the test thread to trigger something at that exact line. The test thread + // can then call resume_.Notify() to allow the Envoy thread to resume. + void pauseResumeInterlock(bool enable); + + // Wait until Envoy is inside the main server run loop proper. Before entering, Envoy runs any + // pending post callbacks, so it's not reliable to use adminRequest() or post() to do this. + // Generally, tests should not depend on this for correctness, but as a result of + // https://github.com/libevent/libevent/issues/779 we need to for TSAN. This is because the entry + // to event_base_loop() is where the signal base race occurs, but once we're in that loop in + // blocking mode, we're safe to take signals. + // TODO(htuch): Remove when https://github.com/libevent/libevent/issues/779 is fixed. + void waitForEnvoyRun(); + + // Having triggered Envoy to quit (via signal or /quitquitquit), this blocks until Envoy exits. + bool waitForEnvoyToExit(); + + // Sends a quit request to the server, and waits for Envoy to exit. Returns + // true if successful. + bool quitAndWait(); + + Stats::IsolatedStoreImpl stats_store_; + std::unique_ptr envoy_thread_; + std::unique_ptr main_common_; + absl::Notification started_; + absl::Notification finished_; + absl::Notification resume_; + absl::Notification pause_point_; + bool envoy_return_{false}; + bool envoy_started_{false}; + bool envoy_finished_{false}; + bool pause_before_run_{false}; + bool pause_after_run_{false}; +}; + +} // namespace Envoy From a6d49b1091e5747cc69aa51233f8c8788d1b50be Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 26 Feb 2024 20:55:45 -0500 Subject: [PATCH 26/39] remove no-longer-needed 'friend' declarations and rename TerminateNotifier per discussion Signed-off-by: Joshua Marantz --- source/exe/admin_response.cc | 13 +++++---- source/exe/admin_response.h | 52 +++++++++++++++++------------------- source/exe/main_common.cc | 8 +++--- source/exe/main_common.h | 2 +- 4 files changed, 36 insertions(+), 39 deletions(-) diff --git a/source/exe/admin_response.cc b/source/exe/admin_response.cc index 42ddb0f40be5..c5745217e7e4 100644 --- a/source/exe/admin_response.cc +++ b/source/exe/admin_response.cc @@ -8,16 +8,15 @@ namespace Envoy { AdminResponse::AdminResponse(Server::Instance& server, absl::string_view path, - absl::string_view method, - TerminateNotifierSharedPtr terminate_notifier) - : server_(server), opt_admin_(server.admin()), terminate_notifier_(terminate_notifier) { + absl::string_view method, SharedPtrSet response_set) + : server_(server), opt_admin_(server.admin()), shared_response_set_(response_set) { request_headers_->setMethod(method); request_headers_->setPath(path); } AdminResponse::~AdminResponse() { cancel(); - terminate_notifier_->detachResponse(this); + shared_response_set_->detachResponse(this); } void AdminResponse::getHeaders(HeadersFn fn) { @@ -161,7 +160,7 @@ void AdminResponse::sendErrorLockHeld() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { } } -void TerminateNotifier::terminateAdminRequests() { +void AdminResponse::PtrSet::terminateAdminRequests() { ASSERT_IS_MAIN_OR_TEST_THREAD(); absl::MutexLock lock(&mutex_); @@ -176,7 +175,7 @@ void TerminateNotifier::terminateAdminRequests() { response_set_.clear(); } -void TerminateNotifier::attachResponse(AdminResponse* response) { +void AdminResponse::PtrSet::attachResponse(AdminResponse* response) { absl::MutexLock lock(&mutex_); if (accepting_admin_requests_) { response_set_.insert(response); @@ -185,7 +184,7 @@ void TerminateNotifier::attachResponse(AdminResponse* response) { } } -void TerminateNotifier::detachResponse(AdminResponse* response) { +void AdminResponse::PtrSet::detachResponse(AdminResponse* response) { absl::MutexLock lock(&mutex_); response_set_.erase(response); } diff --git a/source/exe/admin_response.h b/source/exe/admin_response.h index 3c8dbc811106..931a1ed9292f 100644 --- a/source/exe/admin_response.h +++ b/source/exe/admin_response.h @@ -14,28 +14,6 @@ namespace Envoy { class AdminResponse; -// AdminResponse can outlive MainCommonBase. But AdminResponse needs a -// reliable way of knowing whether MainCommonBase is alive, so we do this with -// TerminateNotifier, which is held by MainCommonBase and all the active -// AdminResponses via shared_ptr. This gives MainCommonBase a reliable way of -// notifying all active responses that it is being shut down, and thus all -// responses need to be terminated. And it gives a reliable way for -// AdminResponse to detach itself, even if MainCommonBase is already deleted. -class TerminateNotifier { -public: - void detachResponse(AdminResponse*); - void attachResponse(AdminResponse*); - - // Called after the server run-loop finishes; any outstanding streaming admin requests - // will otherwise hang as the main-thread dispatcher loop will no longer run. - void terminateAdminRequests(); - - mutable absl::Mutex mutex_; - absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); - bool accepting_admin_requests_ ABSL_GUARDED_BY(mutex_) = true; -}; -using TerminateNotifierSharedPtr = std::shared_ptr; - // Holds context for a streaming response from the admin system, enabling // flow-control into another system. This is particularly important when the // generated response is very large, such that holding it in memory may cause @@ -55,8 +33,31 @@ using TerminateNotifierSharedPtr = std::shared_ptr; // cancel() is called, no further callbacks will be called by the response. class AdminResponse : public std::enable_shared_from_this { public: + // AdminResponse can outlive MainCommonBase. But AdminResponse needs a + // reliable way of knowing whether MainCommonBase is alive, so we do this with + // PtrSet, which is held by MainCommonBase and all the active AdminResponse, + // which is held by MainCommonBase and by AdminResponse via shared_ptr. This + // gives MainCommonBase a reliable way of notifying all active responses that + // it is being shut down, and thus all responses need to be terminated. And it + // gives a reliable way for AdminResponse to detach itself, even if + // MainCommonBase is already deleted. + class PtrSet { + public: + void detachResponse(AdminResponse*); + void attachResponse(AdminResponse*); + + // Called after the server run-loop finishes; any outstanding streaming admin requests + // will otherwise hang as the main-thread dispatcher loop will no longer run. + void terminateAdminRequests(); + + mutable absl::Mutex mutex_; + absl::flat_hash_set response_set_ ABSL_GUARDED_BY(mutex_); + bool accepting_admin_requests_ ABSL_GUARDED_BY(mutex_) = true; + }; + using SharedPtrSet = std::shared_ptr; + AdminResponse(Server::Instance& server, absl::string_view path, absl::string_view method, - TerminateNotifierSharedPtr terminate_notifier); + SharedPtrSet response_set); ~AdminResponse(); /** @@ -112,9 +113,6 @@ class AdminResponse : public std::enable_shared_from_this { bool cancelled() const; private: - friend class MainCommonBase; - friend class TerminateNotifier; - /** * Called when the server is terminated. This calls any outstanding * callbacks to be called. If nextChunk is called after termination, @@ -159,7 +157,7 @@ class AdminResponse : public std::enable_shared_from_this { BodyFn body_fn_ ABSL_GUARDED_BY(mutex_); mutable absl::Mutex mutex_; - TerminateNotifierSharedPtr terminate_notifier_; + SharedPtrSet shared_response_set_; }; using AdminResponseSharedPtr = std::shared_ptr; diff --git a/source/exe/main_common.cc b/source/exe/main_common.cc index bdf26c324f58..87b3342f3335 100644 --- a/source/exe/main_common.cc +++ b/source/exe/main_common.cc @@ -60,7 +60,7 @@ MainCommonBase::MainCommonBase(const Server::Options& options, Event::TimeSystem std::move(process_context), createFunction()) #ifdef ENVOY_ADMIN_FUNCTIONALITY , - terminate_notifier_(std::make_shared()) + shared_response_set_(std::make_shared()) #endif { } @@ -74,7 +74,7 @@ bool MainCommonBase::run() { case Server::Mode::Serve: runServer(); #ifdef ENVOY_ADMIN_FUNCTIONALITY - terminate_notifier_->terminateAdminRequests(); + shared_response_set_->terminateAdminRequests(); #endif ret = true; break; @@ -114,8 +114,8 @@ void MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string AdminResponseSharedPtr MainCommonBase::adminRequest(absl::string_view path_and_query, absl::string_view method) { auto response = - std::make_shared(*server(), path_and_query, method, terminate_notifier_); - terminate_notifier_->attachResponse(response.get()); + std::make_shared(*server(), path_and_query, method, shared_response_set_); + shared_response_set_->attachResponse(response.get()); return response; } #endif diff --git a/source/exe/main_common.h b/source/exe/main_common.h index ade0915aaada..c70897e22bbd 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -60,7 +60,7 @@ class MainCommonBase : public StrippedMainBase { void detachResponse(AdminResponse*); private: - TerminateNotifierSharedPtr terminate_notifier_; + AdminResponse::SharedPtrSet shared_response_set_; #endif }; From 153b2a28a87894da6b8cd6e85824860685d00cf7 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 26 Feb 2024 22:39:57 -0500 Subject: [PATCH 27/39] format Signed-off-by: Joshua Marantz --- test/exe/main_common_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 541f4734587b..6b715f12a424 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -55,7 +55,7 @@ const std::string& outOfMemoryPattern() { */ class MainCommonTest : public MainCommonTestBase, public testing::TestWithParam { - protected: +protected: MainCommonTest() : MainCommonTestBase(GetParam()) {} }; INSTANTIATE_TEST_SUITE_P(IpVersions, MainCommonTest, @@ -257,7 +257,7 @@ TEST_P(MainCommonDeathTest, OutOfMemoryHandler) { class AdminRequestTest : public AdminRequestTestBase, public testing::TestWithParam { - protected: +protected: AdminRequestTest() : AdminRequestTestBase(GetParam()) {} }; INSTANTIATE_TEST_SUITE_P(IpVersions, AdminRequestTest, From 36195fe8e6f3f5edb937eb56de2f2cf3085fcc30 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 27 Feb 2024 09:50:54 -0500 Subject: [PATCH 28/39] cleanup & format Signed-off-by: Joshua Marantz --- source/exe/README.md | 2 +- source/exe/admin_response.h | 29 +++++++++++++++---- source/exe/main_common.h | 50 ++++++++++++++++++++++----------- test/exe/admin_response_test.cc | 2 +- test/exe/main_common_test.cc | 1 - 5 files changed, 58 insertions(+), 26 deletions(-) diff --git a/source/exe/README.md b/source/exe/README.md index bb3beb5ba8ef..b8a0b6c70272 100644 --- a/source/exe/README.md +++ b/source/exe/README.md @@ -3,7 +3,7 @@ State Action Next State Side Effects Ground adminRequest(u,m) ResponseInitial ResponseInitial getHeaders HeadersWait post to main thread to get headers HeadersWait postComplete HeadersSent call HeadersFn -HeadersSent +HeadersSent ResponseInitial cancel ResponseCancelled diff --git a/source/exe/admin_response.h b/source/exe/admin_response.h index 931a1ed9292f..fa92966d439c 100644 --- a/source/exe/admin_response.h +++ b/source/exe/admin_response.h @@ -43,11 +43,29 @@ class AdminResponse : public std::enable_shared_from_this { // MainCommonBase is already deleted. class PtrSet { public: - void detachResponse(AdminResponse*); - void attachResponse(AdminResponse*); - - // Called after the server run-loop finishes; any outstanding streaming admin requests - // will otherwise hang as the main-thread dispatcher loop will no longer run. + /** + * Called when an AdminResponse is created. When terminateAdminRequests is + * called, all outstanding response objects have their terminate() methods + * called. + * + * @param response the response pointer to be added to the set. + */ + void attachResponse(AdminResponse* response); + + /** + * Called when an AdminResponse is terminated, either by completing normally + * or having the caller call cancel on it. Either way it needs to be removed + * from the set that will be used by terminateAdminRequests below. + * + * @param response the response pointer to be removed from the set. + */ + void detachResponse(AdminResponse* response); + + /** + * Called after the server run-loop finishes; any outstanding streaming + * admin requests will otherwise hang as the main-thread dispatcher loop + * will no longer run. + */ void terminateAdminRequests(); mutable absl::Mutex mutex_; @@ -131,7 +149,6 @@ class AdminResponse : public std::enable_shared_from_this { Buffer::OwnedImpl response_; Http::Code code_; Server::Admin::RequestPtr request_; - // CleanupFn cleanup_; Http::RequestHeaderMapPtr request_headers_{Http::RequestHeaderMapImpl::create()}; Http::ResponseHeaderMapPtr response_headers_{Http::ResponseHeaderMapImpl::create()}; bool more_data_ = true; diff --git a/source/exe/main_common.h b/source/exe/main_common.h index c70897e22bbd..9d7fb3ffdf10 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -38,26 +38,42 @@ class MainCommonBase : public StrippedMainBase { using AdminRequestFn = std::function; - // Makes an admin-console request by path, calling handler() when complete. - // The caller can initiate this from any thread, but it posts the request - // onto the main thread, so the handler is called asynchronously. - // - // This is designed to be called from downstream consoles, so they can access - // the admin console information stream without opening up a network port. - // - // This should only be called while run() is active; ensuring this is the - // responsibility of the caller. - // - // TODO(jmarantz): consider std::future for encapsulating this delayed request - // semantics, rather than a handler callback. + /** + * Makes an admin-console request by path, calling handler() when complete. + * The caller can initiate this from any thread, but it posts the request + * onto the main thread, so the handler is called asynchronously. + * + * This is designed to be called from downstream consoles, so they can access + * the admin console information stream without opening up a network port. + * + * This should only be called while run() is active; ensuring this is the + * responsibility of the caller. + * + * TODO(jmarantz): consider std::future for encapsulating this delayed request + * semantics, rather than a handler callback. + * + * Consider using the 2-arg version of adminRequest, below, which enables + * streaming of large responses one chunk at a time, without holding + * potentially huge response text in memory. + * + * @param path_and_query the URL to send to admin, including any query params. + * @param method the HTTP method: "GET" or "POST" + * @param handler an async callback that will be sent the serialized headers + * and response. + */ void adminRequest(absl::string_view path_and_query, absl::string_view method, const AdminRequestFn& handler); - AdminResponseSharedPtr adminRequest(absl::string_view path_and_query, absl::string_view method); - // Called when a streaming response is terminated, either by completing normally - // or having the caller call cancel on it. Either way it needs to be removed from - // the set that will be used by terminateAdminRequests below. - void detachResponse(AdminResponse*); + /** + * Initiates a streaming response to an admin request. The caller interacts + * with the returned AdminResponse object, and can thus control the pace of + * handling chunks of response text. + * + * @param path_and_query the URL to send to admin, including any query params. + * @param method the HTTP method: "GET" or "POST" + * @return AdminResponseSharedPtr the response object + */ + AdminResponseSharedPtr adminRequest(absl::string_view path_and_query, absl::string_view method); private: AdminResponse::SharedPtrSet shared_response_set_; diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index ff4d94dbeb73..97473eb5e354 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -276,7 +276,7 @@ TEST_F(AdminStreamingTest, CancelAfterAskingForChunk) { int chunk_calls = 0; // Cause the /streaming handler to pause while yielding the next chunk, to hit - // an early exit in MainCommonBase::requestNextChunk. + // an early exit in requestNextChunk. next_chunk_hook_ = [response]() { response->cancel(); }; response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); resume_.Notify(); diff --git a/test/exe/main_common_test.cc b/test/exe/main_common_test.cc index 6b715f12a424..942a53e3e3b1 100644 --- a/test/exe/main_common_test.cc +++ b/test/exe/main_common_test.cc @@ -1,6 +1,5 @@ #include "envoy/common/platform.h" -#include "source/common/common/lock_guard.h" #include "source/common/common/mutex_tracer_impl.h" #include "source/common/common/random_generator.h" #include "source/common/common/thread.h" From 4fb78988a69b7a0231b5692e65ca7e96e8b3c966 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Tue, 27 Feb 2024 16:36:25 -0500 Subject: [PATCH 29/39] fix admin-disabled build Signed-off-by: Joshua Marantz --- source/exe/BUILD | 4 ++-- source/exe/main_common.h | 3 +++ test/exe/BUILD | 6 ++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/source/exe/BUILD b/source/exe/BUILD index f3250d4b7c67..0f7947c1d07a 100644 --- a/source/exe/BUILD +++ b/source/exe/BUILD @@ -7,6 +7,7 @@ load( "envoy_cc_posix_without_linux_library", "envoy_cc_win32_library", "envoy_package", + "envoy_select_admin_functionality", "envoy_select_enable_http3", "envoy_select_signal_trace", ) @@ -102,8 +103,7 @@ envoy_cc_library( hdrs = [ "main_common.h", ], - deps = [ - ":admin_response_lib", + deps = envoy_select_admin_functionality([":admin_response_lib"]) + [ ":platform_impl_lib", ":process_wide_lib", ":stripped_main_base_lib", diff --git a/source/exe/main_common.h b/source/exe/main_common.h index 9d7fb3ffdf10..500f293ce7f1 100644 --- a/source/exe/main_common.h +++ b/source/exe/main_common.h @@ -10,7 +10,10 @@ #include "source/common/stats/symbol_table.h" #include "source/common/stats/thread_local_store.h" #include "source/common/thread_local/thread_local_impl.h" + +#ifdef ENVOY_ADMIN_FUNCTIONALITY #include "source/exe/admin_response.h" +#endif #include "source/exe/process_wide.h" #include "source/exe/stripped_main_base.h" #include "source/server/listener_hooks.h" diff --git a/test/exe/BUILD b/test/exe/BUILD index 33f732b37af5..065b80e97014 100644 --- a/test/exe/BUILD +++ b/test/exe/BUILD @@ -87,8 +87,7 @@ envoy_cc_test( data = [ "//test/config/integration:google_com_proxy_port_0", ], - deps = [ - ":main_common_test_base_lib", + deps = envoy_select_admin_functionality([":main_common_test_base_lib"]) + [ "//source/common/api:api_lib", "//source/exe:envoy_main_common_with_core_extensions_lib", "//source/exe:platform_impl_lib", @@ -105,8 +104,7 @@ envoy_cc_test( data = [ "//test/config/integration:google_com_proxy_port_0", ], - deps = [ - ":main_common_test_base_lib", + deps = envoy_select_admin_functionality([":main_common_test_base_lib"]) + [ "//source/common/api:api_lib", "//source/exe:envoy_main_common_with_core_extensions_lib", "//source/exe:platform_impl_lib", From 0cf32c132ada894428a996bae049a0ff9df0869d Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 28 Feb 2024 15:29:18 -0500 Subject: [PATCH 30/39] address some review comments (others remain) Signed-off-by: Joshua Marantz --- source/server/admin/admin.cc | 1 - source/server/admin/admin_filter.cc | 1 + test/exe/admin_response_test.cc | 19 +++++++++---------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/source/server/admin/admin.cc b/source/server/admin/admin.cc index a7ffc489b97b..0b21be7fef7f 100644 --- a/source/server/admin/admin.cc +++ b/source/server/admin/admin.cc @@ -494,7 +494,6 @@ bool AdminImpl::removeHandler(const std::string& prefix) { Http::Code AdminImpl::request(absl::string_view path_and_query, absl::string_view method, Http::ResponseHeaderMap& response_headers, std::string& body) { - // AdminFilter filter(createRequestFunction()); AdminFilter filter(*this); auto request_headers = Http::RequestHeaderMapImpl::create(); diff --git a/source/server/admin/admin_filter.cc b/source/server/admin/admin_filter.cc index 7de083c5d1b7..d9dee2576612 100644 --- a/source/server/admin/admin_filter.cc +++ b/source/server/admin/admin_filter.cc @@ -92,6 +92,7 @@ void AdminFilter::onComplete() { decoder_callbacks_->encodeHeaders(std::move(header_map), false, StreamInfo::ResponseCodeDetails::get().AdminFilterResponse); + // TODO(#31087): use high/lower watermarks to apply flow-control to the admin http port. bool more_data; do { Buffer::OwnedImpl response; diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index 97473eb5e354..b307831da54e 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -91,11 +91,10 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { * * To resume the main thread, call resume_.Notify(); * - * @param url the stats endpoint to initiate. + * @param url the admin endpoint to initiate. */ void blockMainThreadUntilResume(absl::string_view url, absl::string_view method) { AdminResponseSharedPtr blocked_response = main_common_->adminRequest(url, method); - absl::Notification block_main_thread; blocked_response->getHeaders( [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); } @@ -292,7 +291,7 @@ TEST_F(AdminStreamingTest, QuitBeforeHeaders) { EXPECT_EQ(0, chunks_bytes.second); } -TEST_F(AdminStreamingTest, QuitDeleteRace) { +TEST_F(AdminStreamingTest, QuitDeleteRace1) { AdminResponseSharedPtr response = streamingResponse(); // Initiates a streaming quit on the main thread, but do not wait for it. quitAndRequestHeaders(); @@ -300,6 +299,13 @@ TEST_F(AdminStreamingTest, QuitDeleteRace) { EXPECT_TRUE(waitForEnvoyToExit()); } +TEST_F(AdminStreamingTest, QuitDeleteRace2) { + AdminResponseSharedPtr response = streamingResponse(); + adminRequest("/quitquitquit", "POST"); + response.reset(); + EXPECT_TRUE(waitForEnvoyToExit()); +} + TEST_F(AdminStreamingTest, QuitCancelRace) { AdminResponseSharedPtr response = streamingResponse(); quitAndRequestHeaders(); @@ -324,11 +330,4 @@ TEST_F(AdminStreamingTest, QuitBeforeCreatingResponse) { response.reset(); } -TEST_F(AdminStreamingTest, QuitTerminateRace) { - AdminResponseSharedPtr response = streamingResponse(); - adminRequest("/quitquitquit", "POST"); - response.reset(); - EXPECT_TRUE(waitForEnvoyToExit()); -} - } // namespace Envoy From 03ce2c7b9eef2533908e92453d6ec3cc70bf7b47 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 28 Feb 2024 22:09:40 -0500 Subject: [PATCH 31/39] review comments Signed-off-by: Joshua Marantz --- source/exe/admin_response.cc | 7 +-- source/exe/admin_response.h | 11 ++-- test/exe/admin_response_test.cc | 97 ++++++++++++++++++++++++--------- 3 files changed, 80 insertions(+), 35 deletions(-) diff --git a/source/exe/admin_response.cc b/source/exe/admin_response.cc index c5745217e7e4..0c1ab0958bec 100644 --- a/source/exe/admin_response.cc +++ b/source/exe/admin_response.cc @@ -123,13 +123,12 @@ void AdminResponse::requestNextChunk() { ASSERT_IS_MAIN_OR_TEST_THREAD(); { absl::MutexLock lock(&mutex_); - if (cancelled_ || terminated_) { + if (cancelled_ || terminated_ || !more_data_) { return; } } - while (response_.length() == 0 && more_data_) { - more_data_ = request_->nextChunk(response_); - } + ASSERT(response_.length() == 0); + more_data_ = request_->nextChunk(response_); { absl::MutexLock lock(&mutex_); if (sent_end_stream_ || cancelled_) { diff --git a/source/exe/admin_response.h b/source/exe/admin_response.h index fa92966d439c..a5d4ca20dcd2 100644 --- a/source/exe/admin_response.h +++ b/source/exe/admin_response.h @@ -35,12 +35,11 @@ class AdminResponse : public std::enable_shared_from_this { public: // AdminResponse can outlive MainCommonBase. But AdminResponse needs a // reliable way of knowing whether MainCommonBase is alive, so we do this with - // PtrSet, which is held by MainCommonBase and all the active AdminResponse, - // which is held by MainCommonBase and by AdminResponse via shared_ptr. This - // gives MainCommonBase a reliable way of notifying all active responses that - // it is being shut down, and thus all responses need to be terminated. And it - // gives a reliable way for AdminResponse to detach itself, even if - // MainCommonBase is already deleted. + // PtrSet, which is held by MainCommonBase and all the active AdminResponses. + // via shared_ptr. This gives MainCommonBase a reliable way of notifying all + // active responses that it is being shut down, and thus all responses need to + // be terminated. And it gives a reliable way for AdminResponse to detach + // itself, whether or not MainCommonBase is already deleted. class PtrSet { public: /** diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index b307831da54e..9767ed73849a 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -46,26 +46,35 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { true, false); } - using ChunksBytes = std::pair; - ChunksBytes runStreamingRequest(AdminResponseSharedPtr response, - std::function chunk_hook = nullptr) { + struct ResponseData { + uint64_t num_chunks_{0}; + uint64_t num_bytes_{0}; + Http::Code code_; + std::string content_type_; + }; + + ResponseData runStreamingRequest(AdminResponseSharedPtr response, + std::function chunk_hook = nullptr) { absl::Notification done; std::vector out; absl::Notification headers_notify; + ResponseData response_data; response->getHeaders( - [&headers_notify](Http::Code, Http::ResponseHeaderMap&) { headers_notify.Notify(); }); + [&headers_notify, &response_data](Http::Code code, Http::ResponseHeaderMap& headers) { + response_data.code_ = code; + response_data.content_type_ = headers.getContentTypeValue(); + headers_notify.Notify(); + }); headers_notify.WaitForNotification(); bool cont = true; - uint64_t num_chunks = 0; - uint64_t num_bytes = 0; while (cont && !response->cancelled()) { absl::Notification chunk_notify; response->nextChunk( - [&chunk_notify, &num_chunks, &num_bytes, &cont](Buffer::Instance& chunk, bool more) { + [&chunk_notify, &response_data, &cont](Buffer::Instance& chunk, bool more) { cont = more; - num_bytes += chunk.length(); + response_data.num_bytes_ += chunk.length(); chunk.drain(chunk.length()); - ++num_chunks; + ++response_data.num_chunks_; chunk_notify.Notify(); }); chunk_notify.WaitForNotification(); @@ -74,7 +83,7 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { } } - return ChunksBytes(num_chunks, num_bytes); + return response_data; } /** @@ -99,6 +108,8 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); } +#define CANCEL_AFTER_RESUME_HELPER 0 +#if CANCEL_AFTER_RESUME_HELPER /** * To hit an early exit after the second lock in * AdminResponseImpl::requestNextChunk and AdminResponseImpl::requestHeaders @@ -115,6 +126,7 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { response->cancel(); }); } +#endif /** * Requests the headers and waits until the headers have been sent. @@ -148,10 +160,12 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { TEST_F(AdminStreamingTest, RequestGetStatsAndQuit) { AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(StreamingAdminRequest::NumChunks, chunks_bytes.first); + ResponseData response_data = runStreamingRequest(response); + EXPECT_EQ(StreamingAdminRequest::NumChunks, response_data.num_chunks_); EXPECT_EQ(StreamingAdminRequest::NumChunks * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); + response_data.num_bytes_); + EXPECT_EQ(Http::Code::OK, response_data.code_); + EXPECT_EQ("text/plain; charset=UTF-8", response_data.content_type_); EXPECT_TRUE(quitAndWait()); } @@ -159,28 +173,32 @@ TEST_F(AdminStreamingTest, QuitDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response, [&quit_counter, this]() { + ResponseData response_data = runStreamingRequest(response, [&quit_counter, this]() { if (++quit_counter == chunks_to_send_before_quitting) { EXPECT_TRUE(quitAndWait()); } }); - EXPECT_EQ(4, chunks_bytes.first); + EXPECT_EQ(4, response_data.num_chunks_); EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); + response_data.num_bytes_); + EXPECT_EQ(Http::Code::OK, response_data.code_); + EXPECT_EQ("text/plain; charset=UTF-8", response_data.content_type_); } TEST_F(AdminStreamingTest, CancelDuringChunks) { int quit_counter = 0; static constexpr int chunks_to_send_before_quitting = 3; AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response, [response, &quit_counter]() { + ResponseData response_data = runStreamingRequest(response, [response, &quit_counter]() { if (++quit_counter == chunks_to_send_before_quitting) { response->cancel(); } }); - EXPECT_EQ(3, chunks_bytes.first); // no final call to the chunk handler after cancel. + EXPECT_EQ(3, response_data.num_chunks_); // no final call to the chunk handler after cancel. EXPECT_EQ(chunks_to_send_before_quitting * StreamingAdminRequest::BytesPerChunk, - chunks_bytes.second); + response_data.num_bytes_); + EXPECT_EQ(Http::Code::OK, response_data.code_); + EXPECT_EQ("text/plain; charset=UTF-8", response_data.content_type_); EXPECT_TRUE(quitAndWait()); } @@ -199,10 +217,17 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader1) { TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader2) { AdminResponseSharedPtr response = streamingResponse(); +#if CANCEL_AFTER_RESUME_HELPER blockMainThreadAndCancelResponseAfterResume(response); +#else + blockMainThreadUntilResume("/ready", "GET"); +#endif int header_calls = 0; response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); resume_.Notify(); +#if !CANCEL_AFTER_RESUME_HELPER + response->cancel(); +#endif EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, header_calls); } @@ -260,10 +285,17 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk1) { TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk2) { AdminResponseSharedPtr response = streamingResponse(); waitForHeaders(response); +#if CANCEL_AFTER_RESUME_HELPER blockMainThreadAndCancelResponseAfterResume(response); +#else + blockMainThreadUntilResume("/ready", "GET"); +#endif int chunk_calls = 0; response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); resume_.Notify(); +#if !CANCEL_AFTER_RESUME_HELPER + response->cancel(); +#endif EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, chunk_calls); } @@ -286,9 +318,11 @@ TEST_F(AdminStreamingTest, CancelAfterAskingForChunk) { TEST_F(AdminStreamingTest, QuitBeforeHeaders) { AdminResponseSharedPtr response = streamingResponse(); EXPECT_TRUE(quitAndWait()); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(1, chunks_bytes.first); - EXPECT_EQ(0, chunks_bytes.second); + ResponseData response_data = runStreamingRequest(response); + EXPECT_EQ(1, response_data.num_chunks_); + EXPECT_EQ(0, response_data.num_bytes_); + EXPECT_EQ(Http::Code::InternalServerError, response_data.code_); + EXPECT_EQ("text/plain; charset=UTF-8", response_data.content_type_); } TEST_F(AdminStreamingTest, QuitDeleteRace1) { @@ -322,12 +356,25 @@ TEST_F(AdminStreamingTest, QuitBeforeCreatingResponse) { adminRequest("/quitquitquit", "POST"); pause_point_.WaitForNotification(); // run() finished, but main_common_ still exists. AdminResponseSharedPtr response = streamingResponse(); - ChunksBytes chunks_bytes = runStreamingRequest(response); - EXPECT_EQ(1, chunks_bytes.first); - EXPECT_EQ(0, chunks_bytes.second); + ResponseData response_data = runStreamingRequest(response); + EXPECT_EQ(1, response_data.num_chunks_); + EXPECT_EQ(0, response_data.num_bytes_); + EXPECT_EQ(Http::Code::InternalServerError, response_data.code_); + EXPECT_EQ("text/plain; charset=UTF-8", response_data.content_type_); resume_.Notify(); EXPECT_TRUE(waitForEnvoyToExit()); response.reset(); } +TEST_F(AdminStreamingTest, TimeoutGettingResponse) { + blockMainThreadUntilResume("/ready", "GET"); + AdminResponseSharedPtr response = streamingResponse(); + absl::Notification got_headers; + response->getHeaders( + [&got_headers](Http::Code, Http::ResponseHeaderMap&) { got_headers.Notify(); }); + ASSERT_FALSE(got_headers.WaitForNotificationWithTimeout(absl::Seconds(5))); + resume_.Notify(); + EXPECT_TRUE(quitAndWait()); +} + } // namespace Envoy From 48ece2d8005f879821d2ab93a05c92b0ef24ac46 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 29 Feb 2024 11:32:53 -0500 Subject: [PATCH 32/39] remove ifdef'd out test helper. Signed-off-by: Joshua Marantz --- test/exe/admin_response_test.cc | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index 9767ed73849a..34698a4436d4 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -108,26 +108,6 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); } -#define CANCEL_AFTER_RESUME_HELPER 0 -#if CANCEL_AFTER_RESUME_HELPER - /** - * To hit an early exit after the second lock in - * AdminResponseImpl::requestNextChunk and AdminResponseImpl::requestHeaders - * we, need to post a cancel request to the main thread, but block that on the - * resume_ notification. This allows a subsequent test call to getHeaders or - * nextChunk initiate a post to main thread, but runs the cancel call on the - * response before headers_fn_ or body_fn_ runs. - * - * @param response the response to cancel after resume_. - */ - void blockMainThreadAndCancelResponseAfterResume(AdminResponseSharedPtr response) { - main_common_->dispatcherForTest().post([response, this] { - resume_.WaitForNotification(); - response->cancel(); - }); - } -#endif - /** * Requests the headers and waits until the headers have been sent. * @@ -217,17 +197,11 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader1) { TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader2) { AdminResponseSharedPtr response = streamingResponse(); -#if CANCEL_AFTER_RESUME_HELPER - blockMainThreadAndCancelResponseAfterResume(response); -#else blockMainThreadUntilResume("/ready", "GET"); -#endif int header_calls = 0; response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); resume_.Notify(); -#if !CANCEL_AFTER_RESUME_HELPER response->cancel(); -#endif EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, header_calls); } @@ -285,17 +259,11 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk1) { TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk2) { AdminResponseSharedPtr response = streamingResponse(); waitForHeaders(response); -#if CANCEL_AFTER_RESUME_HELPER - blockMainThreadAndCancelResponseAfterResume(response); -#else blockMainThreadUntilResume("/ready", "GET"); -#endif int chunk_calls = 0; response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); resume_.Notify(); -#if !CANCEL_AFTER_RESUME_HELPER response->cancel(); -#endif EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, chunk_calls); } From b006d79bd51eebb0e87c6a697c573a8a70b5484c Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 29 Feb 2024 11:36:23 -0500 Subject: [PATCH 33/39] add reference to FSM drawing Signed-off-by: Joshua Marantz --- source/exe/admin_response.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/exe/admin_response.h b/source/exe/admin_response.h index a5d4ca20dcd2..1b57880ff091 100644 --- a/source/exe/admin_response.h +++ b/source/exe/admin_response.h @@ -31,6 +31,10 @@ class AdminResponse; // // Requests can also be cancelled explicitly by calling cancel(). After // cancel() is called, no further callbacks will be called by the response. +// +// The lifecycle of an AdminResponse is rendered as an finite state machine +// bubble diagram: +// https://docs.google.com/drawings/d/1njUl1twApEMoxmjaG4b7optTh5fcb_YNcfSnkHbdfq0/view class AdminResponse : public std::enable_shared_from_this { public: // AdminResponse can outlive MainCommonBase. But AdminResponse needs a From f799b84f25e42561d8c45befd510478f4811156b Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 29 Feb 2024 11:36:49 -0500 Subject: [PATCH 34/39] grammar Signed-off-by: Joshua Marantz --- source/exe/admin_response.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/exe/admin_response.h b/source/exe/admin_response.h index 1b57880ff091..89636f92a865 100644 --- a/source/exe/admin_response.h +++ b/source/exe/admin_response.h @@ -32,7 +32,7 @@ class AdminResponse; // Requests can also be cancelled explicitly by calling cancel(). After // cancel() is called, no further callbacks will be called by the response. // -// The lifecycle of an AdminResponse is rendered as an finite state machine +// The lifecycle of an AdminResponse is rendered as a finite state machine // bubble diagram: // https://docs.google.com/drawings/d/1njUl1twApEMoxmjaG4b7optTh5fcb_YNcfSnkHbdfq0/view class AdminResponse : public std::enable_shared_from_this { From e1640d33e2640d6605de4783af3bc38f9ec947dc Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 29 Feb 2024 17:28:24 -0500 Subject: [PATCH 35/39] fix race. Signed-off-by: Joshua Marantz --- test/exe/admin_response_test.cc | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index 34698a4436d4..569bad0eef9b 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -182,7 +182,7 @@ TEST_F(AdminStreamingTest, CancelDuringChunks) { EXPECT_TRUE(quitAndWait()); } -TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader1) { +TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader) { blockMainThreadUntilResume("/ready", "GET"); AdminResponseSharedPtr response = streamingResponse(); response->cancel(); @@ -195,17 +195,6 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader1) { EXPECT_EQ(0, header_calls); } -TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader2) { - AdminResponseSharedPtr response = streamingResponse(); - blockMainThreadUntilResume("/ready", "GET"); - int header_calls = 0; - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - resume_.Notify(); - response->cancel(); - EXPECT_TRUE(quitAndWait()); - EXPECT_EQ(0, header_calls); -} - TEST_F(AdminStreamingTest, CancelAfterAskingForHeader1) { int header_calls = 0; blockMainThreadUntilResume("/ready", "GET"); @@ -262,8 +251,8 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk2) { blockMainThreadUntilResume("/ready", "GET"); int chunk_calls = 0; response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); - resume_.Notify(); response->cancel(); + resume_.Notify(); EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, chunk_calls); } @@ -340,6 +329,7 @@ TEST_F(AdminStreamingTest, TimeoutGettingResponse) { absl::Notification got_headers; response->getHeaders( [&got_headers](Http::Code, Http::ResponseHeaderMap&) { got_headers.Notify(); }); + ENVOY_LOG_MISC(info, "Blocking for 5 seconds to test timeout functionality..."); ASSERT_FALSE(got_headers.WaitForNotificationWithTimeout(absl::Seconds(5))); resume_.Notify(); EXPECT_TRUE(quitAndWait()); From 0e6a275240326fa7b833e5ce3f9e1c2471c68b90 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 6 Mar 2024 13:44:31 -0500 Subject: [PATCH 36/39] Better interlock for test infrastructure, hopefully still hitting all the lines. Signed-off-by: Joshua Marantz --- test/exe/admin_response_test.cc | 74 +++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index 569bad0eef9b..050c026c3915 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -102,10 +102,24 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { * * @param url the admin endpoint to initiate. */ - void blockMainThreadUntilResume(absl::string_view url, absl::string_view method) { + /*void blockMainThreadUntilResume(absl::string_view url, absl::string_view method) { AdminResponseSharedPtr blocked_response = main_common_->adminRequest(url, method); blocked_response->getHeaders( [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); + }*/ + + void interlockMainThread(std::function fn, + std::function run_before_resume = nullptr) { + main_common_->dispatcherForTest().post([this, fn] { + resume_.WaitForNotification(); + fn(); + pause_point_.Notify(); + }); + if (run_before_resume != nullptr) { + run_before_resume(); + } + resume_.Notify(); + pause_point_.WaitForNotification(); } /** @@ -183,10 +197,8 @@ TEST_F(AdminStreamingTest, CancelDuringChunks) { } TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader) { - blockMainThreadUntilResume("/ready", "GET"); AdminResponseSharedPtr response = streamingResponse(); - response->cancel(); - resume_.Notify(); + interlockMainThread([response]() { response->cancel(); }); int header_calls = 0; // After 'cancel', the headers function will not be called. @@ -197,11 +209,12 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader) { TEST_F(AdminStreamingTest, CancelAfterAskingForHeader1) { int header_calls = 0; - blockMainThreadUntilResume("/ready", "GET"); + // blockMainThreadUntilResume("/ready", "GET"); AdminResponseSharedPtr response = streamingResponse(); - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - response->cancel(); - resume_.Notify(); + interlockMainThread([&header_calls, response]() { + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + response->cancel(); + }); EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, header_calls); } @@ -217,11 +230,12 @@ TEST_F(AdminStreamingTest, CancelAfterAskingForHeader2) { TEST_F(AdminStreamingTest, DeleteAfterAskingForHeader1) { int header_calls = 0; - blockMainThreadUntilResume("/ready", "GET"); AdminResponseSharedPtr response = streamingResponse(); - response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); - response.reset(); - resume_.Notify(); + // blockMainThreadUntilResume("/ready", "GET"); + interlockMainThread([&response, &header_calls]() { + response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); + response.reset(); + }); EXPECT_TRUE(quitAndWait()); EXPECT_EQ(1, header_calls); } @@ -248,11 +262,12 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk1) { TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk2) { AdminResponseSharedPtr response = streamingResponse(); waitForHeaders(response); - blockMainThreadUntilResume("/ready", "GET"); + // blockMainThreadUntilResume("/ready", "GET"); int chunk_calls = 0; - response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); - response->cancel(); - resume_.Notify(); + interlockMainThread([&response, &chunk_calls]() { + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + response->cancel(); + }); EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, chunk_calls); } @@ -260,14 +275,17 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk2) { TEST_F(AdminStreamingTest, CancelAfterAskingForChunk) { AdminResponseSharedPtr response = streamingResponse(); waitForHeaders(response); - blockMainThreadUntilResume("/ready", "GET"); int chunk_calls = 0; // Cause the /streaming handler to pause while yielding the next chunk, to hit // an early exit in requestNextChunk. next_chunk_hook_ = [response]() { response->cancel(); }; - response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); - resume_.Notify(); + + // blockMainThreadUntilResume("/ready", "GET"); + interlockMainThread([&chunk_calls, response]() { + response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); + }); + EXPECT_TRUE(quitAndWait()); EXPECT_EQ(0, chunk_calls); } @@ -324,14 +342,18 @@ TEST_F(AdminStreamingTest, QuitBeforeCreatingResponse) { } TEST_F(AdminStreamingTest, TimeoutGettingResponse) { - blockMainThreadUntilResume("/ready", "GET"); - AdminResponseSharedPtr response = streamingResponse(); + // blockMainThreadUntilResume("/ready", "GET"); absl::Notification got_headers; - response->getHeaders( - [&got_headers](Http::Code, Http::ResponseHeaderMap&) { got_headers.Notify(); }); - ENVOY_LOG_MISC(info, "Blocking for 5 seconds to test timeout functionality..."); - ASSERT_FALSE(got_headers.WaitForNotificationWithTimeout(absl::Seconds(5))); - resume_.Notify(); + AdminResponseSharedPtr response = streamingResponse(); + interlockMainThread( + [response, &got_headers]() { + response->getHeaders( + [&got_headers](Http::Code, Http::ResponseHeaderMap&) { got_headers.Notify(); }); + ENVOY_LOG_MISC(info, "Blocking for 5 seconds to test timeout functionality..."); + }, + [&got_headers]() { + ASSERT_FALSE(got_headers.WaitForNotificationWithTimeout(absl::Seconds(5))); + }); EXPECT_TRUE(quitAndWait()); } From 23caf5e52ef26e63152e45f42b9e7e65ef919094 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 6 Mar 2024 19:48:57 -0500 Subject: [PATCH 37/39] remove run_before_resume hack. Signed-off-by: Joshua Marantz --- test/exe/admin_response_test.cc | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index 050c026c3915..92e731b86473 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -108,16 +108,12 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); }*/ - void interlockMainThread(std::function fn, - std::function run_before_resume = nullptr) { + void interlockMainThread(std::function fn) { main_common_->dispatcherForTest().post([this, fn] { resume_.WaitForNotification(); fn(); pause_point_.Notify(); }); - if (run_before_resume != nullptr) { - run_before_resume(); - } resume_.Notify(); pause_point_.WaitForNotification(); } @@ -342,6 +338,7 @@ TEST_F(AdminStreamingTest, QuitBeforeCreatingResponse) { } TEST_F(AdminStreamingTest, TimeoutGettingResponse) { +#if 0 // blockMainThreadUntilResume("/ready", "GET"); absl::Notification got_headers; AdminResponseSharedPtr response = streamingResponse(); @@ -355,6 +352,23 @@ TEST_F(AdminStreamingTest, TimeoutGettingResponse) { ASSERT_FALSE(got_headers.WaitForNotificationWithTimeout(absl::Seconds(5))); }); EXPECT_TRUE(quitAndWait()); +#else + // blockMainThreadUntilResume("/ready", "GET"); + absl::Notification got_headers; + AdminResponseSharedPtr response = streamingResponse(); + main_common_->dispatcherForTest().post([this, response, &got_headers] { + resume_.WaitForNotification(); + response->getHeaders( + [&got_headers](Http::Code, Http::ResponseHeaderMap&) { got_headers.Notify(); }); + pause_point_.Notify(); + }); + + ENVOY_LOG_MISC(info, "Blocking for 5 seconds to test timeout functionality..."); + ASSERT_FALSE(got_headers.WaitForNotificationWithTimeout(absl::Seconds(5))); + resume_.Notify(); + pause_point_.WaitForNotification(); + EXPECT_TRUE(quitAndWait()); +#endif } } // namespace Envoy From 24271701875b390c867fdab67ad8c850441e3fa8 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 6 Mar 2024 21:02:49 -0500 Subject: [PATCH 38/39] clean up Signed-off-by: Joshua Marantz --- test/exe/admin_response_test.cc | 35 ++++++--------------------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/test/exe/admin_response_test.cc b/test/exe/admin_response_test.cc index 92e731b86473..33a7f10248e5 100644 --- a/test/exe/admin_response_test.cc +++ b/test/exe/admin_response_test.cc @@ -98,16 +98,11 @@ class AdminStreamingTest : public AdminRequestTestBase, public testing::Test { * the fact that all the admin responses are delivered on the main thread. * So we can pause those by blocking the main thread indefinitely. * - * To resume the main thread, call resume_.Notify(); + * The provided lambda runs in the main thread, between two notifications + * controlled by this function. * - * @param url the admin endpoint to initiate. + * @param fn function to run in the main thread, before interlockMainThread returns. */ - /*void blockMainThreadUntilResume(absl::string_view url, absl::string_view method) { - AdminResponseSharedPtr blocked_response = main_common_->adminRequest(url, method); - blocked_response->getHeaders( - [this](Http::Code, Http::ResponseHeaderMap&) { resume_.WaitForNotification(); }); - }*/ - void interlockMainThread(std::function fn) { main_common_->dispatcherForTest().post([this, fn] { resume_.WaitForNotification(); @@ -205,7 +200,6 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForHeader) { TEST_F(AdminStreamingTest, CancelAfterAskingForHeader1) { int header_calls = 0; - // blockMainThreadUntilResume("/ready", "GET"); AdminResponseSharedPtr response = streamingResponse(); interlockMainThread([&header_calls, response]() { response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); @@ -227,7 +221,6 @@ TEST_F(AdminStreamingTest, CancelAfterAskingForHeader2) { TEST_F(AdminStreamingTest, DeleteAfterAskingForHeader1) { int header_calls = 0; AdminResponseSharedPtr response = streamingResponse(); - // blockMainThreadUntilResume("/ready", "GET"); interlockMainThread([&response, &header_calls]() { response->getHeaders([&header_calls](Http::Code, Http::ResponseHeaderMap&) { ++header_calls; }); response.reset(); @@ -258,7 +251,6 @@ TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk1) { TEST_F(AdminStreamingTest, CancelBeforeAskingForChunk2) { AdminResponseSharedPtr response = streamingResponse(); waitForHeaders(response); - // blockMainThreadUntilResume("/ready", "GET"); int chunk_calls = 0; interlockMainThread([&response, &chunk_calls]() { response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); @@ -277,7 +269,6 @@ TEST_F(AdminStreamingTest, CancelAfterAskingForChunk) { // an early exit in requestNextChunk. next_chunk_hook_ = [response]() { response->cancel(); }; - // blockMainThreadUntilResume("/ready", "GET"); interlockMainThread([&chunk_calls, response]() { response->nextChunk([&chunk_calls](Buffer::Instance&, bool) { ++chunk_calls; }); }); @@ -338,24 +329,11 @@ TEST_F(AdminStreamingTest, QuitBeforeCreatingResponse) { } TEST_F(AdminStreamingTest, TimeoutGettingResponse) { -#if 0 - // blockMainThreadUntilResume("/ready", "GET"); - absl::Notification got_headers; - AdminResponseSharedPtr response = streamingResponse(); - interlockMainThread( - [response, &got_headers]() { - response->getHeaders( - [&got_headers](Http::Code, Http::ResponseHeaderMap&) { got_headers.Notify(); }); - ENVOY_LOG_MISC(info, "Blocking for 5 seconds to test timeout functionality..."); - }, - [&got_headers]() { - ASSERT_FALSE(got_headers.WaitForNotificationWithTimeout(absl::Seconds(5))); - }); - EXPECT_TRUE(quitAndWait()); -#else - // blockMainThreadUntilResume("/ready", "GET"); absl::Notification got_headers; AdminResponseSharedPtr response = streamingResponse(); + + // Mimics a slow admin response by adding a blocking notification in front + // of a call to initiate an admin request. main_common_->dispatcherForTest().post([this, response, &got_headers] { resume_.WaitForNotification(); response->getHeaders( @@ -368,7 +346,6 @@ TEST_F(AdminStreamingTest, TimeoutGettingResponse) { resume_.Notify(); pause_point_.WaitForNotification(); EXPECT_TRUE(quitAndWait()); -#endif } } // namespace Envoy From bd8db801154d3fdbb9532d8bf6ed36f979d12ad0 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 7 Mar 2024 09:09:02 -0500 Subject: [PATCH 39/39] Remove half-baked README.md, and add Tianyu's note to PtrSet comment. Signed-off-by: Joshua Marantz --- source/exe/README.md | 10 ---------- source/exe/admin_response.h | 4 ++++ 2 files changed, 4 insertions(+), 10 deletions(-) delete mode 100644 source/exe/README.md diff --git a/source/exe/README.md b/source/exe/README.md deleted file mode 100644 index b8a0b6c70272..000000000000 --- a/source/exe/README.md +++ /dev/null @@ -1,10 +0,0 @@ -State Action Next State Side Effects - -Ground adminRequest(u,m) ResponseInitial -ResponseInitial getHeaders HeadersWait post to main thread to get headers -HeadersWait postComplete HeadersSent call HeadersFn -HeadersSent - - -ResponseInitial cancel ResponseCancelled -Terminated adminRequest(u,m) ResponseTerminated diff --git a/source/exe/admin_response.h b/source/exe/admin_response.h index 89636f92a865..b3683b5707c7 100644 --- a/source/exe/admin_response.h +++ b/source/exe/admin_response.h @@ -44,6 +44,10 @@ class AdminResponse : public std::enable_shared_from_this { // active responses that it is being shut down, and thus all responses need to // be terminated. And it gives a reliable way for AdminResponse to detach // itself, whether or not MainCommonBase is already deleted. + // + // In summary: + // * MainCommonBase can outlive AdminResponse so we need detachResponse. + // * AdminResponse can outlive MainCommonBase, so we need shared_ptr. class PtrSet { public: /**