Skip to content

Commit

Permalink
HTTP health checker: handle GOAWAY from HTTP2 upstreams (#13599)
Browse files Browse the repository at this point in the history
Makes the HTTP health checker handle GOAWAY properly. When the NO_ERROR
code is received, any in flight request will be allowed to complete, at
which time the connection will be closed and a new connection created on
the next interval.

GOAWAY frames with codes other than NO_ERROR are treated as a health
check failure, and immediately close the connection.

Signed-off-by: Michael Puncel <[email protected]>
  • Loading branch information
mpuncel authored Jan 12, 2021
1 parent 7b5544b commit a836c8e
Show file tree
Hide file tree
Showing 13 changed files with 570 additions and 9 deletions.
2 changes: 2 additions & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Bug Fixes
---------
*Changes expected to improve the state of the world and are unlikely to have negative effects*

* active http health checks: properly handles HTTP/2 GOAWAY frames from the upstream. Previously a GOAWAY frame due to a graceful listener drain could cause improper failed health checks due to streams being refused by the upstream on a connection that is going away. To revert to old GOAWAY handling behavior, set the runtime feature `envoy.reloadable_features.health_check.graceful_goaway_handling` to false.

Removed Config or Runtime
-------------------------
*Normally occurs at the end of the* :ref:`deprecation period <deprecated>`
Expand Down
9 changes: 9 additions & 0 deletions source/common/http/http2/codec_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,15 @@ void ConnectionImpl::shutdownNotice() {
}
}

Status ConnectionImpl::protocolErrorForTest() {
int rc = nghttp2_submit_goaway(session_, NGHTTP2_FLAG_NONE,
nghttp2_session_get_last_proc_stream_id(session_),
NGHTTP2_PROTOCOL_ERROR, nullptr, 0);
ASSERT(rc == 0);

return sendPendingFrames();
}

Status ConnectionImpl::onBeforeFrameReceived(const nghttp2_frame_hd* hd) {
ENVOY_CONN_LOG(trace, "about to recv frame type={}, flags={}", connection_,
static_cast<uint64_t>(hd->type), static_cast<uint64_t>(hd->flags));
Expand Down
1 change: 1 addition & 0 deletions source/common/http/http2/codec_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class ConnectionImpl : public virtual Connection, protected Logger::Loggable<Log
void goAway() override;
Protocol protocol() override { return Protocol::Http2; }
void shutdownNotice() override;
Status protocolErrorForTest(); // Used in tests to simulate errors.
bool wantsToWrite() override { return nghttp2_session_want_write(session_); }
// Propagate network connection watermark events to each stream on the connection.
void onUnderlyingConnectionAboveWriteBufferHighWatermark() override {
Expand Down
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ constexpr const char* runtime_features[] = {
"envoy.reloadable_features.fix_wildcard_matching",
"envoy.reloadable_features.fixed_connection_close",
"envoy.reloadable_features.hcm_stream_error_on_invalid_message",
"envoy.reloadable_features.health_check.graceful_goaway_handling",
"envoy.reloadable_features.http_default_alpn",
"envoy.reloadable_features.http_match_on_all_headers",
"envoy.reloadable_features.http_set_copy_replace_all_headers",
Expand Down
48 changes: 45 additions & 3 deletions source/common/upstream/health_checker_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,14 @@ void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onInterval() {
parent_.transportSocketMatchMetadata().get());
client_.reset(parent_.createCodecClient(conn));
client_->addConnectionCallbacks(connection_callback_impl_);
client_->setCodecConnectionCallbacks(http_connection_callback_impl_);
expect_reset_ = false;
reuse_connection_ = parent_.reuse_connection_;
}

Http::RequestEncoder* request_encoder = &client_->newStream(*this);
request_encoder->getStream().addCallbacks(*this);
request_in_flight_ = true;

const auto request_headers = Http::createHeaderMap<Http::RequestHeaderMapImpl>(
{{Http::Headers::get().Method, "GET"},
Expand All @@ -284,15 +287,51 @@ void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onInterval() {

void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onResetStream(Http::StreamResetReason,
absl::string_view) {
request_in_flight_ = false;
ENVOY_CONN_LOG(debug, "connection/stream error health_flags={}", *client_,
HostUtility::healthFlagsToString(*host_));
if (expect_reset_) {
return;
}

ENVOY_CONN_LOG(debug, "connection/stream error health_flags={}", *client_,
HostUtility::healthFlagsToString(*host_));
if (client_ && !reuse_connection_) {
client_->close();
}

handleFailure(envoy::data::core::v3::NETWORK);
}

void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onGoAway(
Http::GoAwayErrorCode error_code) {
ENVOY_CONN_LOG(debug, "connection going away goaway_code={}, health_flags={}", *client_,
error_code, HostUtility::healthFlagsToString(*host_));

// Runtime guard around graceful handling of NO_ERROR GOAWAY handling. The old behavior is to
// ignore GOAWAY completely.
if (!parent_.runtime_.snapshot().runtimeFeatureEnabled(
"envoy.reloadable_features.health_check.graceful_goaway_handling")) {
return;
}

if (request_in_flight_ && error_code == Http::GoAwayErrorCode::NoError) {
// The server is starting a graceful shutdown. Allow the in flight request
// to finish without treating this as a health check error, and then
// reconnect.
reuse_connection_ = false;
return;
}

if (request_in_flight_) {
// Record this as a failed health check.
handleFailure(envoy::data::core::v3::NETWORK);
}

if (client_) {
expect_reset_ = true;
client_->close();
}
}

HttpHealthCheckerImpl::HttpActiveHealthCheckSession::HealthCheckResult
HttpHealthCheckerImpl::HttpActiveHealthCheckSession::healthCheckResult() {
uint64_t response_code = Http::Utility::getResponseStatus(*response_headers_);
Expand Down Expand Up @@ -323,6 +362,8 @@ HttpHealthCheckerImpl::HttpActiveHealthCheckSession::healthCheckResult() {
}

void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onResponseComplete() {
request_in_flight_ = false;

switch (healthCheckResult()) {
case HealthCheckResult::Succeeded:
handleSuccess(false);
Expand Down Expand Up @@ -350,7 +391,7 @@ bool HttpHealthCheckerImpl::HttpActiveHealthCheckSession::shouldClose() const {
return false;
}

if (!parent_.reuse_connection_) {
if (!reuse_connection_) {
return true;
}

Expand Down Expand Up @@ -380,6 +421,7 @@ bool HttpHealthCheckerImpl::HttpActiveHealthCheckSession::shouldClose() const {
}

void HttpHealthCheckerImpl::HttpActiveHealthCheckSession::onTimeout() {
request_in_flight_ = false;
if (client_) {
host_->setActiveHealthFailureType(Host::ActiveHealthFailureType::TIMEOUT);
ENVOY_CONN_LOG(debug, "connection/stream timeout health_flags={}", *client_,
Expand Down
14 changes: 14 additions & 0 deletions source/common/upstream/health_checker_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class HttpHealthCheckerImpl : public HealthCheckerImplBase {
void onBelowWriteBufferLowWatermark() override {}

void onEvent(Network::ConnectionEvent event);
void onGoAway(Http::GoAwayErrorCode error_code);

class ConnectionCallbackImpl : public Network::ConnectionCallbacks {
public:
Expand All @@ -119,14 +120,27 @@ class HttpHealthCheckerImpl : public HealthCheckerImplBase {
HttpActiveHealthCheckSession& parent_;
};

class HttpConnectionCallbackImpl : public Http::ConnectionCallbacks {
public:
HttpConnectionCallbackImpl(HttpActiveHealthCheckSession& parent) : parent_(parent) {}
// Http::ConnectionCallbacks
void onGoAway(Http::GoAwayErrorCode error_code) override { parent_.onGoAway(error_code); }

private:
HttpActiveHealthCheckSession& parent_;
};

ConnectionCallbackImpl connection_callback_impl_{*this};
HttpConnectionCallbackImpl http_connection_callback_impl_{*this};
HttpHealthCheckerImpl& parent_;
Http::CodecClientPtr client_;
Http::ResponseHeaderMapPtr response_headers_;
const std::string& hostname_;
const Http::Protocol protocol_;
Network::SocketAddressProviderSharedPtr local_address_provider_;
bool expect_reset_{};
bool reuse_connection_ = false;
bool request_in_flight_ = false;
};

using HttpActiveHealthCheckSessionPtr = std::unique_ptr<HttpActiveHealthCheckSession>;
Expand Down
18 changes: 18 additions & 0 deletions test/common/http/http2/codec_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,24 @@ TEST_P(Http2CodecImplTest, ShutdownNotice) {
response_encoder_->encodeHeaders(response_headers, true);
}

TEST_P(Http2CodecImplTest, ProtocolErrorForTest) {
initialize();
EXPECT_EQ(absl::nullopt, request_encoder_->http1StreamEncoderOptions());

TestRequestHeaderMapImpl request_headers;
HttpTestUtility::addDefaultHeaders(request_headers);
EXPECT_CALL(request_decoder_, decodeHeaders_(_, true));
EXPECT_TRUE(request_encoder_->encodeHeaders(request_headers, true).ok());

EXPECT_CALL(client_callbacks_, onGoAway(Http::GoAwayErrorCode::Other));

// We have to dynamic cast because protocolErrorForTest() is intentionally not on the
// Connection API.
ServerConnectionImpl* raw_server = dynamic_cast<ServerConnectionImpl*>(server_.get());
ASSERT(raw_server != nullptr);
EXPECT_EQ(StatusCode::CodecProtocolError, getStatusCode(raw_server->protocolErrorForTest()));
}

// 100 response followed by 200 results in a [decode100ContinueHeaders, decodeHeaders] sequence.
TEST_P(Http2CodecImplTest, ContinueHeaders) {
initialize();
Expand Down
3 changes: 2 additions & 1 deletion test/common/upstream/health_check_fuzz_test_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ void HttpHealthCheckerImplTestBase::expectClientCreate(
std::shared_ptr<Upstream::MockClusterInfo> cluster{
new NiceMock<Upstream::MockClusterInfo>()};
Event::MockDispatcher dispatcher_;
return new CodecClientForTest(
test_session.codec_client_ = new CodecClientForTest(
Http::CodecClient::Type::HTTP1, std::move(conn_data.connection_),
test_session.codec_, nullptr,
Upstream::makeTestHost(cluster, "tcp://127.0.0.1:9000", dispatcher_.timeSource()),
dispatcher_);
return test_session.codec_client_;
}));
}

Expand Down
1 change: 1 addition & 0 deletions test/common/upstream/health_check_fuzz_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class HttpHealthCheckerImplTestBase : public HealthCheckerTestBase {
Network::MockClientConnection* client_connection_{};
NiceMock<Http::MockRequestEncoder> request_encoder_;
Http::ResponseDecoder* stream_response_callbacks_{};
CodecClientForTest* codec_client_{};
};

using TestSessionPtr = std::unique_ptr<TestSession>;
Expand Down
Loading

0 comments on commit a836c8e

Please sign in to comment.