From 9148b6afe4bdcfa40804dea464d463a28c8fe0b0 Mon Sep 17 00:00:00 2001 From: Greg Greenway Date: Wed, 18 Sep 2024 15:20:11 -0700 Subject: [PATCH] http2: fix reported protocol error from graceful upstream close Fixes #21522 Signed-off-by: Greg Greenway --- changelogs/current.yaml | 4 + source/common/http/codec_client.cc | 13 +- source/common/http/http2/codec_impl.cc | 6 +- source/common/http/status.cc | 12 ++ source/common/http/status.h | 7 ++ source/common/runtime/runtime_features.cc | 1 + .../multiplexed_integration_test.cc | 112 ++++++++++++++++++ 7 files changed, 150 insertions(+), 5 deletions(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index a8f9ca6d8856..18f918f31be4 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -116,6 +116,10 @@ bug_fixes: change: | RBAC will now allow stat prefixes configured in per-route config to override the base config's stat prefix. +- area: http2 + change: | + Fixed bug where an upstream that sent a GOAWAY and gracefully closed a connection would result in an increment of + the cluster stat ``upstream_cx_protocol_error`` and setting the ``UpstreamProtocolError`` response flag. - area: http3 change: | Fixed a bug where an empty trailers block could be sent. This would occur if a filter removed diff --git a/source/common/http/codec_client.cc b/source/common/http/codec_client.cc index 4c0cee082880..2f74974d983d 100644 --- a/source/common/http/codec_client.cc +++ b/source/common/http/codec_client.cc @@ -170,10 +170,15 @@ void CodecClient::onData(Buffer::Instance& data) { if (!status.ok()) { ENVOY_CONN_LOG(debug, "Error dispatching received data: {}", *connection_, status.message()); - // Don't count 408 responses where we have no active requests as protocol errors - if (!isPrematureResponseError(status) || - (!active_requests_.empty() || - getPrematureResponseHttpCode(status) != Code::RequestTimeout)) { + // Don't count 408 responses where we have no active requests as protocol errors. + // Don't count graceful GOAWAY closes. + const bool not_408 = + !isPrematureResponseError(status) || + (!active_requests_.empty() || getPrematureResponseHttpCode(status) != Code::RequestTimeout); + const bool is_goaway = isGoAwayGracefulCloseError(status); + if (not_408 && + (!is_goaway || !Runtime::runtimeFeatureEnabled( + "envoy.reloadable_features.http2_no_protocol_error_upon_clean_close"))) { host_->cluster().trafficStats()->upstream_cx_protocol_error_.inc(); protocol_error_ = true; } diff --git a/source/common/http/http2/codec_impl.cc b/source/common/http/http2/codec_impl.cc index 9171d60a8865..92ce3565409d 100644 --- a/source/common/http/http2/codec_impl.cc +++ b/source/common/http/http2/codec_impl.cc @@ -1704,7 +1704,11 @@ int ConnectionImpl::setAndCheckCodecCallbackStatus(Status&& status) { // error statuses are silently discarded. codec_callback_status_.Update(std::move(status)); if (codec_callback_status_.ok() && connection_.state() != Network::Connection::State::Open) { - codec_callback_status_ = codecProtocolError("Connection was closed while dispatching frames"); + if (!active_streams_.empty() || !raised_goaway_) { + codec_callback_status_ = codecProtocolError("Connection was closed while dispatching frames"); + } else { + codec_callback_status_ = goAwayGracefulCloseError(); + } } return codec_callback_status_.ok() ? 0 : ERR_CALLBACK_FAILURE; diff --git a/source/common/http/status.cc b/source/common/http/status.cc index 86117912c212..2ae3d69bb32a 100644 --- a/source/common/http/status.cc +++ b/source/common/http/status.cc @@ -27,6 +27,8 @@ absl::string_view statusCodeToString(StatusCode code) { return "InboundFramesWithEmptyPayloadError"; case StatusCode::EnvoyOverloadError: return "EnvoyOverloadError"; + case StatusCode::GoAwayGracefulClose: + return "GoAwayGracefulClose"; } return ""; } @@ -124,6 +126,12 @@ Status envoyOverloadError(absl::string_view message) { return status; } +Status goAwayGracefulCloseError() { + absl::Status status(absl::StatusCode::kInternal, {}); + storePayload(status, EnvoyStatusPayload(StatusCode::GoAwayGracefulClose)); + return status; +} + // Methods for checking and extracting error information StatusCode getStatusCode(const Status& status) { return status.ok() ? StatusCode::Ok : getPayload(status).status_code_; @@ -160,5 +168,9 @@ bool isEnvoyOverloadError(const Status& status) { return getStatusCode(status) == StatusCode::EnvoyOverloadError; } +bool isGoAwayGracefulCloseError(const Status& status) { + return getStatusCode(status) == StatusCode::GoAwayGracefulClose; +} + } // namespace Http } // namespace Envoy diff --git a/source/common/http/status.h b/source/common/http/status.h index 858311b1d9bf..94c451a64075 100644 --- a/source/common/http/status.h +++ b/source/common/http/status.h @@ -79,6 +79,11 @@ enum class StatusCode : int { * Indicates that Envoy is overloaded and may shed load. */ EnvoyOverloadError = 6, + + /** + * Indicates the connection was gracefully closed due to GOAWAY. + */ + GoAwayGracefulClose = 7, }; using Status = absl::Status; @@ -100,6 +105,7 @@ Status prematureResponseError(absl::string_view message, Http::Code http_code); Status codecClientError(absl::string_view message); Status inboundFramesWithEmptyPayloadError(); Status envoyOverloadError(absl::string_view message); +Status goAwayGracefulCloseError(); /** * Returns Envoy::StatusCode of the given status object. @@ -116,6 +122,7 @@ ABSL_MUST_USE_RESULT bool isPrematureResponseError(const Status& status); ABSL_MUST_USE_RESULT bool isCodecClientError(const Status& status); ABSL_MUST_USE_RESULT bool isInboundFramesWithEmptyPayloadError(const Status& status); ABSL_MUST_USE_RESULT bool isEnvoyOverloadError(const Status& status); +ABSL_MUST_USE_RESULT bool isGoAwayGracefulCloseError(const Status& status); /** * Returns Http::Code value of the PrematureResponseError status. diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index ac6d76016105..acdec93efc4e 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -55,6 +55,7 @@ RUNTIME_GUARD(envoy_reloadable_features_http1_balsa_disallow_lone_cr_in_chunk_ex // Ignore the automated "remove this flag" issue: we should keep this for 1 year. RUNTIME_GUARD(envoy_reloadable_features_http1_use_balsa_parser); RUNTIME_GUARD(envoy_reloadable_features_http2_discard_host_header); +RUNTIME_GUARD(envoy_reloadable_features_http2_no_protocol_error_upon_clean_close); // Ignore the automated "remove this flag" issue: we should keep this for 1 year. RUNTIME_GUARD(envoy_reloadable_features_http2_use_oghttp2); RUNTIME_GUARD(envoy_reloadable_features_http2_use_visitor_for_data); diff --git a/test/integration/multiplexed_integration_test.cc b/test/integration/multiplexed_integration_test.cc index ff133982bded..a47d6e716176 100644 --- a/test/integration/multiplexed_integration_test.cc +++ b/test/integration/multiplexed_integration_test.cc @@ -2147,6 +2147,9 @@ TEST_P(Http2FrameIntegrationTest, AdjustUpstreamSettingsMaxStreams) { } TEST_P(Http2FrameIntegrationTest, UpstreamSettingsMaxStreamsAfterGoAway) { + config_helper_.addRuntimeOverride( + "envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "true"); + beginSession(); FakeRawConnectionPtr fake_upstream_connection; @@ -2170,6 +2173,115 @@ TEST_P(Http2FrameIntegrationTest, UpstreamSettingsMaxStreamsAfterGoAway) { std::string(settings_max_connections_frame)))); test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_close_notify", 1); + EXPECT_EQ(0, test_server_->counter("cluster.cluster_0.upstream_cx_protocol_error")->value()); + + // Cleanup. + tcp_client_->close(); +} + +TEST_P(Http2FrameIntegrationTest, UpstreamGoAway) { + config_helper_.addRuntimeOverride( + "envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "true"); + + beginSession(); + FakeRawConnectionPtr fake_upstream_connection; + + const uint32_t client_stream_idx = 1; + // Start a request and wait for it to reach the upstream. + sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url")); + ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection)); + const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame(); + ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame))); + test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1); + + const Http2Frame rst_stream = + Http2Frame::makeResetStreamFrame(client_stream_idx, Http2Frame::ErrorCode::FlowControlError); + const Http2Frame go_away_frame = + Http2Frame::makeEmptyGoAwayFrame(12345, Http2Frame::ErrorCode::NoError); + ASSERT_TRUE(fake_upstream_connection->write( + absl::StrCat(std::string(rst_stream), std::string(go_away_frame)))); + ASSERT_TRUE(fake_upstream_connection->close()); + + test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_close_notify", 1); + EXPECT_EQ(0, test_server_->counter("cluster.cluster_0.upstream_cx_protocol_error")->value()); + + // Cleanup. + tcp_client_->close(); +} + +TEST_P(Http2FrameIntegrationTest, UpstreamGoAwayLegacy) { + config_helper_.addRuntimeOverride( + "envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "false"); + + beginSession(); + FakeRawConnectionPtr fake_upstream_connection; + + const uint32_t client_stream_idx = 1; + // Start a request and wait for it to reach the upstream. + sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url")); + ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection)); + const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame(); + ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame))); + test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1); + + const Http2Frame rst_stream = + Http2Frame::makeResetStreamFrame(client_stream_idx, Http2Frame::ErrorCode::FlowControlError); + const Http2Frame go_away_frame = + Http2Frame::makeEmptyGoAwayFrame(12345, Http2Frame::ErrorCode::NoError); + ASSERT_TRUE(fake_upstream_connection->write( + absl::StrCat(std::string(rst_stream), std::string(go_away_frame)))); + ASSERT_TRUE(fake_upstream_connection->close()); + + test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_close_notify", 1); + test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_protocol_error", 1); + + // Cleanup. + tcp_client_->close(); +} + +// Test that sending an invalid frame results in `upstream_cx_protocol_error`. +TEST_P(Http2FrameIntegrationTest, UpstreamProtocolError) { + config_helper_.addRuntimeOverride( + "envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "true"); + + beginSession(); + FakeRawConnectionPtr fake_upstream_connection; + + const uint32_t client_stream_idx = 1; + // Start a request and wait for it to reach the upstream. + sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url")); + ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection)); + const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame(); + ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame))); + test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1); + + ASSERT_TRUE(fake_upstream_connection->write("abcdefg this is not a valid h2 frame")); + + test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_protocol_error", 1); + + // Cleanup. + tcp_client_->close(); +} + +// Test that sending an invalid frame results in `upstream_cx_protocol_error`. +TEST_P(Http2FrameIntegrationTest, UpstreamProtocolErrorLegacy) { + config_helper_.addRuntimeOverride( + "envoy.reloadable_features.http2_no_protocol_error_upon_clean_close", "false"); + + beginSession(); + FakeRawConnectionPtr fake_upstream_connection; + + const uint32_t client_stream_idx = 1; + // Start a request and wait for it to reach the upstream. + sendFrame(Http2Frame::makePostRequest(client_stream_idx, "host", "/path/to/long/url")); + ASSERT_TRUE(fake_upstreams_[0]->waitForRawConnection(fake_upstream_connection)); + const Http2Frame settings_frame = Http2Frame::makeEmptySettingsFrame(); + ASSERT_TRUE(fake_upstream_connection->write(std::string(settings_frame))); + test_server_->waitForGaugeEq("cluster.cluster_0.upstream_rq_active", 1); + + ASSERT_TRUE(fake_upstream_connection->write("abcdefg this is not a valid h2 frame")); + + test_server_->waitForCounterGe("cluster.cluster_0.upstream_cx_protocol_error", 1); // Cleanup. tcp_client_->close();