diff --git a/source/common/upstream/health_checker_impl.cc b/source/common/upstream/health_checker_impl.cc index 4090ba9dc082..6a8382a825ed 100644 --- a/source/common/upstream/health_checker_impl.cc +++ b/source/common/upstream/health_checker_impl.cc @@ -815,10 +815,17 @@ void GrpcHealthCheckerImpl::GrpcActiveHealthCheckSession::onGoAway( // Even if we have active health check probe, fail it on GOAWAY and schedule new one. if (request_encoder_) { handleFailure(envoy::data::core::v3::NETWORK); - expect_reset_ = true; - request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset); + // request_encoder_ can already be destroyed if the host was removed during the failure callback + // above. + if (request_encoder_ != nullptr) { + expect_reset_ = true; + request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset); + } + } + // client_ can already be destroyed if the host was removed during the failure callback above. + if (client_ != nullptr) { + client_->close(); } - client_->close(); } bool GrpcHealthCheckerImpl::GrpcActiveHealthCheckSession::isHealthCheckSucceeded( @@ -852,12 +859,17 @@ void GrpcHealthCheckerImpl::GrpcActiveHealthCheckSession::onRpcComplete( if (end_stream) { resetState(); } else { - // resetState() will be called by onResetStream(). - expect_reset_ = true; - request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset); + // request_encoder_ can already be destroyed if the host was removed during the failure callback + // above. + if (request_encoder_ != nullptr) { + // resetState() will be called by onResetStream(). + expect_reset_ = true; + request_encoder_->getStream().resetStream(Http::StreamResetReason::LocalReset); + } } - if (!parent_.reuse_connection_ || goaway) { + // client_ can already be destroyed if the host was removed during the failure callback above. + if (client_ != nullptr && (!parent_.reuse_connection_ || goaway)) { client_->close(); } } diff --git a/test/common/upstream/health_checker_impl_test.cc b/test/common/upstream/health_checker_impl_test.cc index e8ec25a14a6e..0aedf94a6e5e 100644 --- a/test/common/upstream/health_checker_impl_test.cc +++ b/test/common/upstream/health_checker_impl_test.cc @@ -4737,6 +4737,70 @@ TEST_F(GrpcHealthCheckerImplTest, SuccessStartFailedFailFirst) { expectHostHealthy(true); } +// Verify functionality when a host is removed inline with a failure via RPC that was proceeded +// by a GOAWAY. +TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFailViaRpcRemoveHostInCallback) { + setupHC(); + cluster_->prioritySet().getMockHostSet(0)->hosts_ = { + makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())}; + + expectSessionCreate(); + expectHealthcheckStart(0); + EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true)); + health_checker_->start(); + + EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed)) + .WillOnce(Invoke([&](HostSharedPtr host, HealthTransition) { + cluster_->prioritySet().getMockHostSet(0)->hosts_ = {}; + cluster_->prioritySet().runUpdateCallbacks(0, {}, {host}); + })); + EXPECT_CALL(event_logger_, logEjectUnhealthy(_, _, _)); + test_sessions_[0]->codec_client_->raiseGoAway(Http::GoAwayErrorCode::NoError); + respondServiceStatus(0, grpc::health::v1::HealthCheckResponse::NOT_SERVING); +} + +// Verify functionality when a host is removed inline with a failure via an error GOAWAY. +TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFailViaGoawayRemoveHostInCallback) { + setupHCWithUnhealthyThreshold(/*threshold=*/1); + cluster_->prioritySet().getMockHostSet(0)->hosts_ = { + makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())}; + + expectSessionCreate(); + expectHealthcheckStart(0); + EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true)); + health_checker_->start(); + + EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed)) + .WillOnce(Invoke([&](HostSharedPtr host, HealthTransition) { + cluster_->prioritySet().getMockHostSet(0)->hosts_ = {}; + cluster_->prioritySet().runUpdateCallbacks(0, {}, {host}); + })); + EXPECT_CALL(event_logger_, logEjectUnhealthy(_, _, _)); + test_sessions_[0]->codec_client_->raiseGoAway(Http::GoAwayErrorCode::Other); +} + +// Verify functionality when a host is removed inline with by a bad RPC response. +TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFailViaBadResponseRemoveHostInCallback) { + setupHCWithUnhealthyThreshold(/*threshold=*/1); + cluster_->prioritySet().getMockHostSet(0)->hosts_ = { + makeTestHost(cluster_->info_, "tcp://127.0.0.1:80", simTime())}; + + expectSessionCreate(); + expectHealthcheckStart(0); + EXPECT_CALL(event_logger_, logUnhealthy(_, _, _, true)); + health_checker_->start(); + + EXPECT_CALL(*this, onHostStatus(_, HealthTransition::Changed)) + .WillOnce(Invoke([&](HostSharedPtr host, HealthTransition) { + cluster_->prioritySet().getMockHostSet(0)->hosts_ = {}; + cluster_->prioritySet().runUpdateCallbacks(0, {}, {host}); + })); + EXPECT_CALL(event_logger_, logEjectUnhealthy(_, _, _)); + std::unique_ptr response_headers( + new Http::TestResponseHeaderMapImpl{{":status", "500"}}); + test_sessions_[0]->stream_response_callbacks_->decodeHeaders(std::move(response_headers), false); +} + // Test host recovery after explicit check failure requires several successful checks. TEST_F(GrpcHealthCheckerImplTest, GrpcHealthFail) { setupHC();