From 36d445a9f8d59eceed740c2b40a25663d0afff76 Mon Sep 17 00:00:00 2001 From: Snow Pettersen Date: Fri, 28 Aug 2020 20:30:04 -0400 Subject: [PATCH] fault: only check for overflow when performing faults (#12843) This shuffles the check for overflow around so that we only increment the overflow value when attempting to apply a fault instead of for each request. This ensures that if applying a low % fault the overflow stat will be in line with that, instead of counting each request once the overflow threshold has been hit. Signed-off-by: Snow Pettersen Signed-off-by: Clara Andrew-Wani --- .../filters/http/fault/fault_filter.cc | 62 +++++++++++++------ .../filters/http/fault/fault_filter.h | 7 ++- .../filters/http/fault/fault_filter_test.cc | 47 +++++++------- 3 files changed, 73 insertions(+), 43 deletions(-) diff --git a/source/extensions/filters/http/fault/fault_filter.cc b/source/extensions/filters/http/fault/fault_filter.cc index 245f44b98a56..4b98e5583a89 100644 --- a/source/extensions/filters/http/fault/fault_filter.cc +++ b/source/extensions/filters/http/fault/fault_filter.cc @@ -8,6 +8,7 @@ #include "envoy/event/timer.h" #include "envoy/extensions/filters/http/fault/v3/fault.pb.h" #include "envoy/http/codes.h" +#include "envoy/http/filter.h" #include "envoy/http/header_map.h" #include "envoy/stats/scope.h" @@ -118,10 +119,6 @@ Http::FilterHeadersStatus FaultFilter::decodeHeaders(Http::RequestHeaderMap& hea fault_settings_ = per_route_settings ? per_route_settings : fault_settings_; } - if (faultOverflow()) { - return Http::FilterHeadersStatus::Continue; - } - if (!matchesTargetUpstreamCluster()) { return Http::FilterHeadersStatus::Continue; } @@ -156,27 +153,42 @@ Http::FilterHeadersStatus FaultFilter::decodeHeaders(Http::RequestHeaderMap& hea maybeSetupResponseRateLimit(headers); - absl::optional duration = delayDuration(headers); - if (duration.has_value()) { + if (maybeSetupDelay(headers)) { + return Http::FilterHeadersStatus::StopIteration; + } + + if (maybeDoAbort(headers)) { + return Http::FilterHeadersStatus::StopIteration; + } + + return Http::FilterHeadersStatus::Continue; +} + +bool FaultFilter::maybeSetupDelay(const Http::RequestHeaderMap& request_headers) { + absl::optional duration = delayDuration(request_headers); + if (duration.has_value() && tryIncActiveFaults()) { delay_timer_ = decoder_callbacks_->dispatcher().createTimer( - [this, &headers]() -> void { postDelayInjection(headers); }); + [this, &request_headers]() -> void { postDelayInjection(request_headers); }); ENVOY_LOG(debug, "fault: delaying request {}ms", duration.value().count()); delay_timer_->enableTimer(duration.value(), &decoder_callbacks_->scope()); recordDelaysInjectedStats(); decoder_callbacks_->streamInfo().setResponseFlag(StreamInfo::ResponseFlag::DelayInjected); - return Http::FilterHeadersStatus::StopIteration; + return true; } + return false; +} +bool FaultFilter::maybeDoAbort(const Http::RequestHeaderMap& request_headers) { absl::optional http_status; absl::optional grpc_status; - std::tie(http_status, grpc_status) = abortStatus(headers); + std::tie(http_status, grpc_status) = abortStatus(request_headers); - if (http_status.has_value()) { + if (http_status.has_value() && tryIncActiveFaults()) { abortWithStatus(http_status.value(), grpc_status); - return Http::FilterHeadersStatus::StopIteration; + return true; } - return Http::FilterHeadersStatus::Continue; + return false; } void FaultFilter::maybeSetupResponseRateLimit(const Http::RequestHeaderMap& request_headers) { @@ -190,8 +202,10 @@ void FaultFilter::maybeSetupResponseRateLimit(const Http::RequestHeaderMap& requ return; } - // General stats. All injected faults are considered a single aggregate active fault. - maybeIncActiveFaults(); + if (!tryIncActiveFaults()) { + return; + } + config_->stats().response_rl_injected_.inc(); response_limiter_ = std::make_unique( @@ -356,8 +370,6 @@ void FaultFilter::recordDelaysInjectedStats() { config_->incDelays(downstream_cluster_storage_->statName()); } - // General stats. All injected faults are considered a single aggregate active fault. - maybeIncActiveFaults(); config_->stats().delays_injected_.inc(); } @@ -367,8 +379,6 @@ void FaultFilter::recordAbortsInjectedStats() { config_->incAborts(downstream_cluster_storage_->statName()); } - // General stats. All injected faults are considered a single aggregate active fault. - maybeIncActiveFaults(); config_->stats().aborts_injected_.inc(); } @@ -391,15 +401,27 @@ FaultFilterStats FaultFilterConfig::generateStats(const std::string& prefix, Sta POOL_GAUGE_PREFIX(scope, final_prefix))}; } -void FaultFilter::maybeIncActiveFaults() { +bool FaultFilter::tryIncActiveFaults() { // Only charge 1 active fault per filter in case we are injecting multiple faults. + // Since we count at most one active fault per filter, we also allow multiple faults + // per filter without checking for overflow. if (fault_active_) { - return; + return true; + } + + // We only check for overflow when attempting to perform a fault. Note that this means that a + // single request might increment the counter more than once if it tries to apply multiple faults, + // and it is also possible for it to fail the first check then succeed on the second (should + // another thread decrement the active fault gauge). + if (faultOverflow()) { + return false; } // TODO(mattklein123): Consider per-fault type active fault gauges. config_->stats().active_faults_.inc(); fault_active_ = true; + + return true; } void FaultFilter::onDestroy() { diff --git a/source/extensions/filters/http/fault/fault_filter.h b/source/extensions/filters/http/fault/fault_filter.h index 206a8134c72c..e1ff4d275937 100644 --- a/source/extensions/filters/http/fault/fault_filter.h +++ b/source/extensions/filters/http/fault/fault_filter.h @@ -7,6 +7,7 @@ #include "envoy/extensions/filters/http/fault/v3/fault.pb.h" #include "envoy/http/filter.h" +#include "envoy/http/header_map.h" #include "envoy/runtime/runtime.h" #include "envoy/stats/scope.h" #include "envoy/stats/stats_macros.h" @@ -262,7 +263,11 @@ class FaultFilter : public Http::StreamFilter, Logger::Loggable abortHttpStatus(const Http::RequestHeaderMap& request_headers); absl::optional abortGrpcStatus(const Http::RequestHeaderMap& request_headers); - void maybeIncActiveFaults(); + // Attempts to increase the number of active faults. Returns false if we've reached the maximum + // number of allowed faults, in which case no fault should be performed. + bool tryIncActiveFaults(); + bool maybeDoAbort(const Http::RequestHeaderMap& request_headers); + bool maybeSetupDelay(const Http::RequestHeaderMap& request_headers); void maybeSetupResponseRateLimit(const Http::RequestHeaderMap& request_headers); FaultFilterConfigSharedPtr config_; diff --git a/test/extensions/filters/http/fault/fault_filter_test.cc b/test/extensions/filters/http/fault/fault_filter_test.cc index a29c5cc85816..0e25b9acb7de 100644 --- a/test/extensions/filters/http/fault/fault_filter_test.cc +++ b/test/extensions/filters/http/fault/fault_filter_test.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -482,10 +483,6 @@ TEST_F(FaultFilterTest, HeaderAbortWithHttpAndGrpcStatus) { TEST_F(FaultFilterTest, FixedDelayZeroDuration) { SetUpTest(fixed_delay_only_yaml); - EXPECT_CALL(runtime_.snapshot_, - getInteger("fault.http.max_active_faults", std::numeric_limits::max())) - .WillOnce(Return(std::numeric_limits::max())); - // Delay related calls EXPECT_CALL(runtime_.snapshot_, featureEnabled("fault.http.delay.fixed_delay_percent", @@ -511,18 +508,38 @@ TEST_F(FaultFilterTest, FixedDelayZeroDuration) { } TEST_F(FaultFilterTest, Overflow) { - envoy::extensions::filters::http::fault::v3::HTTPFault fault; - fault.mutable_max_active_faults()->set_value(0); - SetUpTest(fault); + SetUpTest(fixed_delay_only_yaml); + + // Delay related calls + EXPECT_CALL(runtime_.snapshot_, + featureEnabled("fault.http.delay.fixed_delay_percent", + Matcher(Percent(100)))) + .WillOnce(Return(true)); + + // Return 1ms delay + EXPECT_CALL(runtime_.snapshot_, getInteger("fault.http.delay.fixed_duration_ms", 5000)) + .WillOnce(Return(1)); - EXPECT_CALL(runtime_.snapshot_, getInteger("fault.http.max_active_faults", 0)) + EXPECT_CALL(runtime_.snapshot_, + getInteger("fault.http.max_active_faults", std::numeric_limits::max())) .WillOnce(Return(0)); EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers_, true)); + EXPECT_EQ(0UL, config_->stats().active_faults_.value()); EXPECT_EQ(1UL, config_->stats().faults_overflow_.value()); } +// Verifies that we don't increment the active_faults gauge when not applying a fault. +TEST_F(FaultFilterTest, Passthrough) { + envoy::extensions::filters::http::fault::v3::HTTPFault fault; + SetUpTest(fault); + + EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers_, true)); + + EXPECT_EQ(0UL, config_->stats().active_faults_.value()); +} + TEST_F(FaultFilterTest, FixedDelayDeprecatedPercentAndNonZeroDuration) { envoy::extensions::filters::http::fault::v3::HTTPFault fault; fault.mutable_delay()->mutable_percentage()->set_numerator(50); @@ -790,10 +807,6 @@ TEST_F(FaultFilterTest, FixedDelayAndAbortDownstreamNodes) { TEST_F(FaultFilterTest, NoDownstreamMatch) { SetUpTest(fixed_delay_and_abort_nodes_yaml); - EXPECT_CALL(runtime_.snapshot_, - getInteger("fault.http.max_active_faults", std::numeric_limits::max())) - .WillOnce(Return(std::numeric_limits::max())); - EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers_, true)); } @@ -857,10 +870,6 @@ TEST_F(FaultFilterTest, FixedDelayAndAbortHeaderMatchFail) { request_headers_.addCopy("x-foo1", "Bar"); request_headers_.addCopy("x-foo3", "Baz"); - EXPECT_CALL(runtime_.snapshot_, - getInteger("fault.http.max_active_faults", std::numeric_limits::max())) - .WillOnce(Return(std::numeric_limits::max())); - EXPECT_CALL(runtime_.snapshot_, featureEnabled("fault.http.delay.fixed_delay_percent", Matcher(_))) @@ -984,9 +993,6 @@ TEST_F(FaultFilterTest, FaultWithTargetClusterMatchFail) { EXPECT_CALL(decoder_filter_callbacks_.route_->route_entry_, clusterName()) .WillOnce(ReturnRef(upstream_cluster)); - EXPECT_CALL(runtime_.snapshot_, - getInteger("fault.http.max_active_faults", std::numeric_limits::max())) - .WillOnce(Return(std::numeric_limits::max())); EXPECT_CALL(runtime_.snapshot_, featureEnabled("fault.http.delay.fixed_delay_percent", Matcher(_))) @@ -1014,9 +1020,6 @@ TEST_F(FaultFilterTest, FaultWithTargetClusterNullRoute) { const std::string upstream_cluster("www1"); EXPECT_CALL(*decoder_filter_callbacks_.route_, routeEntry()).WillRepeatedly(Return(nullptr)); - EXPECT_CALL(runtime_.snapshot_, - getInteger("fault.http.max_active_faults", std::numeric_limits::max())) - .WillOnce(Return(std::numeric_limits::max())); EXPECT_CALL(runtime_.snapshot_, featureEnabled("fault.http.delay.fixed_delay_percent", Matcher(_)))