Skip to content

Commit

Permalink
fault: only check for overflow when performing faults (envoyproxy#12843)
Browse files Browse the repository at this point in the history
This shuffles the check for overflow around so that we only increment
the overflow value when attempting to apply a fault instead of for
each request. This ensures that if applying a low % fault the overflow
stat will be in line with that, instead of counting each request once
the overflow threshold has been hit.

Signed-off-by: Snow Pettersen <[email protected]>
Signed-off-by: Clara Andrew-Wani <[email protected]>
  • Loading branch information
snowp authored and clarakosi committed Sep 3, 2020
1 parent 32eebc6 commit 36d445a
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 43 deletions.
62 changes: 42 additions & 20 deletions source/extensions/filters/http/fault/fault_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "envoy/event/timer.h"
#include "envoy/extensions/filters/http/fault/v3/fault.pb.h"
#include "envoy/http/codes.h"
#include "envoy/http/filter.h"
#include "envoy/http/header_map.h"
#include "envoy/stats/scope.h"

Expand Down Expand Up @@ -118,10 +119,6 @@ Http::FilterHeadersStatus FaultFilter::decodeHeaders(Http::RequestHeaderMap& hea
fault_settings_ = per_route_settings ? per_route_settings : fault_settings_;
}

if (faultOverflow()) {
return Http::FilterHeadersStatus::Continue;
}

if (!matchesTargetUpstreamCluster()) {
return Http::FilterHeadersStatus::Continue;
}
Expand Down Expand Up @@ -156,27 +153,42 @@ Http::FilterHeadersStatus FaultFilter::decodeHeaders(Http::RequestHeaderMap& hea

maybeSetupResponseRateLimit(headers);

absl::optional<std::chrono::milliseconds> duration = delayDuration(headers);
if (duration.has_value()) {
if (maybeSetupDelay(headers)) {
return Http::FilterHeadersStatus::StopIteration;
}

if (maybeDoAbort(headers)) {
return Http::FilterHeadersStatus::StopIteration;
}

return Http::FilterHeadersStatus::Continue;
}

bool FaultFilter::maybeSetupDelay(const Http::RequestHeaderMap& request_headers) {
absl::optional<std::chrono::milliseconds> duration = delayDuration(request_headers);
if (duration.has_value() && tryIncActiveFaults()) {
delay_timer_ = decoder_callbacks_->dispatcher().createTimer(
[this, &headers]() -> void { postDelayInjection(headers); });
[this, &request_headers]() -> void { postDelayInjection(request_headers); });
ENVOY_LOG(debug, "fault: delaying request {}ms", duration.value().count());
delay_timer_->enableTimer(duration.value(), &decoder_callbacks_->scope());
recordDelaysInjectedStats();
decoder_callbacks_->streamInfo().setResponseFlag(StreamInfo::ResponseFlag::DelayInjected);
return Http::FilterHeadersStatus::StopIteration;
return true;
}
return false;
}

bool FaultFilter::maybeDoAbort(const Http::RequestHeaderMap& request_headers) {
absl::optional<Http::Code> http_status;
absl::optional<Grpc::Status::GrpcStatus> grpc_status;
std::tie(http_status, grpc_status) = abortStatus(headers);
std::tie(http_status, grpc_status) = abortStatus(request_headers);

if (http_status.has_value()) {
if (http_status.has_value() && tryIncActiveFaults()) {
abortWithStatus(http_status.value(), grpc_status);
return Http::FilterHeadersStatus::StopIteration;
return true;
}

return Http::FilterHeadersStatus::Continue;
return false;
}

void FaultFilter::maybeSetupResponseRateLimit(const Http::RequestHeaderMap& request_headers) {
Expand All @@ -190,8 +202,10 @@ void FaultFilter::maybeSetupResponseRateLimit(const Http::RequestHeaderMap& requ
return;
}

// General stats. All injected faults are considered a single aggregate active fault.
maybeIncActiveFaults();
if (!tryIncActiveFaults()) {
return;
}

config_->stats().response_rl_injected_.inc();

response_limiter_ = std::make_unique<StreamRateLimiter>(
Expand Down Expand Up @@ -356,8 +370,6 @@ void FaultFilter::recordDelaysInjectedStats() {
config_->incDelays(downstream_cluster_storage_->statName());
}

// General stats. All injected faults are considered a single aggregate active fault.
maybeIncActiveFaults();
config_->stats().delays_injected_.inc();
}

Expand All @@ -367,8 +379,6 @@ void FaultFilter::recordAbortsInjectedStats() {
config_->incAborts(downstream_cluster_storage_->statName());
}

// General stats. All injected faults are considered a single aggregate active fault.
maybeIncActiveFaults();
config_->stats().aborts_injected_.inc();
}

Expand All @@ -391,15 +401,27 @@ FaultFilterStats FaultFilterConfig::generateStats(const std::string& prefix, Sta
POOL_GAUGE_PREFIX(scope, final_prefix))};
}

void FaultFilter::maybeIncActiveFaults() {
bool FaultFilter::tryIncActiveFaults() {
// Only charge 1 active fault per filter in case we are injecting multiple faults.
// Since we count at most one active fault per filter, we also allow multiple faults
// per filter without checking for overflow.
if (fault_active_) {
return;
return true;
}

// We only check for overflow when attempting to perform a fault. Note that this means that a
// single request might increment the counter more than once if it tries to apply multiple faults,
// and it is also possible for it to fail the first check then succeed on the second (should
// another thread decrement the active fault gauge).
if (faultOverflow()) {
return false;
}

// TODO(mattklein123): Consider per-fault type active fault gauges.
config_->stats().active_faults_.inc();
fault_active_ = true;

return true;
}

void FaultFilter::onDestroy() {
Expand Down
7 changes: 6 additions & 1 deletion source/extensions/filters/http/fault/fault_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "envoy/extensions/filters/http/fault/v3/fault.pb.h"
#include "envoy/http/filter.h"
#include "envoy/http/header_map.h"
#include "envoy/runtime/runtime.h"
#include "envoy/stats/scope.h"
#include "envoy/stats/stats_macros.h"
Expand Down Expand Up @@ -262,7 +263,11 @@ class FaultFilter : public Http::StreamFilter, Logger::Loggable<Logger::Id::filt
absl::optional<Http::Code> abortHttpStatus(const Http::RequestHeaderMap& request_headers);
absl::optional<Grpc::Status::GrpcStatus>
abortGrpcStatus(const Http::RequestHeaderMap& request_headers);
void maybeIncActiveFaults();
// Attempts to increase the number of active faults. Returns false if we've reached the maximum
// number of allowed faults, in which case no fault should be performed.
bool tryIncActiveFaults();
bool maybeDoAbort(const Http::RequestHeaderMap& request_headers);
bool maybeSetupDelay(const Http::RequestHeaderMap& request_headers);
void maybeSetupResponseRateLimit(const Http::RequestHeaderMap& request_headers);

FaultFilterConfigSharedPtr config_;
Expand Down
47 changes: 25 additions & 22 deletions test/extensions/filters/http/fault/fault_filter_test.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <chrono>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>

Expand Down Expand Up @@ -482,10 +483,6 @@ TEST_F(FaultFilterTest, HeaderAbortWithHttpAndGrpcStatus) {
TEST_F(FaultFilterTest, FixedDelayZeroDuration) {
SetUpTest(fixed_delay_only_yaml);

EXPECT_CALL(runtime_.snapshot_,
getInteger("fault.http.max_active_faults", std::numeric_limits<uint64_t>::max()))
.WillOnce(Return(std::numeric_limits<uint64_t>::max()));

// Delay related calls
EXPECT_CALL(runtime_.snapshot_,
featureEnabled("fault.http.delay.fixed_delay_percent",
Expand All @@ -511,18 +508,38 @@ TEST_F(FaultFilterTest, FixedDelayZeroDuration) {
}

TEST_F(FaultFilterTest, Overflow) {
envoy::extensions::filters::http::fault::v3::HTTPFault fault;
fault.mutable_max_active_faults()->set_value(0);
SetUpTest(fault);
SetUpTest(fixed_delay_only_yaml);

// Delay related calls
EXPECT_CALL(runtime_.snapshot_,
featureEnabled("fault.http.delay.fixed_delay_percent",
Matcher<const envoy::type::v3::FractionalPercent&>(Percent(100))))
.WillOnce(Return(true));

// Return 1ms delay
EXPECT_CALL(runtime_.snapshot_, getInteger("fault.http.delay.fixed_duration_ms", 5000))
.WillOnce(Return(1));

EXPECT_CALL(runtime_.snapshot_, getInteger("fault.http.max_active_faults", 0))
EXPECT_CALL(runtime_.snapshot_,
getInteger("fault.http.max_active_faults", std::numeric_limits<uint64_t>::max()))
.WillOnce(Return(0));

EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers_, true));

EXPECT_EQ(0UL, config_->stats().active_faults_.value());
EXPECT_EQ(1UL, config_->stats().faults_overflow_.value());
}

// Verifies that we don't increment the active_faults gauge when not applying a fault.
TEST_F(FaultFilterTest, Passthrough) {
envoy::extensions::filters::http::fault::v3::HTTPFault fault;
SetUpTest(fault);

EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers_, true));

EXPECT_EQ(0UL, config_->stats().active_faults_.value());
}

TEST_F(FaultFilterTest, FixedDelayDeprecatedPercentAndNonZeroDuration) {
envoy::extensions::filters::http::fault::v3::HTTPFault fault;
fault.mutable_delay()->mutable_percentage()->set_numerator(50);
Expand Down Expand Up @@ -790,10 +807,6 @@ TEST_F(FaultFilterTest, FixedDelayAndAbortDownstreamNodes) {
TEST_F(FaultFilterTest, NoDownstreamMatch) {
SetUpTest(fixed_delay_and_abort_nodes_yaml);

EXPECT_CALL(runtime_.snapshot_,
getInteger("fault.http.max_active_faults", std::numeric_limits<uint64_t>::max()))
.WillOnce(Return(std::numeric_limits<uint64_t>::max()));

EXPECT_EQ(Http::FilterHeadersStatus::Continue, filter_->decodeHeaders(request_headers_, true));
}

Expand Down Expand Up @@ -857,10 +870,6 @@ TEST_F(FaultFilterTest, FixedDelayAndAbortHeaderMatchFail) {
request_headers_.addCopy("x-foo1", "Bar");
request_headers_.addCopy("x-foo3", "Baz");

EXPECT_CALL(runtime_.snapshot_,
getInteger("fault.http.max_active_faults", std::numeric_limits<uint64_t>::max()))
.WillOnce(Return(std::numeric_limits<uint64_t>::max()));

EXPECT_CALL(runtime_.snapshot_,
featureEnabled("fault.http.delay.fixed_delay_percent",
Matcher<const envoy::type::v3::FractionalPercent&>(_)))
Expand Down Expand Up @@ -984,9 +993,6 @@ TEST_F(FaultFilterTest, FaultWithTargetClusterMatchFail) {

EXPECT_CALL(decoder_filter_callbacks_.route_->route_entry_, clusterName())
.WillOnce(ReturnRef(upstream_cluster));
EXPECT_CALL(runtime_.snapshot_,
getInteger("fault.http.max_active_faults", std::numeric_limits<uint64_t>::max()))
.WillOnce(Return(std::numeric_limits<uint64_t>::max()));
EXPECT_CALL(runtime_.snapshot_,
featureEnabled("fault.http.delay.fixed_delay_percent",
Matcher<const envoy::type::v3::FractionalPercent&>(_)))
Expand Down Expand Up @@ -1014,9 +1020,6 @@ TEST_F(FaultFilterTest, FaultWithTargetClusterNullRoute) {
const std::string upstream_cluster("www1");

EXPECT_CALL(*decoder_filter_callbacks_.route_, routeEntry()).WillRepeatedly(Return(nullptr));
EXPECT_CALL(runtime_.snapshot_,
getInteger("fault.http.max_active_faults", std::numeric_limits<uint64_t>::max()))
.WillOnce(Return(std::numeric_limits<uint64_t>::max()));
EXPECT_CALL(runtime_.snapshot_,
featureEnabled("fault.http.delay.fixed_delay_percent",
Matcher<const envoy::type::v3::FractionalPercent&>(_)))
Expand Down

0 comments on commit 36d445a

Please sign in to comment.