Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

grid: Track HTTP/3 broken-ness in ConnectivityGrid. #15933

Merged
merged 14 commits into from
Apr 19, 2021
89 changes: 65 additions & 24 deletions source/common/http/conn_pool_grid.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@
namespace Envoy {
namespace Http {

namespace{
absl::string_view describePool(const ConnectionPool::Instance& pool) {
return pool.protocolDescription();
}
} // namespace

ConnectivityGrid::WrapperCallbacks::WrapperCallbacks(ConnectivityGrid& grid,
Http::ResponseDecoder& decoder,
PoolIterator pool_it,
ConnectionPool::Callbacks& callbacks)
: grid_(grid), decoder_(decoder), inner_callbacks_(callbacks),
: grid_(grid), decoder_(decoder), inner_callbacks_(&callbacks),
next_attempt_timer_(
grid_.dispatcher_.createTimer([this]() -> void { tryAnotherConnection(); })),
current_(pool_it) {}
Expand Down Expand Up @@ -43,8 +45,14 @@ void ConnectivityGrid::WrapperCallbacks::ConnectionAttemptCallbacks::onPoolFailu
void ConnectivityGrid::WrapperCallbacks::onConnectionAttemptFailed(
ConnectionAttemptCallbacks* attempt, ConnectionPool::PoolFailureReason reason,
absl::string_view transport_failure_reason, Upstream::HostDescriptionConstSharedPtr host) {
ASSERT(host == grid_.host_);
ENVOY_LOG(trace, "{} pool failed to create connection to host '{}'.",
describePool(attempt->pool()), grid_.host_->hostname());
describePool(attempt->pool()), host->hostname());
if (grid_.isPoolHttp3(attempt->pool())) {
http3_attempt_failed_ = true;
}
maybeMarkHttp3Broken();

auto delete_this_on_return = attempt->removeFromList(connection_attempts_);

// If there is another connection attempt in flight then let that proceed.
Expand All @@ -58,12 +66,15 @@ void ConnectivityGrid::WrapperCallbacks::onConnectionAttemptFailed(
}

// If this point is reached, all pools have been tried. Pass the pool failure up to the
// original caller.
ConnectionPool::Callbacks& callbacks = inner_callbacks_;
ENVOY_LOG(trace, "Passing pool failure up to caller.", describePool(attempt->pool()),
grid_.host_->hostname());
// original caller, if the caller hasn't already been notified.
ConnectionPool::Callbacks* callbacks = inner_callbacks_;
inner_callbacks_ = nullptr;
deleteThis();
callbacks.onPoolFailure(reason, transport_failure_reason, host);
if (callbacks != nullptr) {
ENVOY_LOG(trace, "Passing pool failure up to caller.", describePool(attempt->pool()),
host->hostname());
callbacks->onPoolFailure(reason, transport_failure_reason, host);
}
RyanTheOptimist marked this conversation as resolved.
Show resolved Hide resolved
}

void ConnectivityGrid::WrapperCallbacks::deleteThis() {
Expand All @@ -87,17 +98,38 @@ void ConnectivityGrid::WrapperCallbacks::onConnectionAttemptReady(
ConnectionAttemptCallbacks* attempt, RequestEncoder& encoder,
Upstream::HostDescriptionConstSharedPtr host, const StreamInfo::StreamInfo& info,
absl::optional<Http::Protocol> protocol) {
ASSERT(host == grid_.host_);
RyanTheOptimist marked this conversation as resolved.
Show resolved Hide resolved
ENVOY_LOG(trace, "{} pool successfully connected to host '{}'.", describePool(attempt->pool()),
grid_.host_->hostname());
auto delete_on_return = attempt->removeFromList(connection_attempts_);
// The first successful connection is passed up, and all others will be canceled.
// TODO: Ensure that if HTTP/2 succeeds, we can allow the HTTP/3 connection to run to completion.
for (auto& attempt : connection_attempts_) {
attempt->cancel(Envoy::ConnectionPool::CancelPolicy::Default);
host->hostname());
if (!grid_.isPoolHttp3(attempt->pool())) {
tcp_attempt_succeeded_ = true;
}
maybeMarkHttp3Broken();

auto delete_this_on_return = attempt->removeFromList(connection_attempts_);
ConnectionPool::Callbacks* callbacks = inner_callbacks_;
inner_callbacks_ = nullptr;
// If an HTTP/3 connection attempts is in progress, let it complete so that if it succeeds
// it can be used for future requests. But if there is a TCP connection attempt in progress,
// cancel it.
if (grid_.isPoolHttp3(attempt->pool())) {
for (auto& attempt : connection_attempts_) {
attempt->cancel(Envoy::ConnectionPool::CancelPolicy::Default);
}
}
if (connection_attempts_.empty()) {
deleteThis();
}
if (callbacks != nullptr) {
callbacks->onPoolReady(encoder, host, info, protocol);
}
}

void ConnectivityGrid::WrapperCallbacks::maybeMarkHttp3Broken() {
if (http3_attempt_failed_ && tcp_attempt_succeeded_) {
ENVOY_LOG(trace, "Marking HTTP/3 broken for host '{}'.", grid_.host_->hostname());
grid_.setIsHttp3Broken(true);
alyssawilk marked this conversation as resolved.
Show resolved Hide resolved
}
ConnectionPool::Callbacks& callbacks = inner_callbacks_;
deleteThis();
return callbacks.onPoolReady(encoder, host, info, protocol);
}

void ConnectivityGrid::WrapperCallbacks::ConnectionAttemptCallbacks::onPoolReady(
Expand Down Expand Up @@ -160,7 +192,7 @@ ConnectivityGrid::~ConnectivityGrid() {
absl::optional<ConnectivityGrid::PoolIterator> ConnectivityGrid::createNextPool() {
// Pools are created by newStream, which should not be called during draining.
ASSERT(drained_callbacks_.empty());
// Right now, only H3 and ALPN are supported, so if there are 2 pools we're done.
// Right now, only H3 and TCP are supported, so if there are 2 pools we're done.
if (pools_.size() == 2 || !drained_callbacks_.empty()) {
return absl::nullopt;
}
Expand Down Expand Up @@ -193,17 +225,22 @@ ConnectionPool::Cancellable* ConnectivityGrid::newStream(Http::ResponseDecoder&
if (pools_.empty()) {
createNextPool();
}

// TODO(#15649) track pools with successful connections: don't always start at
// the front of the list.
auto wrapped_callback =
std::make_unique<WrapperCallbacks>(*this, decoder, pools_.begin(), callbacks);
PoolIterator pool = pools_.begin();
if (is_http3_broken_) {
ENVOY_LOG(trace, "HTTP/3 is broken to host '{}', skipping.", describePool(**pool),
host_->hostname());
// Since HTTP/3 is broken, presumably both pools have already been created so this
// is just to be safe.
createNextPool();
RyanTheOptimist marked this conversation as resolved.
Show resolved Hide resolved
++pool;
}
auto wrapped_callback = std::make_unique<WrapperCallbacks>(*this, decoder, pool, callbacks);
ConnectionPool::Cancellable* ret = wrapped_callback.get();
LinkedList::moveIntoList(std::move(wrapped_callback), wrapped_callbacks_);
// Note that in the case of immediate attempt/failure, newStream will delete this.
if (wrapped_callbacks_.front()->newStream() == StreamCreationResult::ImmediateResult) {
// If newStream succeeds, return nullptr as the caller has received their
// callback and does not need a cancellable handle.
// callback and does not need a cancellable handle. At this point the
// WrappedCallbacks object has also been deleted.
return nullptr;
}
return ret;
Expand Down Expand Up @@ -248,6 +285,10 @@ absl::optional<ConnectivityGrid::PoolIterator> ConnectivityGrid::nextPool(PoolIt
return createNextPool();
}

bool ConnectivityGrid::isPoolHttp3(const ConnectionPool::Instance& pool) {
return &pool == pools_.begin()->get();
RyanTheOptimist marked this conversation as resolved.
Show resolved Hide resolved
}

void ConnectivityGrid::onDrainReceived() {
// Don't do any work under the stack of ~ConnectivityGrid()
if (destroying_) {
Expand Down
26 changes: 21 additions & 5 deletions source/common/http/conn_pool_grid.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ class ConnectivityGrid : public ConnectionPool::Instance,
// Attempt to create a new stream for pool().
StreamCreationResult newStream();

// Removes this from the owning list, deleting it.
void deleteThis();

// Called on pool failure or timeout to kick off another connection attempt.
// Returns true if there is a failover pool and a connection has been
// attempted, false if all pools have been tried.
Expand All @@ -95,6 +92,13 @@ class ConnectivityGrid : public ConnectionPool::Instance,
absl::optional<Http::Protocol> protocol);

private:
// Removes this from the owning list, deleting it.
void deleteThis();

// Marks HTTP/3 broken if the HTTP/3 attempt failed but a TCP attempt succeeded.
RyanTheOptimist marked this conversation as resolved.
Show resolved Hide resolved
// While HTTP/3 is broken the grid will not attempt to make new HTTP/3 connections.
void maybeMarkHttp3Broken();

// Tracks all the connection attempts which currently in flight.
std::list<ConnectionAttemptCallbacksPtr> connection_attempts_;

Expand All @@ -103,12 +107,17 @@ class ConnectivityGrid : public ConnectionPool::Instance,
// The decoder for the original newStream, needed to create streams on subsequent pools.
Http::ResponseDecoder& decoder_;
// The callbacks from the original caller, which must get onPoolFailure or
// onPoolReady unless there is call to cancel().
ConnectionPool::Callbacks& inner_callbacks_;
// onPoolReady unless there is call to cancel(). Will be nullptr if the caller
// has been notified while attempts are still pending.
ConnectionPool::Callbacks* inner_callbacks_;
// The timer which tracks when new connections should be attempted.
Event::TimerPtr next_attempt_timer_;
// The iterator to the last pool which had a connection attempt.
PoolIterator current_;
// True if the HTTP/3 attempt failed.
bool http3_attempt_failed_{};
RyanTheOptimist marked this conversation as resolved.
Show resolved Hide resolved
// True if the TCP attempt succeeded.
bool tcp_attempt_succeeded_{};
};
using WrapperCallbacksPtr = std::unique_ptr<WrapperCallbacks>;

Expand All @@ -134,6 +143,12 @@ class ConnectivityGrid : public ConnectionPool::Instance,
// Returns the next pool in the ordered priority list.
absl::optional<PoolIterator> nextPool(PoolIterator pool_it);

// Returns true if pool is the grid's HTTP/3 connection pool.
bool isPoolHttp3(const ConnectionPool::Instance& pool);

bool isHttp3Broken() const { return is_http3_broken_; }
void setIsHttp3Broken(bool is_http3_broken) { is_http3_broken_ = is_http3_broken; }

private:
friend class ConnectivityGridForTest;

Expand All @@ -155,6 +170,7 @@ class ConnectivityGrid : public ConnectionPool::Instance,
Upstream::ClusterConnectivityState& state_;
std::chrono::milliseconds next_attempt_duration_;
TimeSource& time_source_;
bool is_http3_broken_{};

// Tracks how many drains are needed before calling drain callbacks. This is
// set to the number of pools when the first drain callbacks are added, and
Expand Down
69 changes: 62 additions & 7 deletions test/common/http/conn_pool_grid_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ class ConnectivityGridForTest : public ConnectivityGrid {
if (immediate_success_) {
callbacks.onPoolReady(*encoder_, host(), *info_, absl::nullopt);
return nullptr;
} else if (immediate_failure_) {
}
if (immediate_failure_) {
callbacks.onPoolFailure(ConnectionPool::PoolFailureReason::LocalConnectionFailure,
"reason", host());
return nullptr;
} else {
callbacks_.push_back(&callbacks);
return &cancel_;
}
callbacks_.push_back(&callbacks);
return &cancel_;
}));
if (pools_.size() == 1) {
EXPECT_CALL(*first(), protocolDescription())
Expand Down Expand Up @@ -97,24 +97,24 @@ class ConnectivityGridTest : public Event::TestUsingSimulatedTime, public testin
public:
ConnectivityGridTest()
: options_({Http::Protocol::Http11, Http::Protocol::Http2, Http::Protocol::Http3}),
host_(new NiceMock<Upstream::MockHostDescription>()),
grid_(dispatcher_, random_,
Upstream::makeTestHost(cluster_, "hostname", "tcp://127.0.0.1:9000", simTime()),
Upstream::ResourcePriority::Default, socket_options_, transport_socket_options_,
state_, simTime(), std::chrono::milliseconds(300), options_) {
state_, simTime(), std::chrono::milliseconds(300), options_),
host_(grid_.host()) {
grid_.info_ = &info_;
grid_.encoder_ = &encoder_;
}

const Network::ConnectionSocket::OptionsSharedPtr socket_options_;
const Network::TransportSocketOptionsSharedPtr transport_socket_options_;
ConnectivityGrid::ConnectivityOptions options_;
Upstream::HostDescriptionConstSharedPtr host_;
Upstream::ClusterConnectivityState state_;
NiceMock<Event::MockDispatcher> dispatcher_;
std::shared_ptr<Upstream::MockClusterInfo> cluster_{new NiceMock<Upstream::MockClusterInfo>()};
NiceMock<Random::MockRandomGenerator> random_;
ConnectivityGridForTest grid_;
Upstream::HostDescriptionConstSharedPtr host_;

NiceMock<ConnPoolCallbacks> callbacks_;
NiceMock<MockResponseDecoder> decoder_;
Expand All @@ -134,6 +134,7 @@ TEST_F(ConnectivityGridTest, Success) {
ASSERT_NE(grid_.callbacks(), nullptr);
EXPECT_CALL(callbacks_.pool_ready_, ready());
grid_.callbacks()->onPoolReady(encoder_, host_, info_, absl::nullopt);
EXPECT_FALSE(grid_.isHttp3Broken());
}

// Test the first pool successfully connecting under the stack of newStream.
Expand All @@ -143,6 +144,7 @@ TEST_F(ConnectivityGridTest, ImmediateSuccess) {
EXPECT_CALL(callbacks_.pool_ready_, ready());
EXPECT_EQ(grid_.newStream(decoder_, callbacks_), nullptr);
EXPECT_NE(grid_.first(), nullptr);
EXPECT_FALSE(grid_.isHttp3Broken());
}

// Test the first pool failing and the second connecting.
Expand Down Expand Up @@ -171,6 +173,7 @@ TEST_F(ConnectivityGridTest, FailureThenSuccessSerial) {
EXPECT_CALL(callbacks_.pool_ready_, ready());
EXPECT_LOG_CONTAINS("trace", "second pool successfully connected to host 'hostname'",
grid_.callbacks(1)->onPoolReady(encoder_, host_, info_, absl::nullopt));
EXPECT_TRUE(grid_.isHttp3Broken());
}

// Test both connections happening in parallel and the second connecting.
Expand Down Expand Up @@ -200,6 +203,7 @@ TEST_F(ConnectivityGridTest, TimeoutThenSuccessParallelSecondConnects) {
EXPECT_NE(grid_.callbacks(), nullptr);
EXPECT_CALL(callbacks_.pool_ready_, ready());
grid_.callbacks(1)->onPoolReady(encoder_, host_, info_, absl::nullopt);
EXPECT_TRUE(grid_.isHttp3Broken());
}

// Test both connections happening in parallel and the first connecting.
Expand Down Expand Up @@ -227,6 +231,38 @@ TEST_F(ConnectivityGridTest, TimeoutThenSuccessParallelFirstConnects) {
EXPECT_NE(grid_.callbacks(0), nullptr);
EXPECT_CALL(callbacks_.pool_ready_, ready());
grid_.callbacks(0)->onPoolReady(encoder_, host_, info_, absl::nullopt);
EXPECT_FALSE(grid_.isHttp3Broken());
}

// Test both connections happening in parallel and the second connecting before
// the first eventually fails.
TEST_F(ConnectivityGridTest, TimeoutThenSuccessParallelSecondConnectsFirstFail) {
alyssawilk marked this conversation as resolved.
Show resolved Hide resolved
EXPECT_EQ(grid_.first(), nullptr);

// This timer will be returned and armed as the grid creates the wrapper's failover timer.
Event::MockTimer* failover_timer = new NiceMock<MockTimer>(&dispatcher_);

grid_.newStream(decoder_, callbacks_);
EXPECT_NE(grid_.first(), nullptr);
EXPECT_TRUE(failover_timer->enabled_);

// Kick off the second connection.
failover_timer->invokeCallback();
EXPECT_NE(grid_.second(), nullptr);

// onPoolReady should be passed from the pool back to the original caller.
EXPECT_NE(grid_.callbacks(1), nullptr);
EXPECT_CALL(callbacks_.pool_ready_, ready());
grid_.callbacks(1)->onPoolReady(encoder_, host_, info_, absl::nullopt);
EXPECT_FALSE(grid_.isHttp3Broken());

// onPoolFailure should not be passed up the first time. Instead the grid
// should wait on the other pool
EXPECT_NE(grid_.callbacks(0), nullptr);
EXPECT_CALL(callbacks_.pool_failure_, ready()).Times(0);
grid_.callbacks(0)->onPoolFailure(ConnectionPool::PoolFailureReason::LocalConnectionFailure,
"reason", host_);
EXPECT_TRUE(grid_.isHttp3Broken());
}

// Test that after the first pool fails, subsequent connections will
Expand Down Expand Up @@ -260,6 +296,7 @@ TEST_F(ConnectivityGridTest, ImmediateDoubleFailure) {
grid_.immediate_failure_ = true;
EXPECT_CALL(callbacks_.pool_failure_, ready());
EXPECT_EQ(grid_.newStream(decoder_, callbacks_), nullptr);
EXPECT_FALSE(grid_.isHttp3Broken());
}

// Test both connections happening in parallel and both failing.
Expand Down Expand Up @@ -287,6 +324,7 @@ TEST_F(ConnectivityGridTest, TimeoutDoubleFailureParallel) {
EXPECT_CALL(callbacks_.pool_failure_, ready());
grid_.callbacks(1)->onPoolFailure(ConnectionPool::PoolFailureReason::LocalConnectionFailure,
"reason", host_);
EXPECT_FALSE(grid_.isHttp3Broken());
}

// Test cancellation
Expand Down Expand Up @@ -384,6 +422,23 @@ TEST_F(ConnectivityGridTest, NoDrainOnTeardown) {
EXPECT_FALSE(drain_received);
}

// Test that when HTTP/3 is broken then the HTTP/3 pool is skipped.
TEST_F(ConnectivityGridTest, SuccessAfterBroken) {
grid_.setIsHttp3Broken(true);
EXPECT_EQ(grid_.first(), nullptr);

EXPECT_LOG_CONTAINS("trace", "HTTP/3 is broken to host 'first', skipping.",
EXPECT_NE(grid_.newStream(decoder_, callbacks_), nullptr));
EXPECT_NE(grid_.first(), nullptr);
EXPECT_NE(grid_.second(), nullptr);

// onPoolReady should be passed from the pool back to the original caller.
ASSERT_NE(grid_.callbacks(), nullptr);
EXPECT_CALL(callbacks_.pool_ready_, ready());
grid_.callbacks()->onPoolReady(encoder_, host_, info_, absl::nullopt);
EXPECT_TRUE(grid_.isHttp3Broken());
}

#ifdef ENVOY_ENABLE_QUICHE

} // namespace
Expand Down