-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
alts: Fix TsiSocket doWrite on short writes #15962
Changes from 6 commits
1647c44
b9d5778
a104741
db79a59
a4c3230
31e5882
9e02cc8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,7 +166,11 @@ Network::PostIoAction TsiSocket::doHandshakeNextDone(NextResultPtr&& next_result | |
|
||
// Try to write raw buffer when next call is done, even this is not in do[Read|Write] stack. | ||
if (raw_write_buffer_.length() > 0) { | ||
return raw_buffer_socket_->doWrite(raw_write_buffer_, false).action_; | ||
Network::IoResult result = raw_buffer_socket_->doWrite(raw_write_buffer_, false); | ||
if (handshake_complete_ && raw_write_buffer_.length() > 0) { | ||
write_buffer_contains_handshake_bytes_ = true; | ||
} | ||
return result.action_; | ||
} | ||
|
||
return Network::PostIoAction::KeepOpen; | ||
|
@@ -259,28 +263,84 @@ Network::IoResult TsiSocket::doRead(Buffer::Instance& buffer) { | |
return repeatReadAndUnprotect(buffer, result); | ||
} | ||
|
||
Network::IoResult TsiSocket::repeatProtectAndWrite(Buffer::Instance& buffer, bool end_stream) { | ||
uint64_t total_bytes_written = 0; | ||
Network::IoResult result = {Network::PostIoAction::KeepOpen, 0, false}; | ||
|
||
ASSERT(!write_buffer_contains_handshake_bytes_); | ||
while (true) { | ||
uint64_t bytes_to_drain_this_iteration = | ||
prev_bytes_to_drain_ > 0 | ||
? prev_bytes_to_drain_ | ||
: std::min(buffer.length(), actual_frame_size_to_use_ - frame_overhead_size_); | ||
// Consumed all data. Exit. | ||
if (bytes_to_drain_this_iteration == 0) { | ||
break; | ||
} | ||
// Short write did not occur previously. | ||
if (raw_write_buffer_.length() == 0) { | ||
ASSERT(frame_protector_); | ||
ASSERT(prev_bytes_to_drain_ == 0); | ||
|
||
// Do protect. | ||
ENVOY_CONN_LOG(debug, "TSI: protecting buffer size: {}", callbacks_->connection(), | ||
bytes_to_drain_this_iteration); | ||
tsi_result status = frame_protector_->protect( | ||
grpc_slice_from_static_buffer(buffer.linearize(bytes_to_drain_this_iteration), | ||
bytes_to_drain_this_iteration), | ||
raw_write_buffer_); | ||
ENVOY_CONN_LOG(debug, "TSI: protected buffer left: {} result: {}", callbacks_->connection(), | ||
bytes_to_drain_this_iteration, tsi_result_to_string(status)); | ||
} | ||
|
||
// Write raw_write_buffer_ to network. | ||
ENVOY_CONN_LOG(debug, "TSI: raw_write length {} end_stream {}", callbacks_->connection(), | ||
raw_write_buffer_.length(), end_stream); | ||
result = raw_buffer_socket_->doWrite(raw_write_buffer_, end_stream && (buffer.length() == 0)); | ||
|
||
// Short write. Exit. | ||
if (raw_write_buffer_.length() > 0) { | ||
prev_bytes_to_drain_ = bytes_to_drain_this_iteration; | ||
break; | ||
} else { | ||
buffer.drain(bytes_to_drain_this_iteration); | ||
prev_bytes_to_drain_ = 0; | ||
total_bytes_written += bytes_to_drain_this_iteration; | ||
} | ||
} | ||
|
||
return {result.action_, total_bytes_written, false}; | ||
} | ||
|
||
Network::IoResult TsiSocket::doWrite(Buffer::Instance& buffer, bool end_stream) { | ||
if (!handshake_complete_) { | ||
Network::PostIoAction action = doHandshake(); | ||
// Envoy ALTS implements asynchronous tsi_handshaker_next() interface | ||
// which returns immediately after scheduling a handshake request to | ||
// the handshake service. The handshake response will be handled by a | ||
// dedicated thread in a seperate API within which handshake_complete_ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Failing format checks due to a spelling error: seperate -> separate |
||
// will be set to true if the handshake completes. | ||
ASSERT(!handshake_complete_); | ||
ASSERT(action == Network::PostIoAction::KeepOpen); | ||
// TODO(lizan): Handle synchronous handshake when TsiHandshaker supports it. | ||
} | ||
|
||
if (handshake_complete_) { | ||
return {Network::PostIoAction::KeepOpen, 0, false}; | ||
} else { | ||
ASSERT(frame_protector_); | ||
ENVOY_CONN_LOG(debug, "TSI: protecting buffer size: {}", callbacks_->connection(), | ||
buffer.length()); | ||
tsi_result status = frame_protector_->protect(buffer, raw_write_buffer_); | ||
ENVOY_CONN_LOG(debug, "TSI: protected buffer left: {} result: {}", callbacks_->connection(), | ||
buffer.length(), tsi_result_to_string(status)); | ||
} | ||
|
||
if (raw_write_buffer_.length() > 0) { | ||
ENVOY_CONN_LOG(debug, "TSI: raw_write length {} end_stream {}", callbacks_->connection(), | ||
raw_write_buffer_.length(), end_stream); | ||
return raw_buffer_socket_->doWrite(raw_write_buffer_, end_stream && (buffer.length() == 0)); | ||
// Check if we need to flush outstanding handshake bytes. | ||
if (write_buffer_contains_handshake_bytes_) { | ||
ASSERT(raw_write_buffer_.length() > 0); | ||
ENVOY_CONN_LOG(debug, "TSI: raw_write length {} end_stream {}", callbacks_->connection(), | ||
raw_write_buffer_.length(), end_stream); | ||
Network::IoResult result = | ||
raw_buffer_socket_->doWrite(raw_write_buffer_, end_stream && (buffer.length() == 0)); | ||
// Check if short write occurred. | ||
if (raw_write_buffer_.length() > 0) { | ||
return {result.action_, 0, false}; | ||
} | ||
write_buffer_contains_handshake_bytes_ = false; | ||
} | ||
return repeatProtectAndWrite(buffer, end_stream); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What about the raw_write_buffer_.length() > 0 code below this early return? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we still need it to send handshake data to its peer. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree, but think that it could be moved to the "if (!handshake_complete_) {" branch. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
return {Network::PostIoAction::KeepOpen, 0, false}; | ||
} | ||
|
||
void TsiSocket::closeSocket(Network::ConnectionEvent) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's an odd case that merits special consideration:
prev_bytes_to_drain_ == 0 && raw_write_buffer_.length() > 0
I think this can happen in the case where doHandshake adds bytes to raw_write_buffer_ but also completes the handshake. When this happens, the protect call below is skipped, but bytes_to_drain_this_iteration ends up being > 0, which could result in bytes in the input buffer being discarded without being sent.
Ways to detect:
- ASSERT((prev_bytes_to_drain_ == 0) == (raw_write_buffer_.length() == 0));
- ASSERT(prev_bytes_to_drain_ >= buffer.length());
- ASSERT(buffer.length() >= bytes_to_drain_this_iteration) before the call to buffer.drain() further down.
- Possibly adding an ASSERT to OwnedImpl::drainImpl to detect attempts to drain more bytes than are in the buffer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Per this code, I think we have a guarantee that raw_write_buffer_.length() is 0 when entering doWrite() for the first time after handshake completes. Also, it does not make sense that a peer wants to send non-handshake data without first confirming if the handshake completes successfully. In other words, during handshake, peer A will send whatever it receives from peer B to the handshake service in order to get the bytes to send to peer B. Here, peer A will not concatenate any non-handshake data to the data received from peer B, and send them to the handshake service because peer A has not received any confirmation from the handshake service that handshake completes successfully.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
raw_write_buffer_ will be non-empty after the doWrite just after handshake completes if that doWrite results in a partial write. I know this may be really unlikely, but it is possible for raw_write_buffer_ to be non-empty after handshake completes. I think it is also true that the peer that completes the handshake first will need to do a write after the handshake completes locally to provide the remote peer the information it needs to complete its own handshake.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Possible solution: branch on raw_write_buffer_.length() > 0 instead of prev_bytes_to_drain_.
When raw_write_buffer_.length() > 0, then bytes_to_drain_this_iteration = prev_bytes_to_drain_, and we should attempt a write even if bytes_to_drain_this_iteration is 0, which would happen if the bytes in the buffer are handshake bytes.
When raw_write_buffer_.length() == 0, then bytes_to_drain_this_iteration = std::min(buffer.length(), max_unprotected_frame_size_). If > 0, attempt to protect and write those bytes, else break.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I took a slightly different approach by introducing a new field, prev_handshake_bytes_to_drain_, that indicates whether we need to drain handshake data before doing regular protect+write operations. PTAL.