Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve our various round-trip timeout computations. #33587

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/app/OperationalSessionSetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,11 @@ CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock:
// but in practice for old devices BUSY often sends some hardcoded value
// that tells us nothing about when the other side will decide it has
// timed out.
//
// Unfortunately, we do not have the MRP config for the other side here,
// but in practice if the other side is using its local config to
// compute Sigma2 response timeouts, then it's also returning useful
// values with BUSY, so we will wait long enough.
auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
actualTimerDelay += additionalTimeout;
}
Expand Down
32 changes: 17 additions & 15 deletions src/app/ReadClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,24 +938,26 @@ CHIP_ERROR ReadClient::ComputeLivenessCheckTimerTimeout(System::Clock::Timeout *

//
// To calculate the duration we're willing to wait for a report to come to us, we take into account the maximum interval of
// the subscription AND the time it takes for the report to make it to us in the worst case. This latter bit involves
// computing the Ack timeout from the publisher for the ReportData message being sent to us using our IDLE interval as the
// basis for that computation.
// the subscription AND the time it takes for the report to make it to us in the worst case.
//
// Make sure to use the retransmission computation that includes backoff. For purposes of that computation, treat us as
// active now (since we are right now sending/receiving messages), and use the default "how long are we guaranteed to stay
// active" threshold for now.
// We have no way to estimate what the network latency will be, but we do know the other side will time out its ReportData
// after its computed round-trip timeout plus the processing time it gives us (app::kExpectedIMProcessingTime). Once it
// times out, assuming it sent the report at all, there's no point in us thinking we still have a subscription.
//
// TODO: We need to find a good home for this logic that will correctly compute this based on transport. For now, this will
// suffice since we don't use TCP as a transport currently and subscriptions over BLE aren't really a thing.
// We can't use ComputeRoundTripTimeout() on the session for two reasons: we want the roundtrip timeout from the point of
// view of the peer, not us, and we want to start off with the assumption the peer will likely have, which is that we are
// idle, whereas ComputeRoundTripTimeout() uses the current activity state of the peer.
//
const auto & localMRPConfig = GetLocalMRPConfig();
const auto & defaultMRPConfig = GetDefaultMRPConfig();
const auto & ourMrpConfig = localMRPConfig.ValueOr(defaultMRPConfig);
auto publisherTransmissionTimeout =
GetRetransmissionTimeout(ourMrpConfig.mActiveRetransTimeout, ourMrpConfig.mIdleRetransTimeout,
System::SystemClock().GetMonotonicTimestamp(), ourMrpConfig.mActiveThresholdTime);
*aTimeout = System::Clock::Seconds16(mMaxInterval) + publisherTransmissionTimeout;
// So recompute the round-trip timeout directly. Assume MRP, since in practice that is likely what is happening.
auto & peerMRPConfig = mReadPrepareParams.mSessionHolder->GetRemoteMRPConfig();
// Peer will assume we are idle (hence we pass kZero to GetMessageReceiptTimeout()), but will assume we treat it as active
// for the response, so to match the retransmission timeout computation for the message back to the peeer, we should treat
// it as active.
auto roundTripTimeout = mReadPrepareParams.mSessionHolder->GetMessageReceiptTimeout(System::Clock::kZero) +
kExpectedIMProcessingTime +
GetRetransmissionTimeout(peerMRPConfig.mActiveRetransTimeout, peerMRPConfig.mIdleRetransTimeout,
System::SystemClock().GetMonotonicTimestamp(), peerMRPConfig.mActiveThresholdTime);
*aTimeout = System::Clock::Seconds16(mMaxInterval) + roundTripTimeout;
return CHIP_NO_ERROR;
}

Expand Down
4 changes: 4 additions & 0 deletions src/darwin/Framework/CHIPTests/MTRControllerTests.m
Original file line number Diff line number Diff line change
Expand Up @@ -1557,6 +1557,10 @@ - (void)testSetMRPParameters
// Can be called before starting the factory
XCTAssertFalse(MTRDeviceControllerFactory.sharedInstance.running);
MTRSetMessageReliabilityParameters(@2000, @2000, @2000, @2000);

// Now reset back to the default state, so timings in other tests are not
// affected.
MTRSetMessageReliabilityParameters(nil, nil, nil, nil);
}

@end
Original file line number Diff line number Diff line change
Expand Up @@ -2049,6 +2049,10 @@ - (void)testSetMRPParametersWithRunningController
XCTAssertTrue(controller.running);
MTRSetMessageReliabilityParameters(@2000, @2000, @2000, @2000);
[controller shutdown];

// Now reset back to the default state, so timings in other tests are not
// affected.
MTRSetMessageReliabilityParameters(nil, nil, nil, nil);
}

static NSString * const kLocalTestUserDefaultDomain = @"org.csa-iot.matter.darwintest";
Expand Down
31 changes: 23 additions & 8 deletions src/protocols/secure_channel/CASESession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2362,21 +2362,36 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea
return err;
}

System::Clock::Timeout CASESession::ComputeSigma1ResponseTimeout(const ReliableMessageProtocolConfig & remoteMrpConfig)
namespace {
System::Clock::Timeout ComputeRoundTripTimeout(ExchangeContext::Timeout serverProcessingTime,
const ReliableMessageProtocolConfig & remoteMrpConfig)
{
// TODO: This is duplicating logic from Session::ComputeRoundTripTimeout. Unfortunately, it's called by
// consumers who do not have a session.
const auto & maybeLocalMRPConfig = GetLocalMRPConfig();
const auto & defaultMRRPConfig = GetDefaultMRPConfig();
const auto & localMRPConfig = maybeLocalMRPConfig.ValueOr(defaultMRRPConfig);
return GetRetransmissionTimeout(remoteMrpConfig.mActiveRetransTimeout, remoteMrpConfig.mIdleRetransTimeout,
// Assume peer is idle, since that's what we
// will assume for our initial message.
// Assume peer is idle, as a worst-case assumption (probably true for
// Sigma1, since that will be our initial message on the session, but less
// so for Sigma2).
System::Clock::kZero, remoteMrpConfig.mActiveThresholdTime) +
kExpectedSigma1ProcessingTime;
serverProcessingTime +
GetRetransmissionTimeout(localMRPConfig.mActiveRetransTimeout, localMRPConfig.mIdleRetransTimeout,
// Peer will assume we are active, since it's
// responding to our message.
System::SystemClock().GetMonotonicTimestamp(), localMRPConfig.mActiveThresholdTime);
}
} // anonymous namespace

System::Clock::Timeout CASESession::ComputeSigma1ResponseTimeout(const ReliableMessageProtocolConfig & remoteMrpConfig)
{
return ComputeRoundTripTimeout(kExpectedSigma1ProcessingTime, remoteMrpConfig);
}

System::Clock::Timeout CASESession::ComputeSigma2ResponseTimeout(const ReliableMessageProtocolConfig & remoteMrpConfig)
{
return GetRetransmissionTimeout(remoteMrpConfig.mActiveRetransTimeout, remoteMrpConfig.mIdleRetransTimeout,
// Assume peer is idle, as a worst-case assumption.
System::Clock::kZero, remoteMrpConfig.mActiveThresholdTime) +
kExpectedHighProcessingTime;
return ComputeRoundTripTimeout(kExpectedHighProcessingTime, remoteMrpConfig);
}

bool CASESession::InvokeBackgroundWorkWatchdog()
Expand Down
14 changes: 14 additions & 0 deletions src/transport/GroupSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ class IncomingGroupSession : public Session, public ReferenceCounted<IncomingGro
return System::Clock::Timeout();
}

System::Clock::Milliseconds32 GetMessageReceiptTimeout(System::Clock::Timestamp ourLastActivity) const override
{
// There are no timeouts for group sessions.
VerifyOrDie(false);
return System::Clock::Timeout();
}

GroupId GetGroupId() const { return mGroupId; }

private:
Expand Down Expand Up @@ -127,6 +134,13 @@ class OutgoingGroupSession : public Session, public ReferenceCounted<OutgoingGro
return System::Clock::Timeout();
}

System::Clock::Milliseconds32 GetMessageReceiptTimeout(System::Clock::Timestamp ourLastActivity) const override
{
// There are no timeouts for group sessions.
VerifyOrDie(false);
return System::Clock::Timeout();
}

GroupId GetGroupId() const { return mGroupId; }

private:
Expand Down
21 changes: 21 additions & 0 deletions src/transport/SecureSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,27 @@ class SecureSession : public Session, public ReferenceCounted<SecureSession, Sec
return System::Clock::Timeout();
}

System::Clock::Milliseconds32 GetMessageReceiptTimeout(System::Clock::Timestamp ourLastActivity) const override
{
switch (mPeerAddress.GetTransportType())
{
case Transport::Type::kUdp: {
const auto & maybeLocalMRPConfig = GetLocalMRPConfig();
const auto & defaultMRRPConfig = GetDefaultMRPConfig();
const auto & localMRPConfig = maybeLocalMRPConfig.ValueOr(defaultMRRPConfig);
return GetRetransmissionTimeout(localMRPConfig.mActiveRetransTimeout, localMRPConfig.mIdleRetransTimeout,
ourLastActivity, localMRPConfig.mActiveThresholdTime);
}
case Transport::Type::kTcp:
return System::Clock::Seconds16(30);
case Transport::Type::kBle:
return System::Clock::Milliseconds32(BTP_ACK_TIMEOUT_MS);
default:
break;
}
return System::Clock::Timeout();
}

const PeerAddress & GetPeerAddress() const { return mPeerAddress; }
void SetPeerAddress(const PeerAddress & address) { mPeerAddress = address; }

Expand Down
5 changes: 4 additions & 1 deletion src/transport/Session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ System::Clock::Timeout Session::ComputeRoundTripTimeout(System::Clock::Timeout u
{
return System::Clock::kZero;
}
return GetAckTimeout() + upperlayerProcessingTimeout;

// Treat us as active for purposes of GetMessageReceiptTimeout(), since the
// other side would be responding to our message.
return GetAckTimeout() + upperlayerProcessingTimeout + GetMessageReceiptTimeout(System::SystemClock().GetMonotonicTimestamp());
}

uint16_t Session::SessionIdForLogging() const
Expand Down
16 changes: 15 additions & 1 deletion src/transport/Session.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,21 @@ class Session
virtual bool AllowsLargePayload() const = 0;
virtual const SessionParameters & GetRemoteSessionParameters() const = 0;
virtual System::Clock::Timestamp GetMRPBaseTimeout() const = 0;
virtual System::Clock::Milliseconds32 GetAckTimeout() const = 0;
// GetAckTimeout is the estimate for how long it could take for the other
// side to receive our message (accounting for our MRP retransmits if it
// gets lost) and send a response.
virtual System::Clock::Milliseconds32 GetAckTimeout() const = 0;

// GetReceiptTimeout is the estimate for how long it could take for us to
// receive a message after the other side sends it, accounting for the MRP
// retransmits the other side might do if the message gets lost.
//
// The caller is expected to provide an estimate for when the peer would
// last have heard from us. The most likely values to pass are
// System::SystemClock().GetMonotonicTimestamp() (to indicate "peer is
// responding to a message it just received") and System::Clock::kZero (to
// indicate "peer is reaching out to us, not in response to anything").
virtual System::Clock::Milliseconds32 GetMessageReceiptTimeout(System::Clock::Timestamp ourLastActivity) const = 0;

const ReliableMessageProtocolConfig & GetRemoteMRPConfig() const { return GetRemoteSessionParameters().GetMRPConfig(); }

Expand Down
21 changes: 21 additions & 0 deletions src/transport/UnauthenticatedSessionTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,27 @@ class UnauthenticatedSession : public Session, public ReferenceCounted<Unauthent
return System::Clock::Timeout();
}

System::Clock::Milliseconds32 GetMessageReceiptTimeout(System::Clock::Timestamp ourLastActivity) const override
{
switch (mPeerAddress.GetTransportType())
{
case Transport::Type::kUdp: {
const auto & maybeLocalMRPConfig = GetLocalMRPConfig();
const auto & defaultMRRPConfig = GetDefaultMRPConfig();
const auto & localMRPConfig = maybeLocalMRPConfig.ValueOr(defaultMRRPConfig);
return GetRetransmissionTimeout(localMRPConfig.mActiveRetransTimeout, localMRPConfig.mIdleRetransTimeout,
ourLastActivity, localMRPConfig.mActiveThresholdTime);
}
case Transport::Type::kTcp:
return System::Clock::Seconds16(30);
case Transport::Type::kBle:
return System::Clock::Milliseconds32(BTP_ACK_TIMEOUT_MS);
default:
break;
}
return System::Clock::Timeout();
}

NodeId GetPeerNodeId() const
{
if (mSessionRole == SessionRole::kInitiator)
Expand Down
Loading