Skip to content

Commit

Permalink
Stop treating BUSY response as fatal when CASE re-attempts are enable…
Browse files Browse the repository at this point in the history
…d. (project-chip#32490)

1. We now communicate the requested delay to the session establishment delegate
   when we get a BUSY response.
2. OperationalSessionSetup, when it has CASE re-attempts enabled, treats a BUSY
   response as a trigger to reattempt, not a fatal error.
3. In CASEServer, set the requested delay to a better value if we have sent
   Sigma2 and are waiting for a response to that.

Fixes project-chip#28288
  • Loading branch information
bzbarsky-apple authored Mar 7, 2024
1 parent bd42fb7 commit c554f44
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 15 deletions.
43 changes: 38 additions & 5 deletions src/app/OperationalSessionSetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, Sess
// member instead of having a boolean
// mTryingNextResultDueToSessionEstablishmentError, so we can recover the
// error in UpdateDeviceData.
if (CHIP_ERROR_TIMEOUT == error)
if (CHIP_ERROR_TIMEOUT == error || CHIP_ERROR_BUSY == error)
{
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Make a copy of the ReliableMessageProtocolConfig, since our
Expand Down Expand Up @@ -480,6 +480,15 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, Sess
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}

void OperationalSessionSetup::OnResponderBusy(System::Clock::Milliseconds16 requestedDelay)
{
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
// Store the requested delay, so that we can use it for scheduling our
// retry.
mRequestedBusyDelay = requestedDelay;
#endif
}

void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session)
{
VerifyOrReturn(mState == State::Connecting,
Expand Down Expand Up @@ -705,9 +714,22 @@ CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock:
static_assert(UINT16_MAX / CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS >=
(1 << CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF),
"Our backoff calculation will overflow.");
timerDelay = System::Clock::Seconds16(
System::Clock::Timeout actualTimerDelay = System::Clock::Seconds16(
static_cast<uint16_t>(CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_INITIAL_DELAY_SECONDS
<< min((mAttemptsDone - 1), CHIP_DEVICE_CONFIG_AUTOMATIC_CASE_RETRY_MAX_BACKOFF)));
const bool responseWasBusy = mRequestedBusyDelay != System::Clock::kZero;
if (responseWasBusy)
{
if (mRequestedBusyDelay > actualTimerDelay)
{
actualTimerDelay = mRequestedBusyDelay;
}

// Reset mRequestedBusyDelay now that we have consumed it, so it does
// not affect future reattempts not triggered by a busy response.
mRequestedBusyDelay = System::Clock::kZero;
}

if (mAttemptsDone % 2 == 0)
{
// It's possible that the other side received one of our Sigma1 messages
Expand All @@ -716,11 +738,22 @@ CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock:
// listening for Sigma1 messages again.
//
// To handle that, on every other retry, add the amount of time it would
// take the other side to time out.
// take the other side to time out. It would be nice if we could rely
// on the delay reported in a BUSY response to just tell us that value,
// but in practice for old devices BUSY often sends some hardcoded value
// that tells us nothing about when the other side will decide it has
// timed out.
auto additionalTimeout = CASESession::ComputeSigma2ResponseTimeout(GetLocalMRPConfig().ValueOr(GetDefaultMRPConfig()));
timerDelay += std::chrono::duration_cast<System::Clock::Seconds16>(additionalTimeout);
actualTimerDelay += additionalTimeout;
}
CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(timerDelay, TrySetupAgain, this);
timerDelay = std::chrono::duration_cast<System::Clock::Seconds16>(actualTimerDelay);

CHIP_ERROR err = mInitParams.exchangeMgr->GetSessionManager()->SystemLayer()->StartTimer(actualTimerDelay, TrySetupAgain, this);

// TODO: If responseWasBusy, should we increment, mRemainingAttempts and
// mResolveAttemptsAllowed, since we were explicitly told to retry? Hard to
// tell what consumers expect out of a capped retry count here.

// The cast on count() is needed because the type count() returns might not
// actually be uint16_t; on some platforms it's int.
ChipLogProgress(Discovery,
Expand Down
3 changes: 3 additions & 0 deletions src/app/OperationalSessionSetup.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ class DLL_EXPORT OperationalSessionSetup : public SessionEstablishmentDelegate,
//////////// SessionEstablishmentDelegate Implementation ///////////////
void OnSessionEstablished(const SessionHandle & session) override;
void OnSessionEstablishmentError(CHIP_ERROR error, SessionEstablishmentStage stage) override;
void OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) override;

ScopedNodeId GetPeerId() const { return mPeerId; }

Expand Down Expand Up @@ -319,6 +320,8 @@ class DLL_EXPORT OperationalSessionSetup : public SessionEstablishmentDelegate,

uint8_t mResolveAttemptsAllowed = 0;

System::Clock::Milliseconds16 mRequestedBusyDelay = System::Clock::kZero;

Callback::CallbackDeque mConnectionRetry;
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

Expand Down
28 changes: 25 additions & 3 deletions src/protocols/secure_channel/CASEServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,31 @@ CHIP_ERROR CASEServer::OnMessageReceived(Messaging::ExchangeContext * ec, const
// Handshake wasn't stuck, send the busy status report and let the existing handshake continue.

// A successful CASE handshake can take several seconds and some may time out (30 seconds or more).
// TODO: Come up with better estimate: https://github.com/project-chip/connectedhomeip/issues/28288
// For now, setting minimum wait time to 5000 milliseconds.
CHIP_ERROR err = SendBusyStatusReport(ec, System::Clock::Milliseconds16(5000));

System::Clock::Milliseconds16 delay = System::Clock::kZero;
if (GetSession().GetState() == CASESession::State::kSentSigma2)
{
// The delay should be however long we think it will take for
// that to time out.
auto sigma2Timeout = CASESession::ComputeSigma2ResponseTimeout(GetSession().GetRemoteMRPConfig());
if (sigma2Timeout < System::Clock::Milliseconds16::max())
{
delay = std::chrono::duration_cast<System::Clock::Milliseconds16>(sigma2Timeout);
}
else
{
// Avoid overflow issues, just wait for as long as we can to
// get close to our expected Sigma2 timeout.
delay = System::Clock::Milliseconds16::max();
}
}
else
{
// For now, setting minimum wait time to 5000 milliseconds if we
// have no other information.
delay = System::Clock::Milliseconds16(5000);
}
CHIP_ERROR err = SendBusyStatusReport(ec, delay);
if (err != CHIP_NO_ERROR)
{
ChipLogError(Inet, "Failed to send the busy status report, err:%" CHIP_ERROR_FORMAT, err.Format());
Expand Down
7 changes: 6 additions & 1 deletion src/protocols/secure_channel/CASESession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1966,7 +1966,8 @@ void CASESession::OnSuccessStatusReport()
Finish();
}

CHIP_ERROR CASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
CHIP_ERROR CASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
Optional<uintptr_t> protocolData)
{
CHIP_ERROR err = CHIP_NO_ERROR;
switch (protocolCode)
Expand All @@ -1981,6 +1982,10 @@ CHIP_ERROR CASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralS

case kProtocolCodeBusy:
err = CHIP_ERROR_BUSY;
if (protocolData.HasValue())
{
mDelegate->OnResponderBusy(System::Clock::Milliseconds16(static_cast<uint16_t>(protocolData.Value())));
}
break;

default:
Expand Down
3 changes: 2 additions & 1 deletion src/protocols/secure_channel/CASESession.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ class DLL_EXPORT CASESession : public Messaging::UnsolicitedMessageHandler,
const ByteSpan & skInfo, const ByteSpan & nonce);

void OnSuccessStatusReport() override;
CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode) override;
CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
Optional<uintptr_t> protocolData) override;

void AbortPendingEstablish(CHIP_ERROR err);

Expand Down
3 changes: 2 additions & 1 deletion src/protocols/secure_channel/PASESession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,8 @@ void PASESession::OnSuccessStatusReport()
Finish();
}

CHIP_ERROR PASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)
CHIP_ERROR PASESession::OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
Optional<uintptr_t> protocolData)
{
CHIP_ERROR err = CHIP_NO_ERROR;
switch (protocolCode)
Expand Down
3 changes: 2 additions & 1 deletion src/protocols/secure_channel/PASESession.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ class DLL_EXPORT PASESession : public Messaging::UnsolicitedMessageHandler,
CHIP_ERROR HandleMsg3(System::PacketBufferHandle && msg);

void OnSuccessStatusReport() override;
CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode) override;
CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
Optional<uintptr_t> protocolData) override;

void Finish();

Expand Down
12 changes: 9 additions & 3 deletions src/protocols/secure_channel/PairingSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#pragma once

#include <lib/core/CHIPError.h>
#include <lib/core/Optional.h>
#include <lib/core/TLV.h>
#include <messaging/ExchangeContext.h>
#include <messaging/SessionParameters.h>
Expand Down Expand Up @@ -129,7 +130,11 @@ class DLL_EXPORT PairingSession : public SessionDelegate

void SetPeerSessionId(uint16_t id) { mPeerSessionId.SetValue(id); }
virtual void OnSuccessStatusReport() {}
virtual CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode)

// Handle a failure StatusReport message from the server. protocolData will
// depend on exactly what the generalCode/protocolCode are.
virtual CHIP_ERROR OnFailureStatusReport(Protocols::SecureChannel::GeneralStatusCode generalCode, uint16_t protocolCode,
Optional<uintptr_t> protocolData)
{
return CHIP_ERROR_INTERNAL;
}
Expand Down Expand Up @@ -174,6 +179,7 @@ class DLL_EXPORT PairingSession : public SessionDelegate
return CHIP_NO_ERROR;
}

Optional<uintptr_t> protocolData;
if (report.GetGeneralCode() == Protocols::SecureChannel::GeneralStatusCode::kBusy &&
report.GetProtocolCode() == Protocols::SecureChannel::kProtocolCodeBusy)
{
Expand All @@ -189,15 +195,15 @@ class DLL_EXPORT PairingSession : public SessionDelegate
}
else
{
// TODO: CASE: Notify minimum wait time to clients on receiving busy status report #28290
ChipLogProgress(SecureChannel, "Received busy status report with minimum wait time: %u ms", minimumWaitTime);
protocolData.Emplace(minimumWaitTime);
}
}
}

// It's very important that we propagate the return value from
// OnFailureStatusReport out to the caller. Make sure we return it directly.
return OnFailureStatusReport(report.GetGeneralCode(), report.GetProtocolCode());
return OnFailureStatusReport(report.GetGeneralCode(), report.GetProtocolCode(), protocolData);
}

/**
Expand Down
10 changes: 10 additions & 0 deletions src/protocols/secure_channel/SessionEstablishmentDelegate.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#pragma once

#include <system/SystemClock.h>
#include <system/SystemPacketBuffer.h>
#include <transport/Session.h>
#include <transport/raw/MessageHeader.h>
Expand Down Expand Up @@ -80,6 +81,15 @@ class DLL_EXPORT SessionEstablishmentDelegate
*/
virtual void OnSessionEstablished(const SessionHandle & session) {}

/**
* Called when the responder has responded with a "busy" status code and
* provided a requested delay.
*
* This call will be followed by an OnSessionEstablishmentError with
* CHIP_ERROR_BUSY as the error.
*/
virtual void OnResponderBusy(System::Clock::Milliseconds16 requestedDelay) {}

virtual ~SessionEstablishmentDelegate() {}
};

Expand Down

0 comments on commit c554f44

Please sign in to comment.