diff --git a/src/app/CASEClient.cpp b/src/app/CASEClient.cpp index f6fa82c686e827..7ce5a539e6105a 100644 --- a/src/app/CASEClient.cpp +++ b/src/app/CASEClient.cpp @@ -24,6 +24,11 @@ void CASEClient::SetRemoteMRPIntervals(const ReliableMessageProtocolConfig & rem mCASESession.SetRemoteMRPConfig(remoteMRPConfig); } +const ReliableMessageProtocolConfig & CASEClient::GetRemoteMRPIntervals() +{ + return mCASESession.GetRemoteMRPConfig(); +} + CHIP_ERROR CASEClient::EstablishSession(const CASEClientInitParams & params, const ScopedNodeId & peer, const Transport::PeerAddress & peerAddress, const ReliableMessageProtocolConfig & remoteMRPConfig, diff --git a/src/app/CASEClient.h b/src/app/CASEClient.h index 3a5aa8ded7ff08..b640ae066f0f09 100644 --- a/src/app/CASEClient.h +++ b/src/app/CASEClient.h @@ -54,6 +54,8 @@ class DLL_EXPORT CASEClient public: void SetRemoteMRPIntervals(const ReliableMessageProtocolConfig & remoteMRPConfig); + const ReliableMessageProtocolConfig & GetRemoteMRPIntervals(); + CHIP_ERROR EstablishSession(const CASEClientInitParams & params, const ScopedNodeId & peer, const Transport::PeerAddress & peerAddress, const ReliableMessageProtocolConfig & remoteMRPConfig, SessionEstablishmentDelegate * delegate); diff --git a/src/app/CASESessionManager.cpp b/src/app/CASESessionManager.cpp index 3fca62d5cc37ed..52c48f102672b3 100644 --- a/src/app/CASESessionManager.cpp +++ b/src/app/CASESessionManager.cpp @@ -33,7 +33,7 @@ void CASESessionManager::FindOrEstablishSession(const ScopedNodeId & peerId, Cal Callback::Callback * onFailure #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES , - uint8_t attemptCount + uint8_t attemptCount, Callback::Callback * onRetry #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES ) { @@ -60,6 +60,10 @@ void CASESessionManager::FindOrEstablishSession(const ScopedNodeId & peerId, Cal #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES session->UpdateAttemptCount(attemptCount); + if (onRetry) + { + session->AddRetryHandler(onRetry); + } #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES session->Connect(onConnection, onFailure); diff --git a/src/app/CASESessionManager.h b/src/app/CASESessionManager.h index 455ca0b3b36fe1..8b803a0a10f0a2 100644 --- a/src/app/CASESessionManager.h +++ b/src/app/CASESessionManager.h @@ -85,7 +85,7 @@ class CASESessionManager : public OperationalSessionReleaseDelegate, public Sess Callback::Callback * onFailure #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES , - uint8_t attemptCount = 1 + uint8_t attemptCount = 1, Callback::Callback * onRetry = nullptr #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES ); diff --git a/src/app/OperationalSessionSetup.cpp b/src/app/OperationalSessionSetup.cpp index a87741ea3a246b..1deeebf8c92f5f 100644 --- a/src/app/OperationalSessionSetup.cpp +++ b/src/app/OperationalSessionSetup.cpp @@ -263,6 +263,15 @@ void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error) mConnectionFailure.DequeueAll(failureReady); mConnectionSuccess.DequeueAll(successReady); +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + // Clear out mConnectionRetry, so that those cancelables are not holding + // pointers to us, since we're about to go away. + while (auto * cb = mConnectionRetry.First()) + { + cb->Cancel(); + } +#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + // // If we encountered no error, go ahead and call all success callbacks. Otherwise, // call the failure callbacks. @@ -304,13 +313,23 @@ void OperationalSessionSetup::DequeueConnectionCallbacks(CHIP_ERROR error) void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error) { - VerifyOrReturn(mState != State::Uninitialized && mState != State::NeedsAddress, - ChipLogError(Discovery, "HandleCASEConnectionFailure was called while the device was not initialized")); + VerifyOrReturn(mState == State::Connecting, + ChipLogError(Discovery, "OnSessionEstablishmentError was called while we were not connecting")); if (CHIP_ERROR_TIMEOUT == error) { +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + // Make a copy of the ReliableMessageProtocolConfig, since our + // mCaseClient is about to go away. + ReliableMessageProtocolConfig remoteMprConfig = mCASEClient->GetRemoteMRPIntervals(); +#endif + if (CHIP_NO_ERROR == Resolver::Instance().TryNextResult(mAddressLookupHandle)) { +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + // Our retry is going to be immediate, once the event loop spins. + NotifyRetryHandlers(error, remoteMprConfig, System::Clock::kZero); +#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES MoveToState(State::ResolvingAddress); return; } @@ -318,9 +337,11 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error) #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES if (mRemainingAttempts > 0) { - CHIP_ERROR err = ScheduleSessionSetupReattempt(); + System::Clock::Seconds16 reattemptDelay; + CHIP_ERROR err = ScheduleSessionSetupReattempt(reattemptDelay); if (err == CHIP_NO_ERROR) { + NotifyRetryHandlers(error, remoteMprConfig, reattemptDelay); return; } } @@ -333,8 +354,8 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error) void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session) { - VerifyOrReturn(mState != State::Uninitialized, - ChipLogError(Discovery, "HandleCASEConnected was called while the device was not initialized")); + VerifyOrReturn(mState == State::Connecting, + ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting")); if (!mSecureSession.Grab(session)) return; // Got an invalid session, do not change any state @@ -489,7 +510,7 @@ void OperationalSessionSetup::UpdateAttemptCount(uint8_t attemptCount) } } -CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt() +CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay) { VerifyOrDie(mRemainingAttempts > 0); // Try again, but not if things are in shutdown such that we can't get @@ -500,7 +521,6 @@ CHIP_ERROR OperationalSessionSetup::ScheduleSessionSetupReattempt() } MoveToState(State::NeedsAddress); - System::Clock::Seconds16 timerDelay; // Stop exponential backoff before our delays get too large. // // Note that mAttemptsDone is always > 0 here, because we have @@ -545,6 +565,27 @@ void OperationalSessionSetup::TrySetupAgain(System::Layer * systemLayer, void * self->DequeueConnectionCallbacks(err); // Do not touch `self` instance anymore; it has been destroyed in DequeueConnectionCallbacks. } + +void OperationalSessionSetup::AddRetryHandler(Callback::Callback * onRetry) +{ + mConnectionRetry.Enqueue(onRetry->Cancel()); +} + +void OperationalSessionSetup::NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig, + System::Clock::Seconds16 retryDelay) +{ + // Compute the time we are likely to need to detect that the retry has + // failed. + System::Clock::Timeout messageTimeout = CASESession::ComputeSigma1ResponseTimeout(remoteMrpConfig); + auto timeoutSecs = std::chrono::duration_cast(messageTimeout); + // Add 1 second in case we had fractional milliseconds in messageTimeout. + timeoutSecs += System::Clock::Seconds16(1); + for (auto * item = mConnectionRetry.First(); item && item != &mConnectionRetry; item = item->mNext) + { + auto cb = Callback::Callback::FromCancelable(item); + cb->mCall(cb->mContext, mPeerId, error, timeoutSecs + retryDelay); + } +} #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES } // namespace chip diff --git a/src/app/OperationalSessionSetup.h b/src/app/OperationalSessionSetup.h index 029d355e25ef21..653cb61a5682f7 100644 --- a/src/app/OperationalSessionSetup.h +++ b/src/app/OperationalSessionSetup.h @@ -36,8 +36,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -117,8 +119,20 @@ class OperationalDeviceProxy : public DeviceProxy * implementations as an example. */ typedef void (*OnDeviceConnected)(void * context, Messaging::ExchangeManager & exchangeMgr, const SessionHandle & sessionHandle); + +/** + * Callback prototype when secure session establishment fails. + */ typedef void (*OnDeviceConnectionFailure)(void * context, const ScopedNodeId & peerId, CHIP_ERROR error); +/** + * Callback prototype when secure session establishement has failed and will be + * retried. retryTimeout indicates how much time will pass before we know + * whether the retry has timed out waiting for a response to our Sigma1 message. + */ +typedef void (*OnDeviceConnectionRetry)(void * context, const ScopedNodeId & peerId, CHIP_ERROR error, + System::Clock::Seconds16 retryTimeout); + /** * Object used to either establish a connection to peer or performing address lookup to a peer. * @@ -227,6 +241,9 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate, #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES // Update our remaining attempt count to be at least the given value. void UpdateAttemptCount(uint8_t attemptCount); + + // Add a retry handler for this session setup. + void AddRetryHandler(Callback::Callback * onRetry); #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES private: @@ -268,6 +285,8 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate, #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES uint8_t mRemainingAttempts = 0; uint8_t mAttemptsDone = 0; + + Callback::CallbackDeque mConnectionRetry; #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES void MoveToState(State aTargetState); @@ -313,14 +332,22 @@ class DLL_EXPORT OperationalSessionSetup : public SessionDelegate, #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES /** - * Schedule a setup reattempt, if possible. + * Schedule a setup reattempt, if possible. The outparam indicates how long + * it will be before the reattempt happens. */ - CHIP_ERROR ScheduleSessionSetupReattempt(); + CHIP_ERROR ScheduleSessionSetupReattempt(System::Clock::Seconds16 & timerDelay); /** * Helper for our backoff retry timer. */ static void TrySetupAgain(System::Layer * systemLayer, void * state); + + /** + * Helper to notify our retry callbacks that a setup error occurred and we + * will retry. + */ + void NotifyRetryHandlers(CHIP_ERROR error, const ReliableMessageProtocolConfig & remoteMrpConfig, + System::Clock::Seconds16 retryDelay); #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES }; diff --git a/src/controller/CHIPDeviceController.cpp b/src/controller/CHIPDeviceController.cpp index f0caac0d738907..1cba88f8b97a0b 100644 --- a/src/controller/CHIPDeviceController.cpp +++ b/src/controller/CHIPDeviceController.cpp @@ -398,6 +398,9 @@ ControllerDeviceInitParams DeviceController::GetControllerDeviceInitParams() DeviceCommissioner::DeviceCommissioner() : mOnDeviceConnectedCallback(OnDeviceConnectedFn, this), mOnDeviceConnectionFailureCallback(OnDeviceConnectionFailureFn, this), +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + mOnDeviceConnectionRetryCallback(OnDeviceConnectionRetryFn, this), +#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES mDeviceAttestationInformationVerificationCallback(OnDeviceAttestationInformationVerification, this), mDeviceNOCChainCallback(OnDeviceNOCChainGeneration, this), mSetUpCodePairer(this) { @@ -1731,6 +1734,60 @@ void DeviceCommissioner::OnDeviceConnectionFailureFn(void * context, const Scope } } +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES +// No specific action to take on either success or failure here; we're just +// trying to bump the fail-safe, and if that fails it's not clear there's much +// we can to with that. +static void OnExtendFailsafeForCASERetryFailure(void * context, CHIP_ERROR error) +{ + ChipLogError(Controller, "Failed to extend fail-safe for CASE retry: %" CHIP_ERROR_FORMAT, error.Format()); +} +static void +OnExtendFailsafeForCASERetrySuccess(void * context, + const app::Clusters::GeneralCommissioning::Commands::ArmFailSafeResponse::DecodableType & data) +{ + ChipLogProgress(Controller, "Status of extending fail-safe for CASE retry: %u", to_underlying(data.errorCode)); +} + +void DeviceCommissioner::OnDeviceConnectionRetryFn(void * context, const ScopedNodeId & peerId, CHIP_ERROR error, + System::Clock::Seconds16 retryTimeout) +{ + ChipLogError(Controller, + "Session establishment failed for " ChipLogFormatScopedNodeId ", error: %" CHIP_ERROR_FORMAT + ". Next retry expected to get a response to Sigma1 or fail within %d seconds", + ChipLogValueScopedNodeId(peerId), error.Format(), retryTimeout.count()); + + auto self = static_cast(context); + + // We need to do the fail-safe arming over the PASE session. + auto * commissioneeDevice = self->FindCommissioneeDevice(peerId.GetNodeId()); + if (!commissioneeDevice) + { + // Commissioning canceled, presumably. Just ignore the notification, + // not much we can do here. + return; + } + + // Extend by the default failsafe timeout plus our retry timeout, so we can + // be sure the fail-safe will not expire before we try the next time, if + // there will be a next time. + // + // TODO: Make it possible for our clients to control the exact timeout here? + uint16_t failsafeTimeout; + if (UINT16_MAX - retryTimeout.count() < kDefaultFailsafeTimeout) + { + failsafeTimeout = UINT16_MAX; + } + else + { + failsafeTimeout = static_cast(retryTimeout.count() + kDefaultFailsafeTimeout); + } + self->ExtendArmFailSafe(commissioneeDevice, CommissioningStage::kFindOperational, failsafeTimeout, + MakeOptional(kMinimumCommissioningStepTimeout), OnExtendFailsafeForCASERetrySuccess, + OnExtendFailsafeForCASERetryFailure); +} +#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + // ClusterStateCache::Callback impl void DeviceCommissioner::OnDone(app::ReadClient *) { @@ -2444,7 +2501,7 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio &mOnDeviceConnectionFailureCallback #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES , - /* attemptCount = */ 3 + /* attemptCount = */ 3, &mOnDeviceConnectionRetryCallback #endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES ); } diff --git a/src/controller/CHIPDeviceController.h b/src/controller/CHIPDeviceController.h index d73950fd110b75..d60ad34768caf0 100644 --- a/src/controller/CHIPDeviceController.h +++ b/src/controller/CHIPDeviceController.h @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -787,6 +788,10 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController, static void OnDeviceConnectedFn(void * context, Messaging::ExchangeManager & exchangeMgr, const SessionHandle & sessionHandle); static void OnDeviceConnectionFailureFn(void * context, const ScopedNodeId & peerId, CHIP_ERROR error); +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + static void OnDeviceConnectionRetryFn(void * context, const ScopedNodeId & peerId, CHIP_ERROR error, + System::Clock::Seconds16 retryTimeout); +#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES static void OnDeviceAttestationInformationVerification(void * context, const Credentials::DeviceAttestationVerifier::AttestationInfo & info, @@ -897,6 +902,9 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController, chip::Callback::Callback mOnDeviceConnectedCallback; chip::Callback::Callback mOnDeviceConnectionFailureCallback; +#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES + chip::Callback::Callback mOnDeviceConnectionRetryCallback; +#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES chip::Callback::Callback mDeviceAttestationInformationVerificationCallback; diff --git a/src/protocols/secure_channel/CASESession.cpp b/src/protocols/secure_channel/CASESession.cpp index 3e3cbda767847d..ac931c7ba5f135 100644 --- a/src/protocols/secure_channel/CASESession.cpp +++ b/src/protocols/secure_channel/CASESession.cpp @@ -125,8 +125,9 @@ static_assert(sizeof(kTBEData2_Nonce) == sizeof(kTBEData3_Nonce), "TBEData2_Nonc // // The session establishment fails if the response is not received within the resulting timeout window, // which accounts for both transport latency and the server-side latency. -static constexpr ExchangeContext::Timeout kExpectedLowProcessingTime = System::Clock::Seconds16(2); -static constexpr ExchangeContext::Timeout kExpectedHighProcessingTime = System::Clock::Seconds16(30); +static constexpr ExchangeContext::Timeout kExpectedLowProcessingTime = System::Clock::Seconds16(2); +static constexpr ExchangeContext::Timeout kExpectedSigma1ProcessingTime = kExpectedLowProcessingTime; +static constexpr ExchangeContext::Timeout kExpectedHighProcessingTime = System::Clock::Seconds16(30); CASESession::~CASESession() { @@ -273,7 +274,7 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric mSessionResumptionStorage = sessionResumptionStorage; mLocalMRPConfig = mrpLocalConfig; - mExchangeCtxt->UseSuggestedResponseTimeout(kExpectedLowProcessingTime); + mExchangeCtxt->UseSuggestedResponseTimeout(kExpectedSigma1ProcessingTime); mPeerNodeId = peerScopedNodeId.GetNodeId(); mLocalNodeId = fabricInfo->GetNodeId(); @@ -1974,4 +1975,13 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea return err; } +System::Clock::Timeout CASESession::ComputeSigma1ResponseTimeout(const ReliableMessageProtocolConfig & remoteMrpConfig) +{ + return GetRetransmissionTimeout(remoteMrpConfig.mActiveRetransTimeout, remoteMrpConfig.mIdleRetransTimeout, + // Assume peer is idle, since that's what we + // will assume for our initial message. + System::Clock::kZero, Transport::kMinActiveTime) + + kExpectedSigma1ProcessingTime; +} + } // namespace chip diff --git a/src/protocols/secure_channel/CASESession.h b/src/protocols/secure_channel/CASESession.h index 1800fef789d775..9cf2bba6ee651d 100644 --- a/src/protocols/secure_channel/CASESession.h +++ b/src/protocols/secure_channel/CASESession.h @@ -35,11 +35,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -175,6 +177,10 @@ class DLL_EXPORT CASESession : public Messaging::UnsolicitedMessageHandler, FabricIndex GetFabricIndex() const { return mFabricIndex; } + // Compute our Sigma1 response timeout. This can give consumers an idea of + // how long it will take to detect that our Sigma1 did not get through. + static System::Clock::Timeout ComputeSigma1ResponseTimeout(const ReliableMessageProtocolConfig & remoteMrpConfig); + // TODO: remove Clear, we should create a new instance instead reset the old instance. /** @brief This function zeroes out and resets the memory used by the object. **/