From e4349e40a2e110f63516208d89e81bc773df23db Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Mon, 13 Mar 2023 16:34:09 -0400 Subject: [PATCH] Extend our fail-safe a bit before we try to enable wifi/thread networks. Two changes here: 1. Automatic fail-safe extension should not make the fail-safe _shorter_. This should prevent, for example, a short fail-safe on attestation failure overriding a longer initial fail-safe. 2. Before we try to enable network (which is the last time we're guaranteed to be able to talk to a device over PASE, for non-concurrent commissioning flow devices), extend our fail-safe to try to cover the time it will take us to do operational discovery and sending CommissioningComplete. --- src/app/DeviceProxy.h | 10 +- src/controller/AutoCommissioner.cpp | 3 + src/controller/CHIPDeviceController.cpp | 108 ++++++++++++++++++--- src/controller/CHIPDeviceController.h | 12 ++- src/controller/CommissioneeDeviceProxy.cpp | 4 +- 5 files changed, 119 insertions(+), 18 deletions(-) diff --git a/src/app/DeviceProxy.h b/src/app/DeviceProxy.h index 9d00845bcfc420..739c4bd4cb9177 100644 --- a/src/app/DeviceProxy.h +++ b/src/app/DeviceProxy.h @@ -30,6 +30,7 @@ #include #include #include +#include namespace chip { @@ -54,7 +55,12 @@ class DLL_EXPORT DeviceProxy virtual CHIP_ERROR SetPeerId(ByteSpan rcac, ByteSpan noc) { return CHIP_ERROR_NOT_IMPLEMENTED; } - const ReliableMessageProtocolConfig & GetRemoteMRPConfig() const { return mRemoteMRPConfig; } + /** + * Facilities for keeping track of the latest point we can expect the + * fail-safe to last through. These timestamp values use the monotonic clock. + */ + void SetFailSafeExpirationTimestamp(System::Clock::Timestamp timestamp) { mFailSafeExpirationTimestamp = timestamp; } + System::Clock::Timestamp GetFailSafeExpirationTimestamp() const { return mFailSafeExpirationTimestamp; } /** * @brief @@ -69,7 +75,7 @@ class DLL_EXPORT DeviceProxy protected: virtual bool IsSecureConnected() const = 0; - ReliableMessageProtocolConfig mRemoteMRPConfig = GetDefaultMRPConfig(); + System::Clock::Timestamp mFailSafeExpirationTimestamp = System::Clock::kZero; }; } // namespace chip diff --git a/src/controller/AutoCommissioner.cpp b/src/controller/AutoCommissioner.cpp index c83d561391b5b2..b458fbe8845eb1 100644 --- a/src/controller/AutoCommissioner.cpp +++ b/src/controller/AutoCommissioner.cpp @@ -374,6 +374,9 @@ void AutoCommissioner::SetCASEFailsafeTimerIfNeeded() // CASE establishment, and receipt of the commissioning complete command. // We know that the mCommissioneeDeviceProxy is still valid at this point since it gets cleared during cleanup // and SetCASEFailsafeTimerIfNeeded is always called before that stage. + // + // A false return from ExtendArmFailSafe is fine; we don't want to make + // the fail-safe shorter here. mCommissioner->ExtendArmFailSafe(mCommissioneeDeviceProxy, CommissioningStage::kFindOperational, mParams.GetCASEFailsafeTimerSeconds().Value(), GetCommandTimeout(mCommissioneeDeviceProxy, CommissioningStage::kArmFailsafe), diff --git a/src/controller/CHIPDeviceController.cpp b/src/controller/CHIPDeviceController.cpp index 1cba88f8b97a0b..43817af23e8b9c 100644 --- a/src/controller/CHIPDeviceController.cpp +++ b/src/controller/CHIPDeviceController.cpp @@ -1131,16 +1131,42 @@ void DeviceCommissioner::OnFailedToExtendedArmFailSafeDeviceAttestation(void * c commissioner->CommissioningStageComplete(CHIP_ERROR_INTERNAL, report); } -void DeviceCommissioner::ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, +bool DeviceCommissioner::ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, Optional commandTimeout, OnExtendFailsafeSuccess onSuccess, OnExtendFailsafeFailure onFailure) { + using namespace System; + using namespace System::Clock; + auto now = SystemClock().GetMonotonicTimestamp(); + auto newFailSafeTimeout = now + Seconds16(armFailSafeTimeout); + if (newFailSafeTimeout < proxy->GetFailSafeExpirationTimestamp()) + { + ChipLogProgress( + Controller, "Skipping arming failsafe: new time (%u seconds from now) before old time (%u seconds from now)", + armFailSafeTimeout, std::chrono::duration_cast(proxy->GetFailSafeExpirationTimestamp() - now).count()); + return false; + } + uint64_t breadcrumb = static_cast(step); GeneralCommissioning::Commands::ArmFailSafe::Type request; request.expiryLengthSeconds = armFailSafeTimeout; request.breadcrumb = breadcrumb; ChipLogProgress(Controller, "Arming failsafe (%u seconds)", request.expiryLengthSeconds); - SendCommand(proxy, request, onSuccess, onFailure, kRootEndpointId, commandTimeout); + CHIP_ERROR err = SendCommand(proxy, request, onSuccess, onFailure, kRootEndpointId, commandTimeout); + if (err != CHIP_NO_ERROR) + { + // TODO: What's the right error-handling here? We used to just silently + // ignore this failure... Scheduling a lambda to call + // onFailure(this, err) is hard, because a lambda we can schedule can't + // capture that much state. + } + else + { + // TODO: Handle the situation when our command ends up erroring out + // asynchronously? + proxy->SetFailSafeExpirationTimestamp(newFailSafeTimeout); + } + return true; } void DeviceCommissioner::ExtendArmFailSafeForDeviceAttestation(const Credentials::DeviceAttestationVerifier::AttestationInfo & info, @@ -1154,20 +1180,24 @@ void DeviceCommissioner::ExtendArmFailSafeForDeviceAttestation(const Credentials mAttestationDeviceInfo = Platform::MakeUnique(info); auto expiryLengthSeconds = deviceAttestationDelegate->FailSafeExpiryTimeoutSecs(); - if (expiryLengthSeconds.HasValue()) + bool failSafeSkipped = expiryLengthSeconds.HasValue(); + if (failSafeSkipped) { - GeneralCommissioning::Commands::ArmFailSafe::Type request; - request.expiryLengthSeconds = expiryLengthSeconds.Value(); - request.breadcrumb = mCommissioningStage; - ChipLogProgress(Controller, "Changing fail-safe timer to %u seconds to handle DA failure", request.expiryLengthSeconds); + ChipLogProgress(Controller, "Changing fail-safe timer to %u seconds to handle DA failure", expiryLengthSeconds.Value()); // Per spec, anything we do with the fail-safe armed must not time out // in less than kMinimumCommissioningStepTimeout. - SendCommand(mDeviceBeingCommissioned, request, OnArmFailSafeExtendedForDeviceAttestation, - OnFailedToExtendedArmFailSafeDeviceAttestation, MakeOptional(kMinimumCommissioningStepTimeout)); + failSafeSkipped = + ExtendArmFailSafe(mDeviceBeingCommissioned, mCommissioningStage, expiryLengthSeconds.Value(), + MakeOptional(kMinimumCommissioningStepTimeout), OnArmFailSafeExtendedForDeviceAttestation, + OnFailedToExtendedArmFailSafeDeviceAttestation); } else { ChipLogProgress(Controller, "Proceeding without changing fail-safe timer value as delegate has not set it"); + } + + if (failSafeSkipped) + { // Callee does not use data argument. const GeneralCommissioning::Commands::ArmFailSafeResponse::DecodableType data; OnArmFailSafeExtendedForDeviceAttestation(this, data); @@ -1782,6 +1812,8 @@ void DeviceCommissioner::OnDeviceConnectionRetryFn(void * context, const ScopedN { failsafeTimeout = static_cast(retryTimeout.count() + kDefaultFailsafeTimeout); } + // A false return from ExtendArmFailSafe is fine; we don't want to make the + // fail-safe shorter here. self->ExtendArmFailSafe(commissioneeDevice, CommissioningStage::kFindOperational, failsafeTimeout, MakeOptional(kMinimumCommissioningStepTimeout), OnExtendFailsafeForCASERetrySuccess, OnExtendFailsafeForCASERetryFailure); @@ -2168,8 +2200,11 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio { case CommissioningStage::kArmFailsafe: { VerifyOrDie(endpoint == kRootEndpointId); - ExtendArmFailSafe(proxy, step, params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout), timeout, OnArmFailSafe, - OnBasicFailure); + // Make sure the fail-safe value we set here actually ends up being used + // no matter what. + proxy->SetFailSafeExpirationTimestamp(System::Clock::kZero); + VerifyOrDie(ExtendArmFailSafe(proxy, step, params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout), timeout, + OnArmFailSafe, OnBasicFailure)); } break; case CommissioningStage::kReadCommissioningInfo: { @@ -2471,6 +2506,10 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio CommissioningStageComplete(CHIP_ERROR_INVALID_ARGUMENT); return; } + // Before we try to do network enablement, make sure that our fail-safe + // is set far enough out that we can later try to do operational + // discovery without it timing out. + ExtendFailsafeBeforeNetworkEnable(proxy, params, step); NetworkCommissioning::Commands::ConnectNetwork::Type request; request.networkID = params.GetWiFiCredentials().Value().ssid; request.breadcrumb.Emplace(breadcrumb); @@ -2488,6 +2527,10 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio CommissioningStageComplete(CHIP_ERROR_INVALID_ARGUMENT); return; } + // Before we try to do network enablement, make sure that our fail-safe + // is set far enough out that we can later try to do operational + // discovery without it timing out. + ExtendFailsafeBeforeNetworkEnable(proxy, params, step); NetworkCommissioning::Commands::ConnectNetwork::Type request; request.networkID = extendedPanId; request.breadcrumb.Emplace(breadcrumb); @@ -2522,6 +2565,49 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } } +// No specific action to take on either success or failure here; we're just +// trying to bump the fail-safe, and if that fails it's not clear there's much +// we can to with that. +static void OnExtendFailsafeBeforeNetworkEnableFailure(void * context, CHIP_ERROR error) +{ + ChipLogError(Controller, "Failed to extend fail-safe before network enable: %" CHIP_ERROR_FORMAT, error.Format()); +} +static void OnExtendFailsafeBeforeNetworkEnableSuccess( + void * context, const app::Clusters::GeneralCommissioning::Commands::ArmFailSafeResponse::DecodableType & data) +{ + ChipLogProgress(Controller, "Status of extending fail-safe before network enable: %u", to_underlying(data.errorCode)); +} + +void DeviceCommissioner::ExtendFailsafeBeforeNetworkEnable(DeviceProxy * device, CommissioningParameters & params, + CommissioningStage step) +{ + auto * commissioneeDevice = FindCommissioneeDevice(device->GetDeviceId()); + if (device != commissioneeDevice) + { + // Not a commissionee device; just return. + return; + } + + // Try to make sure we have at least enough time for our expected + // commissioning bits plus the MRP retries for a Sigma1. + uint16_t failSafeTimeoutSecs = params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout); + auto sigma1Timeout = CASESession::ComputeSigma1ResponseTimeout(commissioneeDevice->GetPairing().GetRemoteMRPConfig()); + uint16_t sigma1TimeoutSecs = std::chrono::duration_cast(sigma1Timeout).count(); + if (UINT16_MAX - failSafeTimeoutSecs < sigma1TimeoutSecs) + { + failSafeTimeoutSecs = UINT16_MAX; + } + else + { + failSafeTimeoutSecs += sigma1TimeoutSecs; + } + + // A false return from ExtendArmFailSafe is fine; we don't want to make the + // fail-safe shorter here. + ExtendArmFailSafe(commissioneeDevice, step, failSafeTimeoutSecs, MakeOptional(kMinimumCommissioningStepTimeout), + OnExtendFailsafeBeforeNetworkEnableSuccess, OnExtendFailsafeBeforeNetworkEnableFailure); +} + CHIP_ERROR DeviceController::GetCompressedFabricIdBytes(MutableByteSpan & outBytes) const { const auto * fabricInfo = GetFabricInfo(); diff --git a/src/controller/CHIPDeviceController.h b/src/controller/CHIPDeviceController.h index d60ad34768caf0..04c24db25a0eea 100644 --- a/src/controller/CHIPDeviceController.h +++ b/src/controller/CHIPDeviceController.h @@ -683,8 +683,11 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController, return mDefaultCommissioner == nullptr ? NullOptional : MakeOptional(mDefaultCommissioner->GetCommissioningParameters()); } - // Reset the arm failsafe timer during commissioning. - void ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, + // Reset the arm failsafe timer during commissioning. If this returns + // false, that means that the timer was already set for a longer time period + // than the new time we are trying to set. In this case, neither + // onSuccess nor onFailure will be called. + bool ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, Optional commandTimeout, OnExtendFailsafeSuccess onSuccess, OnExtendFailsafeFailure onFailure); @@ -900,6 +903,11 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController, // to the PairingDelegate upon arm failsafe command completion. void CleanupCommissioning(DeviceProxy * proxy, NodeId nodeId, const CompletionStatus & completionStatus); + // Extend the fail-safe before trying to do network-enable (since after that + // point, for non-concurrent-commissioning devices, we may not have a way to + // extend it). + void ExtendFailsafeBeforeNetworkEnable(DeviceProxy * device, CommissioningParameters & params, CommissioningStage step); + chip::Callback::Callback mOnDeviceConnectedCallback; chip::Callback::Callback mOnDeviceConnectionFailureCallback; #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES diff --git a/src/controller/CommissioneeDeviceProxy.cpp b/src/controller/CommissioneeDeviceProxy.cpp index f809c3785094a3..89c237b5d46e7d 100644 --- a/src/controller/CommissioneeDeviceProxy.cpp +++ b/src/controller/CommissioneeDeviceProxy.cpp @@ -71,11 +71,9 @@ CHIP_ERROR CommissioneeDeviceProxy::UpdateDeviceData(const Transport::PeerAddres { mDeviceAddress = addr; - mRemoteMRPConfig = config; - // Initialize PASE session state with any MRP parameters that DNS-SD has provided. // It can be overridden by PASE session protocol messages that include MRP parameters. - mPairing.SetRemoteMRPConfig(mRemoteMRPConfig); + mPairing.SetRemoteMRPConfig(config); if (!mSecureSession) {