From 5c056e6101059b46e0b69d3c64e5f91b69404d88 Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Tue, 14 Mar 2023 00:13:25 -0400 Subject: [PATCH] Extend our fail-safe a bit before we try to enable wifi/thread networks. (#25667) * Extend our fail-safe a bit before we try to enable wifi/thread networks. Two changes here: 1. Automatic fail-safe extension should not make the fail-safe _shorter_. This should prevent, for example, a short fail-safe on attestation failure overriding a longer initial fail-safe. 2. Before we try to enable network (which is the last time we're guaranteed to be able to talk to a device over PASE, for non-concurrent commissioning flow devices), extend our fail-safe to try to cover the time it will take us to do operational discovery and sending CommissioningComplete. * Address review comments. * Address review comments. --- src/app/DeviceProxy.h | 10 +- src/controller/AutoCommissioner.cpp | 14 ++- src/controller/CHIPDeviceController.cpp | 99 ++++++++++++++++--- src/controller/CHIPDeviceController.h | 12 ++- src/controller/CommissioneeDeviceProxy.cpp | 4 +- src/controller/CommissioningDelegate.cpp | 8 ++ src/controller/CommissioningDelegate.h | 42 ++++---- src/controller/python/OpCredsBinding.cpp | 2 + .../commissioning_failure_test.py | 4 +- 9 files changed, 151 insertions(+), 44 deletions(-) diff --git a/src/app/DeviceProxy.h b/src/app/DeviceProxy.h index 9d00845bcfc420..739c4bd4cb9177 100644 --- a/src/app/DeviceProxy.h +++ b/src/app/DeviceProxy.h @@ -30,6 +30,7 @@ #include #include #include +#include namespace chip { @@ -54,7 +55,12 @@ class DLL_EXPORT DeviceProxy virtual CHIP_ERROR SetPeerId(ByteSpan rcac, ByteSpan noc) { return CHIP_ERROR_NOT_IMPLEMENTED; } - const ReliableMessageProtocolConfig & GetRemoteMRPConfig() const { return mRemoteMRPConfig; } + /** + * Facilities for keeping track of the latest point we can expect the + * fail-safe to last through. These timestamp values use the monotonic clock. + */ + void SetFailSafeExpirationTimestamp(System::Clock::Timestamp timestamp) { mFailSafeExpirationTimestamp = timestamp; } + System::Clock::Timestamp GetFailSafeExpirationTimestamp() const { return mFailSafeExpirationTimestamp; } /** * @brief @@ -69,7 +75,7 @@ class DLL_EXPORT DeviceProxy protected: virtual bool IsSecureConnected() const = 0; - ReliableMessageProtocolConfig mRemoteMRPConfig = GetDefaultMRPConfig(); + System::Clock::Timestamp mFailSafeExpirationTimestamp = System::Clock::kZero; }; } // namespace chip diff --git a/src/controller/AutoCommissioner.cpp b/src/controller/AutoCommissioner.cpp index c83d561391b5b2..ff61e851a8547c 100644 --- a/src/controller/AutoCommissioner.cpp +++ b/src/controller/AutoCommissioner.cpp @@ -301,18 +301,21 @@ CommissioningStage AutoCommissioner::GetNextCommissioningStageInternal(Commissio } else { - return CommissioningStage::kWiFiNetworkEnable; + return CommissioningStage::kFailsafeBeforeWiFiEnable; } case CommissioningStage::kThreadNetworkSetup: if (mParams.GetWiFiCredentials().HasValue() && mDeviceCommissioningInfo.network.wifi.endpoint != kInvalidEndpointId) { - return CommissioningStage::kWiFiNetworkEnable; + return CommissioningStage::kFailsafeBeforeWiFiEnable; } else { - return CommissioningStage::kThreadNetworkEnable; + return CommissioningStage::kFailsafeBeforeThreadEnable; } - + case CommissioningStage::kFailsafeBeforeWiFiEnable: + return CommissioningStage::kWiFiNetworkEnable; + case CommissioningStage::kFailsafeBeforeThreadEnable: + return CommissioningStage::kThreadNetworkEnable; case CommissioningStage::kWiFiNetworkEnable: if (mParams.GetThreadOperationalDataset().HasValue() && mDeviceCommissioningInfo.network.thread.endpoint != kInvalidEndpointId) @@ -374,6 +377,9 @@ void AutoCommissioner::SetCASEFailsafeTimerIfNeeded() // CASE establishment, and receipt of the commissioning complete command. // We know that the mCommissioneeDeviceProxy is still valid at this point since it gets cleared during cleanup // and SetCASEFailsafeTimerIfNeeded is always called before that stage. + // + // A false return from ExtendArmFailSafe is fine; we don't want to make + // the fail-safe shorter here. mCommissioner->ExtendArmFailSafe(mCommissioneeDeviceProxy, CommissioningStage::kFindOperational, mParams.GetCASEFailsafeTimerSeconds().Value(), GetCommandTimeout(mCommissioneeDeviceProxy, CommissioningStage::kArmFailsafe), diff --git a/src/controller/CHIPDeviceController.cpp b/src/controller/CHIPDeviceController.cpp index 1cba88f8b97a0b..e42a13aed4c5fd 100644 --- a/src/controller/CHIPDeviceController.cpp +++ b/src/controller/CHIPDeviceController.cpp @@ -1131,16 +1131,39 @@ void DeviceCommissioner::OnFailedToExtendedArmFailSafeDeviceAttestation(void * c commissioner->CommissioningStageComplete(CHIP_ERROR_INTERNAL, report); } -void DeviceCommissioner::ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, +bool DeviceCommissioner::ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, Optional commandTimeout, OnExtendFailsafeSuccess onSuccess, OnExtendFailsafeFailure onFailure) { + using namespace System; + using namespace System::Clock; + auto now = SystemClock().GetMonotonicTimestamp(); + auto newFailSafeTimeout = now + Seconds16(armFailSafeTimeout); + if (newFailSafeTimeout < proxy->GetFailSafeExpirationTimestamp()) + { + ChipLogProgress( + Controller, "Skipping arming failsafe: new time (%u seconds from now) before old time (%u seconds from now)", + armFailSafeTimeout, std::chrono::duration_cast(proxy->GetFailSafeExpirationTimestamp() - now).count()); + return false; + } + uint64_t breadcrumb = static_cast(step); GeneralCommissioning::Commands::ArmFailSafe::Type request; request.expiryLengthSeconds = armFailSafeTimeout; request.breadcrumb = breadcrumb; ChipLogProgress(Controller, "Arming failsafe (%u seconds)", request.expiryLengthSeconds); - SendCommand(proxy, request, onSuccess, onFailure, kRootEndpointId, commandTimeout); + CHIP_ERROR err = SendCommand(proxy, request, onSuccess, onFailure, kRootEndpointId, commandTimeout); + if (err != CHIP_NO_ERROR) + { + onFailure(this, err); + } + else + { + // TODO: Handle the situation when our command ends up erroring out + // asynchronously? + proxy->SetFailSafeExpirationTimestamp(newFailSafeTimeout); + } + return true; } void DeviceCommissioner::ExtendArmFailSafeForDeviceAttestation(const Credentials::DeviceAttestationVerifier::AttestationInfo & info, @@ -1154,20 +1177,24 @@ void DeviceCommissioner::ExtendArmFailSafeForDeviceAttestation(const Credentials mAttestationDeviceInfo = Platform::MakeUnique(info); auto expiryLengthSeconds = deviceAttestationDelegate->FailSafeExpiryTimeoutSecs(); - if (expiryLengthSeconds.HasValue()) + bool failSafeSkipped = expiryLengthSeconds.HasValue(); + if (failSafeSkipped) { - GeneralCommissioning::Commands::ArmFailSafe::Type request; - request.expiryLengthSeconds = expiryLengthSeconds.Value(); - request.breadcrumb = mCommissioningStage; - ChipLogProgress(Controller, "Changing fail-safe timer to %u seconds to handle DA failure", request.expiryLengthSeconds); + ChipLogProgress(Controller, "Changing fail-safe timer to %u seconds to handle DA failure", expiryLengthSeconds.Value()); // Per spec, anything we do with the fail-safe armed must not time out // in less than kMinimumCommissioningStepTimeout. - SendCommand(mDeviceBeingCommissioned, request, OnArmFailSafeExtendedForDeviceAttestation, - OnFailedToExtendedArmFailSafeDeviceAttestation, MakeOptional(kMinimumCommissioningStepTimeout)); + failSafeSkipped = + ExtendArmFailSafe(mDeviceBeingCommissioned, mCommissioningStage, expiryLengthSeconds.Value(), + MakeOptional(kMinimumCommissioningStepTimeout), OnArmFailSafeExtendedForDeviceAttestation, + OnFailedToExtendedArmFailSafeDeviceAttestation); } else { ChipLogProgress(Controller, "Proceeding without changing fail-safe timer value as delegate has not set it"); + } + + if (failSafeSkipped) + { // Callee does not use data argument. const GeneralCommissioning::Commands::ArmFailSafeResponse::DecodableType data; OnArmFailSafeExtendedForDeviceAttestation(this, data); @@ -1782,6 +1809,8 @@ void DeviceCommissioner::OnDeviceConnectionRetryFn(void * context, const ScopedN { failsafeTimeout = static_cast(retryTimeout.count() + kDefaultFailsafeTimeout); } + // A false return from ExtendArmFailSafe is fine; we don't want to make the + // fail-safe shorter here. self->ExtendArmFailSafe(commissioneeDevice, CommissioningStage::kFindOperational, failsafeTimeout, MakeOptional(kMinimumCommissioningStepTimeout), OnExtendFailsafeForCASERetrySuccess, OnExtendFailsafeForCASERetryFailure); @@ -2168,8 +2197,11 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio { case CommissioningStage::kArmFailsafe: { VerifyOrDie(endpoint == kRootEndpointId); - ExtendArmFailSafe(proxy, step, params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout), timeout, OnArmFailSafe, - OnBasicFailure); + // Make sure the fail-safe value we set here actually ends up being used + // no matter what. + proxy->SetFailSafeExpirationTimestamp(System::Clock::kZero); + VerifyOrDie(ExtendArmFailSafe(proxy, step, params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout), timeout, + OnArmFailSafe, OnBasicFailure)); } break; case CommissioningStage::kReadCommissioningInfo: { @@ -2464,6 +2496,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio SendCommand(proxy, request, OnNetworkConfigResponse, OnBasicFailure, endpoint, timeout); } break; + case CommissioningStage::kFailsafeBeforeWiFiEnable: + FALLTHROUGH; + case CommissioningStage::kFailsafeBeforeThreadEnable: + // Before we try to do network enablement, make sure that our fail-safe + // is set far enough out that we can later try to do operational + // discovery without it timing out. + ExtendFailsafeBeforeNetworkEnable(proxy, params, step); + break; case CommissioningStage::kWiFiNetworkEnable: { if (!params.GetWiFiCredentials().HasValue()) { @@ -2522,6 +2562,43 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } } +void DeviceCommissioner::ExtendFailsafeBeforeNetworkEnable(DeviceProxy * device, CommissioningParameters & params, + CommissioningStage step) +{ + auto * commissioneeDevice = FindCommissioneeDevice(device->GetDeviceId()); + if (device != commissioneeDevice) + { + // Not a commissionee device; just return. + ChipLogError(Controller, "Trying to extend fail-safe for an unknown commissionee with device id " ChipLogFormatX64, + ChipLogValueX64(device->GetDeviceId())); + CommissioningStageComplete(CHIP_ERROR_INCORRECT_STATE, CommissioningDelegate::CommissioningReport()); + return; + } + + // Try to make sure we have at least enough time for our expected + // commissioning bits plus the MRP retries for a Sigma1. + uint16_t failSafeTimeoutSecs = params.GetFailsafeTimerSeconds().ValueOr(kDefaultFailsafeTimeout); + auto sigma1Timeout = CASESession::ComputeSigma1ResponseTimeout(commissioneeDevice->GetPairing().GetRemoteMRPConfig()); + uint16_t sigma1TimeoutSecs = std::chrono::duration_cast(sigma1Timeout).count(); + if (UINT16_MAX - failSafeTimeoutSecs < sigma1TimeoutSecs) + { + failSafeTimeoutSecs = UINT16_MAX; + } + else + { + failSafeTimeoutSecs = static_cast(failSafeTimeoutSecs + sigma1TimeoutSecs); + } + + // A false return from ExtendArmFailSafe is fine; we don't want to make the + // fail-safe shorter here. + if (!ExtendArmFailSafe(commissioneeDevice, step, failSafeTimeoutSecs, MakeOptional(kMinimumCommissioningStepTimeout), + OnArmFailSafe, OnBasicFailure)) + { + // Just move on to the next step. + CommissioningStageComplete(CHIP_NO_ERROR, CommissioningDelegate::CommissioningReport()); + } +} + CHIP_ERROR DeviceController::GetCompressedFabricIdBytes(MutableByteSpan & outBytes) const { const auto * fabricInfo = GetFabricInfo(); diff --git a/src/controller/CHIPDeviceController.h b/src/controller/CHIPDeviceController.h index d60ad34768caf0..04c24db25a0eea 100644 --- a/src/controller/CHIPDeviceController.h +++ b/src/controller/CHIPDeviceController.h @@ -683,8 +683,11 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController, return mDefaultCommissioner == nullptr ? NullOptional : MakeOptional(mDefaultCommissioner->GetCommissioningParameters()); } - // Reset the arm failsafe timer during commissioning. - void ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, + // Reset the arm failsafe timer during commissioning. If this returns + // false, that means that the timer was already set for a longer time period + // than the new time we are trying to set. In this case, neither + // onSuccess nor onFailure will be called. + bool ExtendArmFailSafe(DeviceProxy * proxy, CommissioningStage step, uint16_t armFailSafeTimeout, Optional commandTimeout, OnExtendFailsafeSuccess onSuccess, OnExtendFailsafeFailure onFailure); @@ -900,6 +903,11 @@ class DLL_EXPORT DeviceCommissioner : public DeviceController, // to the PairingDelegate upon arm failsafe command completion. void CleanupCommissioning(DeviceProxy * proxy, NodeId nodeId, const CompletionStatus & completionStatus); + // Extend the fail-safe before trying to do network-enable (since after that + // point, for non-concurrent-commissioning devices, we may not have a way to + // extend it). + void ExtendFailsafeBeforeNetworkEnable(DeviceProxy * device, CommissioningParameters & params, CommissioningStage step); + chip::Callback::Callback mOnDeviceConnectedCallback; chip::Callback::Callback mOnDeviceConnectionFailureCallback; #if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES diff --git a/src/controller/CommissioneeDeviceProxy.cpp b/src/controller/CommissioneeDeviceProxy.cpp index f809c3785094a3..89c237b5d46e7d 100644 --- a/src/controller/CommissioneeDeviceProxy.cpp +++ b/src/controller/CommissioneeDeviceProxy.cpp @@ -71,11 +71,9 @@ CHIP_ERROR CommissioneeDeviceProxy::UpdateDeviceData(const Transport::PeerAddres { mDeviceAddress = addr; - mRemoteMRPConfig = config; - // Initialize PASE session state with any MRP parameters that DNS-SD has provided. // It can be overridden by PASE session protocol messages that include MRP parameters. - mPairing.SetRemoteMRPConfig(mRemoteMRPConfig); + mPairing.SetRemoteMRPConfig(config); if (!mSecureSession) { diff --git a/src/controller/CommissioningDelegate.cpp b/src/controller/CommissioningDelegate.cpp index b6b83980130e98..7fcdeec16a1e32 100644 --- a/src/controller/CommissioningDelegate.cpp +++ b/src/controller/CommissioningDelegate.cpp @@ -93,6 +93,14 @@ const char * StageToString(CommissioningStage stage) return "ThreadNetworkSetup"; break; + case kFailsafeBeforeWiFiEnable: + return "FailsafeBeforeWiFiEnable"; + break; + + case kFailsafeBeforeThreadEnable: + return "FailsafeBeforeThreadEnable"; + break; + case kWiFiNetworkEnable: return "WiFiNetworkEnable"; break; diff --git a/src/controller/CommissioningDelegate.h b/src/controller/CommissioningDelegate.h index ccc8954cb842a2..d34f8fff234526 100644 --- a/src/controller/CommissioningDelegate.h +++ b/src/controller/CommissioningDelegate.h @@ -32,26 +32,28 @@ class DeviceCommissioner; enum CommissioningStage : uint8_t { kError, - kSecurePairing, ///< Establish a PASE session with the device - kReadCommissioningInfo, ///< Query General Commissioning Attributes and Network Features - kArmFailsafe, ///< Send ArmFailSafe (0x30:0) command to the device - kConfigRegulatory, ///< Send SetRegulatoryConfig (0x30:2) command to the device - kSendPAICertificateRequest, ///< Send PAI CertificateChainRequest (0x3E:2) command to the device - kSendDACCertificateRequest, ///< Send DAC CertificateChainRequest (0x3E:2) command to the device - kSendAttestationRequest, ///< Send AttestationRequest (0x3E:0) command to the device - kAttestationVerification, ///< Verify AttestationResponse (0x3E:1) validity - kSendOpCertSigningRequest, ///< Send CSRRequest (0x3E:4) command to the device - kValidateCSR, ///< Verify CSRResponse (0x3E:5) validity - kGenerateNOCChain, ///< TLV encode Node Operational Credentials (NOC) chain certs - kSendTrustedRootCert, ///< Send AddTrustedRootCertificate (0x3E:11) command to the device - kSendNOC, ///< Send AddNOC (0x3E:6) command to the device - kWiFiNetworkSetup, ///< Send AddOrUpdateWiFiNetwork (0x31:2) command to the device - kThreadNetworkSetup, ///< Send AddOrUpdateThreadNetwork (0x31:3) command to the device - kWiFiNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the WiFi network - kThreadNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the Thread network - kFindOperational, ///< Perform operational discovery and establish a CASE session with the device - kSendComplete, ///< Send CommissioningComplete (0x30:4) command to the device - kCleanup, ///< Call delegates with status, free memory, clear timers and state + kSecurePairing, ///< Establish a PASE session with the device + kReadCommissioningInfo, ///< Query General Commissioning Attributes and Network Features + kArmFailsafe, ///< Send ArmFailSafe (0x30:0) command to the device + kConfigRegulatory, ///< Send SetRegulatoryConfig (0x30:2) command to the device + kSendPAICertificateRequest, ///< Send PAI CertificateChainRequest (0x3E:2) command to the device + kSendDACCertificateRequest, ///< Send DAC CertificateChainRequest (0x3E:2) command to the device + kSendAttestationRequest, ///< Send AttestationRequest (0x3E:0) command to the device + kAttestationVerification, ///< Verify AttestationResponse (0x3E:1) validity + kSendOpCertSigningRequest, ///< Send CSRRequest (0x3E:4) command to the device + kValidateCSR, ///< Verify CSRResponse (0x3E:5) validity + kGenerateNOCChain, ///< TLV encode Node Operational Credentials (NOC) chain certs + kSendTrustedRootCert, ///< Send AddTrustedRootCertificate (0x3E:11) command to the device + kSendNOC, ///< Send AddNOC (0x3E:6) command to the device + kWiFiNetworkSetup, ///< Send AddOrUpdateWiFiNetwork (0x31:2) command to the device + kThreadNetworkSetup, ///< Send AddOrUpdateThreadNetwork (0x31:3) command to the device + kFailsafeBeforeWiFiEnable, ///< Extend the fail-safe before doing kWiFiNetworkEnable + kFailsafeBeforeThreadEnable, ///< Extend the fail-safe before doing kThreadNetworkEnable + kWiFiNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the WiFi network + kThreadNetworkEnable, ///< Send ConnectNetwork (0x31:6) command to the device for the Thread network + kFindOperational, ///< Perform operational discovery and establish a CASE session with the device + kSendComplete, ///< Send CommissioningComplete (0x30:4) command to the device + kCleanup, ///< Call delegates with status, free memory, clear timers and state /// Send ScanNetworks (0x31:0) command to the device. /// ScanNetworks can happen anytime after kArmFailsafe. /// However, the cirque tests fail if it is earlier in the list diff --git a/src/controller/python/OpCredsBinding.cpp b/src/controller/python/OpCredsBinding.cpp index 3c4da74f904683..55bb8094863682 100644 --- a/src/controller/python/OpCredsBinding.cpp +++ b/src/controller/python/OpCredsBinding.cpp @@ -297,12 +297,14 @@ class TestCommissioner : public chip::Controller::AutoCommissioner { if (!mIsWifi && (stage == chip::Controller::CommissioningStage::kWiFiNetworkEnable || + stage == chip::Controller::CommissioningStage::kFailsafeBeforeWiFiEnable || stage == chip::Controller::CommissioningStage::kWiFiNetworkSetup)) { return false; } if (!mIsThread && (stage == chip::Controller::CommissioningStage::kThreadNetworkEnable || + stage == chip::Controller::CommissioningStage::kFailsafeBeforeThreadEnable || stage == chip::Controller::CommissioningStage::kThreadNetworkSetup)) { return false; diff --git a/src/controller/python/test/test_scripts/commissioning_failure_test.py b/src/controller/python/test/test_scripts/commissioning_failure_test.py index 8b9edbfee514d1..4681dd108d758a 100755 --- a/src/controller/python/test/test_scripts/commissioning_failure_test.py +++ b/src/controller/python/test/test_scripts/commissioning_failure_test.py @@ -96,7 +96,7 @@ def main(): # TODO: Start at stage 2 once handling for arming failsafe on pase is done. if options.report: - for testFailureStage in range(3, 18): + for testFailureStage in range(3, 20): FailIfNot(test.TestPaseOnly(ip=options.deviceAddress1, setuppin=20202021, nodeid=1), @@ -105,7 +105,7 @@ def main(): "Commissioning failure tests failed for simulated report failure on stage {}".format(testFailureStage)) else: - for testFailureStage in range(3, 18): + for testFailureStage in range(3, 20): FailIfNot(test.TestPaseOnly(ip=options.deviceAddress1, setuppin=20202021, nodeid=1),