diff --git a/src/app/BUILD.gn b/src/app/BUILD.gn index 7ef2fd303ac445..0a3a865a678e7d 100644 --- a/src/app/BUILD.gn +++ b/src/app/BUILD.gn @@ -80,6 +80,8 @@ static_library("app") { "DeviceProxy.h", "EventManagement.cpp", "EventPathParams.h", + "FailSafeContext.cpp", + "FailSafeContext.h", "GlobalAttributes.h", "InteractionModelEngine.cpp", "InteractionModelRevision.h", diff --git a/src/platform/FailSafeContext.cpp b/src/app/FailSafeContext.cpp similarity index 53% rename from src/platform/FailSafeContext.cpp rename to src/app/FailSafeContext.cpp index 376881b05fab51..f85e538d2e5543 100644 --- a/src/platform/FailSafeContext.cpp +++ b/src/app/FailSafeContext.cpp @@ -20,20 +20,15 @@ * Provides the implementation of the FailSafeContext object. */ -#include - -#include #include -#include +#include -namespace chip { -namespace DeviceLayer { +#include "FailSafeContext.h" + +using namespace chip::DeviceLayer; -namespace { -constexpr TLV::Tag kFabricIndexTag = TLV::ContextTag(0); -constexpr TLV::Tag kAddNocCommandTag = TLV::ContextTag(1); -constexpr TLV::Tag kUpdateNocCommandTag = TLV::ContextTag(2); -} // anonymous namespace +namespace chip { +namespace app { void FailSafeContext::HandleArmFailSafeTimer(System::Layer * layer, void * aAppState) { @@ -89,7 +84,7 @@ void FailSafeContext::ScheduleFailSafeCleanup(FabricIndex fabricIndex, bool addN PlatformMgr().ScheduleWork(HandleDisarmFailSafe, reinterpret_cast(this)); } -CHIP_ERROR FailSafeContext::ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Timeout expiryLength) +CHIP_ERROR FailSafeContext::ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Seconds16 expiryLengthSeconds) { CHIP_ERROR err = CHIP_NO_ERROR; bool cancelTimersIfError = false; @@ -100,9 +95,8 @@ CHIP_ERROR FailSafeContext::ArmFailSafe(FabricIndex accessingFabricIndex, System cancelTimersIfError = true; } - SuccessOrExit(err = DeviceLayer::SystemLayer().StartTimer(expiryLength, HandleArmFailSafeTimer, this)); - SuccessOrExit(err = CommitToStorage()); - SuccessOrExit(err = ConfigurationMgr().SetFailSafeArmed(true)); + SuccessOrExit( + err = DeviceLayer::SystemLayer().StartTimer(System::Clock::Seconds16(expiryLengthSeconds), HandleArmFailSafeTimer, this)); mFailSafeArmed = true; mFabricIndex = accessingFabricIndex; @@ -124,96 +118,9 @@ void FailSafeContext::DisarmFailSafe() ResetState(); - if (ConfigurationMgr().SetFailSafeArmed(false) != CHIP_NO_ERROR) - { - ChipLogError(FailSafe, "Failed to set FailSafeArmed config to false"); - } - - if (DeleteFromStorage() != CHIP_NO_ERROR) - { - ChipLogError(FailSafe, "Failed to delete FailSafeContext from configuration"); - } - ChipLogProgress(FailSafe, "Fail-safe cleanly disarmed"); } -CHIP_ERROR FailSafeContext::SetAddNocCommandInvoked(FabricIndex nocFabricIndex) -{ - mAddNocCommandHasBeenInvoked = true; - mFabricIndex = nocFabricIndex; - - ReturnErrorOnFailure(CommitToStorage()); - - return CHIP_NO_ERROR; -} - -CHIP_ERROR FailSafeContext::SetUpdateNocCommandInvoked() -{ - mUpdateNocCommandHasBeenInvoked = true; - - ReturnErrorOnFailure(CommitToStorage()); - - return CHIP_NO_ERROR; -} - -CHIP_ERROR FailSafeContext::CommitToStorage() -{ - DefaultStorageKeyAllocator keyAlloc; - uint8_t buf[FailSafeContextTLVMaxSize()]; - TLV::TLVWriter writer; - writer.Init(buf); - - TLV::TLVType outerType; - ReturnErrorOnFailure(writer.StartContainer(TLV::AnonymousTag(), TLV::kTLVType_Structure, outerType)); - ReturnErrorOnFailure(writer.Put(kFabricIndexTag, mFabricIndex)); - - // TODO: Stop storing this, and just make the fail-safe context volatile and sweep-up stale data next boot on partial commits - ReturnErrorOnFailure(writer.Put(kAddNocCommandTag, mAddNocCommandHasBeenInvoked)); - ReturnErrorOnFailure(writer.Put(kUpdateNocCommandTag, mUpdateNocCommandHasBeenInvoked)); - ReturnErrorOnFailure(writer.EndContainer(outerType)); - - const auto failSafeContextTLVLength = writer.GetLengthWritten(); - VerifyOrReturnError(CanCastTo(failSafeContextTLVLength), CHIP_ERROR_BUFFER_TOO_SMALL); - - return PersistedStorage::KeyValueStoreMgr().Put(keyAlloc.FailSafeContextKey(), buf, - static_cast(failSafeContextTLVLength)); -} - -CHIP_ERROR FailSafeContext::LoadFromStorage(FabricIndex & fabricIndex, bool & addNocCommandInvoked, bool & updateNocCommandInvoked) -{ - DefaultStorageKeyAllocator keyAlloc; - uint8_t buf[FailSafeContextTLVMaxSize()]; - ReturnErrorOnFailure(PersistedStorage::KeyValueStoreMgr().Get(keyAlloc.FailSafeContextKey(), buf, sizeof(buf))); - - TLV::ContiguousBufferTLVReader reader; - reader.Init(buf, sizeof(buf)); - ReturnErrorOnFailure(reader.Next(TLV::kTLVType_Structure, TLV::AnonymousTag())); - - TLV::TLVType containerType; - ReturnErrorOnFailure(reader.EnterContainer(containerType)); - - ReturnErrorOnFailure(reader.Next(kFabricIndexTag)); - ReturnErrorOnFailure(reader.Get(fabricIndex)); - - ReturnErrorOnFailure(reader.Next(kAddNocCommandTag)); - ReturnErrorOnFailure(reader.Get(addNocCommandInvoked)); - - ReturnErrorOnFailure(reader.Next(kUpdateNocCommandTag)); - ReturnErrorOnFailure(reader.Get(updateNocCommandInvoked)); - - ReturnErrorOnFailure(reader.VerifyEndOfContainer()); - ReturnErrorOnFailure(reader.ExitContainer(containerType)); - - return CHIP_NO_ERROR; -} - -CHIP_ERROR FailSafeContext::DeleteFromStorage() -{ - DefaultStorageKeyAllocator keyAlloc; - - return PersistedStorage::KeyValueStoreMgr().Delete(keyAlloc.FailSafeContextKey()); -} - void FailSafeContext::ForceFailSafeTimerExpiry() { if (!IsFailSafeArmed()) @@ -228,5 +135,5 @@ void FailSafeContext::ForceFailSafeTimerExpiry() FailSafeTimerExpired(); } -} // namespace DeviceLayer +} // namespace app } // namespace chip diff --git a/src/include/platform/FailSafeContext.h b/src/app/FailSafeContext.h similarity index 89% rename from src/include/platform/FailSafeContext.h rename to src/app/FailSafeContext.h index 1af60d71a21a37..e2759d85e71f06 100644 --- a/src/include/platform/FailSafeContext.h +++ b/src/app/FailSafeContext.h @@ -23,10 +23,13 @@ #pragma once +#include +#include #include +#include namespace chip { -namespace DeviceLayer { +namespace app { class FailSafeContext { @@ -39,14 +42,18 @@ class FailSafeContext * when the fail-safe timer is currently armed, the currently-running fail-safe timer will * first be cancelled, then the fail-safe timer will be re-armed. */ - CHIP_ERROR ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Timeout expiryLength); + CHIP_ERROR ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Seconds16 expiryLengthSeconds); /** * @brief Cleanly disarm failsafe timer, such as on CommissioningComplete */ void DisarmFailSafe(); - CHIP_ERROR SetAddNocCommandInvoked(FabricIndex nocFabricIndex); - CHIP_ERROR SetUpdateNocCommandInvoked(); + void SetAddNocCommandInvoked(FabricIndex nocFabricIndex) + { + mAddNocCommandHasBeenInvoked = true; + mFabricIndex = nocFabricIndex; + } + void SetUpdateNocCommandInvoked() { mUpdateNocCommandHasBeenInvoked = true; } void SetAddTrustedRootCertInvoked() { mAddTrustedRootCertHasBeenInvoked = true; } void SetCsrRequestForUpdateNoc(bool isForUpdateNoc) { mIsCsrRequestForUpdateNoc = isForUpdateNoc; } @@ -90,9 +97,6 @@ class FailSafeContext // If the failsafe is not armed, this is a no-op. void ForceFailSafeTimerExpiry(); - static CHIP_ERROR LoadFromStorage(FabricIndex & fabricIndex, bool & addNocCommandInvoked, bool & updateNocCommandInvoked); - static CHIP_ERROR DeleteFromStorage(); - private: bool mFailSafeArmed = false; bool mFailSafeBusy = false; @@ -103,13 +107,6 @@ class FailSafeContext bool mIsCsrRequestForUpdateNoc = false; FabricIndex mFabricIndex = kUndefinedFabricIndex; - // TODO:: Track the state of what was mutated during fail-safe. - - static constexpr size_t FailSafeContextTLVMaxSize() - { - return TLV::EstimateStructOverhead(sizeof(FabricIndex), sizeof(bool), sizeof(bool)); - } - /** * @brief * The callback function to be called when "fail-safe timer" expires. @@ -146,5 +143,5 @@ class FailSafeContext CHIP_ERROR CommitToStorage(); }; -} // namespace DeviceLayer +} // namespace app } // namespace chip diff --git a/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp b/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp index 0d0da53e155a06..215c99218ea2fb 100644 --- a/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp +++ b/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp @@ -107,7 +107,7 @@ bool emberAfAdministratorCommissioningClusterOpenCommissioningWindowCallback( FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex(); FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex); - auto & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); auto & commissionMgr = Server::GetInstance().GetCommissioningWindowManager(); VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR)); @@ -169,7 +169,7 @@ bool emberAfAdministratorCommissioningClusterOpenBasicCommissioningWindowCallbac FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex(); FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex); - auto & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); auto & commissionMgr = Server::GetInstance().GetCommissioningWindowManager(); VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR)); diff --git a/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp b/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp index 9f9f0877748317..a4eddf662406b0 100644 --- a/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp +++ b/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp @@ -155,7 +155,7 @@ bool emberAfGeneralCommissioningClusterArmFailSafeCallback(app::CommandHandler * const Commands::ArmFailSafe::DecodableType & commandData) { MATTER_TRACE_EVENT_SCOPE("ArmFailSafe", "GeneralCommissioning"); - FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); Commands::ArmFailSafeResponse::Type response; ChipLogProgress(FailSafe, "GeneralCommissioning: Received ArmFailSafe (%us)", @@ -219,9 +219,9 @@ bool emberAfGeneralCommissioningClusterCommissioningCompleteCallback( { MATTER_TRACE_EVENT_SCOPE("CommissioningComplete", "GeneralCommissioning"); - DeviceControlServer * server = &DeviceLayer::DeviceControlServer::DeviceControlSvr(); - auto & failSafe = server->GetFailSafeContext(); - auto & fabricTable = Server::GetInstance().GetFabricTable(); + DeviceControlServer * devCtrl = &DeviceLayer::DeviceControlServer::DeviceControlSvr(); + auto & failSafe = Server::GetInstance().GetFailSafeContext(); + auto & fabricTable = Server::GetInstance().GetFabricTable(); ChipLogProgress(FailSafe, "GeneralCommissioning: Received CommissioningComplete"); @@ -267,7 +267,7 @@ bool emberAfGeneralCommissioningClusterCommissioningCompleteCallback( */ failSafe.DisarmFailSafe(); CheckSuccess( - server->PostCommissioningCompleteEvent(handle->AsSecureSession()->GetPeerNodeId(), handle->GetFabricIndex()), + devCtrl->PostCommissioningCompleteEvent(handle->AsSecureSession()->GetPeerNodeId(), handle->GetFabricIndex()), Failure); Breadcrumb::Set(commandPath.mEndpointId, 0); diff --git a/src/app/clusters/network-commissioning/network-commissioning.cpp b/src/app/clusters/network-commissioning/network-commissioning.cpp index 1b22b34fe6a3a0..f6d7e11b992b9c 100644 --- a/src/app/clusters/network-commissioning/network-commissioning.cpp +++ b/src/app/clusters/network-commissioning/network-commissioning.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -300,7 +301,7 @@ void FillDebugTextAndNetworkIndex(Commands::NetworkConfigResponse::Type & respon bool CheckFailSafeArmed(CommandHandlerInterface::HandlerContext & ctx) { - DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & failSafeContext = chip::Server::GetInstance().GetFailSafeContext(); if (failSafeContext.IsFailSafeArmed(ctx.mCommandHandler.GetAccessingFabricIndex())) { diff --git a/src/app/clusters/operational-credentials-server/operational-credentials-server.cpp b/src/app/clusters/operational-credentials-server/operational-credentials-server.cpp index e7f71c0e3a67cf..5ed6a89301d1c4 100644 --- a/src/app/clusters/operational-credentials-server/operational-credentials-server.cpp +++ b/src/app/clusters/operational-credentials-server/operational-credentials-server.cpp @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #if CHIP_CRYPTO_HSM @@ -56,7 +55,6 @@ #endif using namespace chip; -using namespace ::chip::DeviceLayer; using namespace ::chip::Transport; using namespace chip::app; using namespace chip::app::Clusters; @@ -339,14 +337,6 @@ void FailSafeCleanup(const chip::DeviceLayer::ChipDeviceEvent * event) ChipLogError(Zcl, "OpCreds: failed to delete fabric at index %u: %" CHIP_ERROR_FORMAT, fabricIndex, err.Format()); } } - - // If an UpdateNOC command had been successfully invoked, revert the state of operational key pair, NOC and ICAC for that - // Fabric to the state prior to the Fail-Safe timer being armed, for the Fabric Index that was the subject of the UpdateNOC - // command. - if (event->FailSafeTimerExpired.updateNocCommandHasBeenInvoked) - { - // TODO: Revert the state of operational key pair, NOC and ICAC - } } void OnPlatformEventHandler(const chip::DeviceLayer::ChipDeviceEvent * event, intptr_t arg) @@ -650,8 +640,8 @@ bool emberAfOperationalCredentialsClusterAddNOCCallback(app::CommandHandler * co FabricInfo * newFabricInfo = nullptr; auto & fabricTable = Server::GetInstance().GetFabricTable(); - auto * secureSession = commandObj->GetExchangeContext()->GetSessionHandle()->AsSecureSession(); - FailSafeContext & failSafeContext = DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto * secureSession = commandObj->GetExchangeContext()->GetSessionHandle()->AsSecureSession(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); uint8_t compressed_fabric_id_buffer[sizeof(uint64_t)]; MutableByteSpan compressed_fabric_id(compressed_fabric_id_buffer); @@ -737,8 +727,7 @@ bool emberAfOperationalCredentialsClusterAddNOCCallback(app::CommandHandler * co // The Fabric Index associated with the armed fail-safe context SHALL be updated to match the Fabric // Index just allocated. - err = failSafeContext.SetAddNocCommandInvoked(newFabricIndex); - VerifyOrExit(err == CHIP_NO_ERROR, nonDefaultStatus = Status::Failure); + failSafeContext.SetAddNocCommandInvoked(newFabricIndex); // Done all intermediate steps, we are now successful needRevert = false; @@ -816,16 +805,15 @@ bool emberAfOperationalCredentialsClusterUpdateNOCCallback(app::CommandHandler * auto nocResponse = OperationalCertStatus::kSuccess; auto nonDefaultStatus = Status::Success; - bool needRevert = false; CHIP_ERROR err = CHIP_NO_ERROR; FabricIndex fabricIndex = 0; ChipLogProgress(Zcl, "OpCreds: Received an UpdateNOC command"); - auto & fabricTable = Server::GetInstance().GetFabricTable(); - FailSafeContext & failSafeContext = DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); - FabricInfo * fabricInfo = RetrieveCurrentFabric(commandObj); + auto & fabricTable = Server::GetInstance().GetFabricTable(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); + FabricInfo * fabricInfo = RetrieveCurrentFabric(commandObj); bool csrWasForUpdateNoc = false; //< Output param of HasPendingOperationalKey bool hasPendingKey = fabricTable.HasPendingOperationalKey(csrWasForUpdateNoc); @@ -849,15 +837,8 @@ bool emberAfOperationalCredentialsClusterUpdateNOCCallback(app::CommandHandler * err = fabricTable.UpdatePendingFabricWithOperationalKeystore(fabricIndex, NOCValue, ICACValue.ValueOr(ByteSpan{})); VerifyOrExit(err == CHIP_NO_ERROR, nocResponse = ConvertToNOCResponseStatus(err)); - // From here if we error-out, we should revert the fabric table pending updates - needRevert = true; - // Flag on the fail-safe context that the UpdateNOC command was invoked. - err = failSafeContext.SetUpdateNocCommandInvoked(); - VerifyOrExit(err == CHIP_NO_ERROR, nocResponse = ConvertToNOCResponseStatus(err)); - - // Done all intermediate steps, we are now successful - needRevert = false; + failSafeContext.SetUpdateNocCommandInvoked(); // We might have a new operational identity, so we should start advertising // it right away. Also, we need to withdraw our old operational identity. @@ -866,11 +847,6 @@ bool emberAfOperationalCredentialsClusterUpdateNOCCallback(app::CommandHandler * // Attribute notification was already done by fabric table exit: - if (needRevert) - { - fabricTable.RevertPendingOpCertsExceptRoot(); - } - // We have an NOC response if (nonDefaultStatus == Status::Success) { @@ -1039,8 +1015,8 @@ bool emberAfOperationalCredentialsClusterCSRRequestCallback(app::CommandHandler // logs by the end. We use finalStatus as our overall success marker, not error CHIP_ERROR err = CHIP_ERROR_INVALID_ARGUMENT; - auto & fabricTable = Server::GetInstance().GetFabricTable(); - FailSafeContext & failSafeContext = DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & fabricTable = Server::GetInstance().GetFabricTable(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); auto & CSRNonce = commandData.CSRNonce; bool isForUpdateNoc = commandData.isForUpdateNOC.ValueOr(false); @@ -1157,8 +1133,8 @@ bool emberAfOperationalCredentialsClusterAddTrustedRootCertificateCallback( // logs by the end. We use finalStatus as our overall success marker, not error CHIP_ERROR err = CHIP_ERROR_INVALID_ARGUMENT; - auto & rootCertificate = commandData.rootCertificate; - FailSafeContext & failSafeContext = DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & rootCertificate = commandData.rootCertificate; + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); ChipLogProgress(Zcl, "OpCreds: Received an AddTrustedRootCertificate command"); diff --git a/src/app/server/CommissioningWindowManager.cpp b/src/app/server/CommissioningWindowManager.cpp index 0ad4a5b1ab5b1d..4aa945c860805e 100644 --- a/src/app/server/CommissioningWindowManager.cpp +++ b/src/app/server/CommissioningWindowManager.cpp @@ -160,7 +160,7 @@ void CommissioningWindowManager::OnSessionEstablished(const SessionHandle & sess StopAdvertisement(/* aShuttingDown = */ false); - DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); // This should never be armed because we don't allow CASE sessions to arm the failsafe when the commissioning window is open and // we check that the failsafe is not armed before opening the commissioning window. None the less, it is good to double-check. CHIP_ERROR err = CHIP_NO_ERROR; @@ -194,7 +194,7 @@ CHIP_ERROR CommissioningWindowManager::OpenCommissioningWindow(Seconds16 commiss { VerifyOrReturnError(commissioningTimeout <= MaxCommissioningTimeout() && commissioningTimeout >= MinCommissioningTimeout(), CHIP_ERROR_INVALID_ARGUMENT); - DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); VerifyOrReturnError(!failSafeContext.IsFailSafeArmed(), CHIP_ERROR_INCORRECT_STATE); ReturnErrorOnFailure(Dnssd::ServiceAdvertiser::Instance().UpdateCommissionableInstanceName()); @@ -471,7 +471,7 @@ void CommissioningWindowManager::OnSessionReleased() void CommissioningWindowManager::ExpireFailSafeIfArmed() { - DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + auto & failSafeContext = Server::GetInstance().GetFailSafeContext(); if (failSafeContext.IsFailSafeArmed()) { failSafeContext.ForceFailSafeTimerExpiry(); diff --git a/src/app/server/Server.cpp b/src/app/server/Server.cpp index 0ea320dc9f0be8..8ea1db77b19271 100644 --- a/src/app/server/Server.cpp +++ b/src/app/server/Server.cpp @@ -169,9 +169,6 @@ CHIP_ERROR Server::Init(const ServerInitParams & initParams) // This initializes clusters, so should come after lower level initialization. InitDataModelHandler(&mExchangeMgr); - // Clean-up previously standing fail-safes - InitFailSafe(); - // Init transport before operations with secure session mgr. err = mTransports.Init(UdpListenParameters(DeviceLayer::UDPEndPointManager()) .SetAddressType(IPAddressType::kIPv6) @@ -257,7 +254,6 @@ CHIP_ERROR Server::Init(const ServerInitParams & initParams) else { #if CHIP_DEVICE_CONFIG_ENABLE_PAIRING_AUTOSTART - GetFabricTable().DeleteAllFabrics(); SuccessOrExit(err = mCommissioningWindowManager.OpenBasicCommissioningWindow()); #endif } @@ -300,6 +296,41 @@ CHIP_ERROR Server::Init(const ServerInitParams & initParams) RejoinExistingMulticastGroups(); #endif // !CHIP_SYSTEM_CONFIG_USE_OPEN_THREAD_ENDPOINT + // Handle deferred clean-up of a previously armed fail-safe that occurred during FabricTable commit. + // This is done at the very end since at the earlier time above when FabricTable::Init() is called, + // the delegates could not have been registered, and the other systems were not initialized. By now, + // everything is initialized, so we can do a deferred clean-up. + { + FabricIndex fabricIndexDeletedOnInit = GetFabricTable().GetDeletedFabricFromCommitMarker(); + if (fabricIndexDeletedOnInit != kUndefinedFabricIndex) + { + ChipLogError(AppServer, "FabricIndex 0x%x deleted due to restart while fail-safed. Processing a clean-up!", + static_cast(fabricIndexDeletedOnInit)); + + // Always pretend it was an add, since being in the middle of an update currently breaks + // the validity of the fabric table. This is expected to be extremely infrequent, so + // this "harsher" than usual clean-up is more likely to get us in a valid state for whatever + // remains. + const bool addNocCalled = true; + const bool updateNocCalled = false; + GetFailSafeContext().ScheduleFailSafeCleanup(fabricIndexDeletedOnInit, addNocCalled, updateNocCalled); + + // Schedule clearing of the commit marker to only occur after we have processed all fail-safe clean-up. + // Because Matter runs a single event loop for all scheduled work, it will occur after the above has + // taken place. If a reset occurs before we have cleaned everything up, the next boot will still + // see the commit marker. + PlatformMgr().ScheduleWork( + [](intptr_t arg) { + Server * server = reinterpret_cast(arg); + VerifyOrReturn(server != nullptr); + + server->GetFabricTable().ClearCommitMarker(); + ChipLogProgress(AppServer, "Cleared FabricTable pending commit marker"); + }, + reinterpret_cast(this)); + } + } + PlatformMgr().HandleServerStarted(); exit: @@ -315,39 +346,6 @@ CHIP_ERROR Server::Init(const ServerInitParams & initParams) return err; } -void Server::InitFailSafe() -{ - bool failSafeArmed = false; - - CHIP_ERROR err = CHIP_NO_ERROR; - - // If the fail-safe was armed when the device last shutdown, initiate cleanup based on the pending Fail Safe Context with - // which the fail-safe timer was armed. - if (DeviceLayer::ConfigurationMgr().GetFailSafeArmed(failSafeArmed) == CHIP_NO_ERROR && failSafeArmed) - { - FabricIndex fabricIndex; - bool addNocCommandInvoked; - bool updateNocCommandInvoked; - - ChipLogProgress(AppServer, "Detected fail-safe armed on reboot"); - - err = DeviceLayer::FailSafeContext::LoadFromStorage(fabricIndex, addNocCommandInvoked, updateNocCommandInvoked); - if (err == CHIP_NO_ERROR) - { - DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext().ScheduleFailSafeCleanup( - fabricIndex, addNocCommandInvoked, updateNocCommandInvoked); - } - else - { - // This should not happen, but we should not fail system init based on it! - ChipLogError(DeviceLayer, "Failed to load fail-safe context from storage (err= %" CHIP_ERROR_FORMAT "), cleaning-up!", - err.Format()); - (void) DeviceLayer::ConfigurationMgr().SetFailSafeArmed(false); - err = CHIP_NO_ERROR; - } - } -} - void Server::RejoinExistingMulticastGroups() { ChipLogProgress(AppServer, "Joining Multicast groups"); diff --git a/src/app/server/Server.h b/src/app/server/Server.h index 22fa2cd96a5e2f..13eac3aedd8a56 100644 --- a/src/app/server/Server.h +++ b/src/app/server/Server.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -327,6 +328,8 @@ class Server PersistentStorageDelegate & GetPersistentStorage() { return *mDeviceStorage; } + app::FailSafeContext & GetFailSafeContext() { return mFailSafeContext; } + TestEventTriggerDelegate * GetTestEventTriggerDelegate() { return mTestEventTriggerDelegate; } Crypto::OperationalKeystore * GetOperationalKeystore() { return mOperationalKeystore; } @@ -500,6 +503,7 @@ class Server TestEventTriggerDelegate * mTestEventTriggerDelegate; Crypto::OperationalKeystore * mOperationalKeystore; Credentials::OperationalCertificateStore * mOpCertStore; + app::FailSafeContext mFailSafeContext; uint16_t mOperationalServicePort; uint16_t mUserDirectedCommissioningPort; diff --git a/src/app/tests/BUILD.gn b/src/app/tests/BUILD.gn index 868f08a879aa7a..f9c3755c5d33df 100644 --- a/src/app/tests/BUILD.gn +++ b/src/app/tests/BUILD.gn @@ -83,6 +83,7 @@ chip_test_suite("tests") { "TestEventOverflow.cpp", "TestEventPathParams.cpp", "TestFabricScopedEventLogging.cpp", + "TestFailSafeContext.cpp", "TestInteractionModelEngine.cpp", "TestMessageDef.cpp", "TestNumericAttributeTraits.cpp", diff --git a/src/platform/tests/TestFailSafeContext.cpp b/src/app/tests/TestFailSafeContext.cpp similarity index 68% rename from src/platform/tests/TestFailSafeContext.cpp rename to src/app/tests/TestFailSafeContext.cpp index 1c998caee0a2aa..9e5d11dc320b73 100644 --- a/src/platform/tests/TestFailSafeContext.cpp +++ b/src/app/tests/TestFailSafeContext.cpp @@ -28,12 +28,12 @@ #include #include +#include #include #include #include #include #include -#include using namespace chip; using namespace chip::Logging; @@ -58,7 +58,7 @@ static void TestFailSafeContext_ArmFailSafe(nlTestSuite * inSuite, void * inCont { CHIP_ERROR err = CHIP_NO_ERROR; - FailSafeContext & failSafeContext = DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + chip::app::FailSafeContext failSafeContext; err = failSafeContext.ArmFailSafe(kTestAccessingFabricIndex1, System::Clock::Seconds16(1)); NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); @@ -75,57 +75,24 @@ static void TestFailSafeContext_NocCommandInvoked(nlTestSuite * inSuite, void * { CHIP_ERROR err = CHIP_NO_ERROR; - FailSafeContext & failSafeContext = DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + chip::app::FailSafeContext failSafeContext; err = failSafeContext.ArmFailSafe(kTestAccessingFabricIndex1, System::Clock::Seconds16(1)); NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); NL_TEST_ASSERT(inSuite, failSafeContext.GetFabricIndex() == kTestAccessingFabricIndex1); - err = failSafeContext.SetAddNocCommandInvoked(kTestAccessingFabricIndex2); - NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); + failSafeContext.SetAddNocCommandInvoked(kTestAccessingFabricIndex2); NL_TEST_ASSERT(inSuite, failSafeContext.NocCommandHasBeenInvoked() == true); NL_TEST_ASSERT(inSuite, failSafeContext.AddNocCommandHasBeenInvoked() == true); NL_TEST_ASSERT(inSuite, failSafeContext.GetFabricIndex() == kTestAccessingFabricIndex2); - err = failSafeContext.SetUpdateNocCommandInvoked(); - NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); + failSafeContext.SetUpdateNocCommandInvoked(); NL_TEST_ASSERT(inSuite, failSafeContext.NocCommandHasBeenInvoked() == true); NL_TEST_ASSERT(inSuite, failSafeContext.UpdateNocCommandHasBeenInvoked() == true); failSafeContext.DisarmFailSafe(); } -static void TestFailSafeContext_CommitToStorage(nlTestSuite * inSuite, void * inContext) -{ - CHIP_ERROR err = CHIP_NO_ERROR; - - FailSafeContext & failSafeContext = DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); - - err = failSafeContext.ArmFailSafe(kTestAccessingFabricIndex1, System::Clock::Seconds16(1)); - NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); - NL_TEST_ASSERT(inSuite, failSafeContext.GetFabricIndex() == kTestAccessingFabricIndex1); - - err = failSafeContext.SetAddNocCommandInvoked(kTestAccessingFabricIndex1); - NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); - NL_TEST_ASSERT(inSuite, failSafeContext.AddNocCommandHasBeenInvoked() == true); - - err = failSafeContext.SetUpdateNocCommandInvoked(); - NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); - NL_TEST_ASSERT(inSuite, failSafeContext.UpdateNocCommandHasBeenInvoked() == true); - - FabricIndex fabricIndex; - bool addNocCommandInvoked; - bool updateNocCommandInvoked; - - err = FailSafeContext::LoadFromStorage(fabricIndex, addNocCommandInvoked, updateNocCommandInvoked); - NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); - NL_TEST_ASSERT(inSuite, fabricIndex == kTestAccessingFabricIndex1); - NL_TEST_ASSERT(inSuite, addNocCommandInvoked == true); - NL_TEST_ASSERT(inSuite, updateNocCommandInvoked == true); - - failSafeContext.DisarmFailSafe(); -} - /** * Test Suite. It lists all the test functions. */ @@ -134,7 +101,6 @@ static const nlTest sTests[] = { NL_TEST_DEF("Test PlatformMgr::Init", TestPlatformMgr_Init), NL_TEST_DEF("Test FailSafeContext::ArmFailSafe", TestFailSafeContext_ArmFailSafe), NL_TEST_DEF("Test FailSafeContext::NocCommandInvoked", TestFailSafeContext_NocCommandInvoked), - NL_TEST_DEF("Test FailSafeContext::CommitToStorage", TestFailSafeContext_CommitToStorage), NL_TEST_SENTINEL() }; diff --git a/src/credentials/FabricTable.cpp b/src/credentials/FabricTable.cpp index 52a714e82cfeee..a91feb68bdcbfb 100644 --- a/src/credentials/FabricTable.cpp +++ b/src/credentials/FabricTable.cpp @@ -51,6 +51,27 @@ constexpr TLV::Tag kFabricLabelTag = TLV::ContextTag(1); constexpr TLV::Tag kNextAvailableFabricIndexTag = TLV::ContextTag(0); constexpr TLV::Tag kFabricIndicesTag = TLV::ContextTag(1); +// Tags for commit marker storage +constexpr TLV::Tag kMarkerFabricIndexTag = TLV::ContextTag(0); +constexpr TLV::Tag kMarkerIsAdditionTag = TLV::ContextTag(1); + +constexpr size_t CommitMarkerContextTLVMaxSize() +{ + // Add 2x uncommitted uint64_t to leave space for backwards/forwards + // versioning for this critical feature that runs at boot. + return TLV::EstimateStructOverhead(sizeof(FabricIndex), sizeof(bool), sizeof(uint64_t), sizeof(uint64_t)); +} + +constexpr size_t IndexInfoTLVMaxSize() +{ + // We have a single next-available index and an array of anonymous-tagged + // fabric indices. + // + // The max size of the list is (1 byte control + bytes for actual value) + // times max number of list items, plus one byte for the list terminator. + return TLV::EstimateStructOverhead(sizeof(FabricIndex), CHIP_CONFIG_MAX_FABRICS * (1 + sizeof(FabricIndex)) + 1); +} + } // anonymous namespace CHIP_ERROR FabricInfo::Init(const FabricInfo::InitParams & initParams) @@ -994,7 +1015,35 @@ CHIP_ERROR FabricTable::Init(const FabricTable::InitParams & initParams) TLV::ContiguousBufferTLVReader reader; reader.Init(buf, size); - ReturnErrorOnFailure(ReadFabricInfo(reader)); + // TODO: A safer way would be to just clean-up the entire fabric table on this situation... + err = ReadFabricInfo(reader); + if (err != CHIP_NO_ERROR) + { + ChipLogError(FabricProvisioning, "Error loading fabric table: %" CHIP_ERROR_FORMAT ", we are in a bad state!", + err.Format()); + } + + ReturnErrorOnFailure(err); + } + + CommitMarker commitMarker; + err = GetCommitMarker(commitMarker); + if (err == CHIP_NO_ERROR) + { + // Found a commit marker! We need to possibly delete a loaded fabric + ChipLogError(FabricProvisioning, "Found a FabricTable aborted commit for index 0x%x (isAddition: %d), removing!", + static_cast(commitMarker.fabricIndex), static_cast(commitMarker.isAddition)); + + mDeletedFabricIndexFromInit = commitMarker.fabricIndex; + + // Can't do better on error. We just have to hope for the best. + (void) Delete(commitMarker.fabricIndex); + } + else if (err != CHIP_ERROR_PERSISTED_STORAGE_VALUE_NOT_FOUND) + { + // Got an error, but somehow value is not missing altogether: inconsistent state but touch nothing. + ChipLogError(FabricProvisioning, "Error loading Table commit marker: %" CHIP_ERROR_FORMAT ", hope for the best!", + err.Format()); } return CHIP_NO_ERROR; @@ -1036,6 +1085,16 @@ void FabricTable::Shutdown() mStorage = nullptr; } +FabricIndex FabricTable::GetDeletedFabricFromCommitMarker() +{ + FabricIndex retVal = mDeletedFabricIndexFromInit; + + // Reset for next read + mDeletedFabricIndexFromInit = kUndefinedFabricIndex; + + return retVal; +} + CHIP_ERROR FabricTable::AddFabricDelegate(FabricTable::Delegate * delegate) { VerifyOrReturnError(delegate != nullptr, CHIP_ERROR_INVALID_ARGUMENT); @@ -1275,6 +1334,58 @@ CHIP_ERROR FabricTable::ReadFabricInfo(TLV::ContiguousBufferTLVReader & reader) return CHIP_NO_ERROR; } +CHIP_ERROR FabricTable::StoreCommitMarker(const CommitMarker & commitMarker) +{ + DefaultStorageKeyAllocator keyAlloc; + uint8_t tlvBuf[CommitMarkerContextTLVMaxSize()]; + TLV::TLVWriter writer; + writer.Init(tlvBuf); + + TLV::TLVType outerType; + ReturnErrorOnFailure(writer.StartContainer(TLV::AnonymousTag(), TLV::kTLVType_Structure, outerType)); + ReturnErrorOnFailure(writer.Put(kMarkerFabricIndexTag, commitMarker.fabricIndex)); + ReturnErrorOnFailure(writer.Put(kMarkerIsAdditionTag, commitMarker.isAddition)); + ReturnErrorOnFailure(writer.EndContainer(outerType)); + + const auto markerContextTLVLength = writer.GetLengthWritten(); + VerifyOrReturnError(CanCastTo(markerContextTLVLength), CHIP_ERROR_BUFFER_TOO_SMALL); + + return mStorage->SyncSetKeyValue(keyAlloc.FailSafeCommitMarkerKey(), tlvBuf, static_cast(markerContextTLVLength)); +} + +CHIP_ERROR FabricTable::GetCommitMarker(CommitMarker & outCommitMarker) +{ + DefaultStorageKeyAllocator keyAlloc; + uint8_t tlvBuf[CommitMarkerContextTLVMaxSize()]; + uint16_t tlvSize = sizeof(tlvBuf); + ReturnErrorOnFailure(mStorage->SyncGetKeyValue(keyAlloc.FailSafeCommitMarkerKey(), tlvBuf, tlvSize)); + + // If buffer was too small, we won't reach here. + TLV::ContiguousBufferTLVReader reader; + reader.Init(tlvBuf, tlvSize); + ReturnErrorOnFailure(reader.Next(TLV::kTLVType_Structure, TLV::AnonymousTag())); + + TLV::TLVType containerType; + ReturnErrorOnFailure(reader.EnterContainer(containerType)); + + ReturnErrorOnFailure(reader.Next(kMarkerFabricIndexTag)); + ReturnErrorOnFailure(reader.Get(outCommitMarker.fabricIndex)); + + ReturnErrorOnFailure(reader.Next(kMarkerIsAdditionTag)); + ReturnErrorOnFailure(reader.Get(outCommitMarker.isAddition)); + + // Don't try to exit container: we got all we needed. This allows us to + // avoid erroring-out on newer versions. + + return CHIP_NO_ERROR; +} + +void FabricTable::ClearCommitMarker() +{ + DefaultStorageKeyAllocator keyAlloc; + mStorage->SyncDeleteKeyValue(keyAlloc.FailSafeCommitMarkerKey()); +} + bool FabricTable::HasOperationalKeyForFabric(FabricIndex fabricIndex) const { const FabricInfo * fabricInfo = FindFabricWithIndex(fabricIndex); @@ -1677,8 +1788,13 @@ CHIP_ERROR FabricTable::CommitPendingFabricData() } // ==== Start of actual commit transaction after pre-flight checks ==== + CHIP_ERROR stickyError = StoreCommitMarker(CommitMarker{ fabricIndexBeingCommitted, isAdding }); + bool failedCommitMarker = (stickyError != CHIP_NO_ERROR); + if (failedCommitMarker) + { + ChipLogError(FabricProvisioning, "Failed to store commit marker, may be inconsistent if reboot happens during fail-safe!"); + } - CHIP_ERROR stickyError = CHIP_NO_ERROR; { // This scope block is to illustrate the complete commit transaction // state. We can see it contains a LARGE number of items... @@ -1726,6 +1842,23 @@ CHIP_ERROR FabricTable::CommitPendingFabricData() } stickyError = (stickyError != CHIP_NO_ERROR) ? stickyError : keyErr; + // For testing only, early return (NEVER OCCURS OTHERWISE) during the commit + // so that clean-ups using the commit marker can be tested. +#if CONFIG_BUILD_FOR_HOST_UNIT_TEST + { + if (mStateFlags.Has(StateFlags::kAbortCommitForTest)) + { + // Clear state so that shutdown doesn't attempt clean-up + mStateFlags.ClearAll(); + mFabricIndexWithPendingState = kUndefinedFabricIndex; + mPendingFabric.Reset(); + + ChipLogError(FabricProvisioning, "Aborting commit in middle of transaction for testing."); + return CHIP_ERROR_INTERNAL; + } + } +#endif // CONFIG_BUILD_FOR_HOST_UNIT_TEST + // Commit operational certs CHIP_ERROR opCertErr = mOpCertStore->CommitOpCertsForFabric(fabricIndexBeingCommitted); if (opCertErr != CHIP_NO_ERROR) @@ -1780,6 +1913,10 @@ CHIP_ERROR FabricTable::CommitPendingFabricData() NotifyFabricCommitted(fabricIndexBeingCommitted); } + // Clear commit marker no matter what: if we got here, there was no reboot and previous clean-ups + // did their job. + ClearCommitMarker(); + return stickyError; } diff --git a/src/credentials/FabricTable.h b/src/credentials/FabricTable.h index 445a9cfa9865f4..b7cffb12dcf114 100644 --- a/src/credentials/FabricTable.h +++ b/src/credentials/FabricTable.h @@ -422,6 +422,25 @@ class DLL_EXPORT FabricTable CHIP_ERROR Init(const FabricTable::InitParams & initParams); void Shutdown(); + /** + * @brief If `Init()` caused a Delete due to partial commit, the fabric index at play is returned. + * + * Allows caller to schedule more clean-up. This is because at Init() time, none of the delegates + * are registered yet, so no other modules would learn of the removal. + * + * The value is auto-reset to `kUndefinedFabricIndex` on being returned, so that subsequent + * `GetDeletedFabricFromCommitMarker()` after one that has a fabric index to give will provide + * `kUndefinedFabricIndex`. + * + * @return the fabric index of a just-deleted fabric, or kUndefinedFabricIndex if none were deleted. + */ + FabricIndex GetDeletedFabricFromCommitMarker(); + + /** + * @brief Clear the commit marker when we are sure we have proceeded with any remaining clean-up + */ + void ClearCommitMarker(); + // Forget a fabric in memory: doesn't delete any persistent state, just // reverts any pending state (blindly) and then resets the fabric table // entry. @@ -820,8 +839,24 @@ class DLL_EXPORT FabricTable return err; } + // For test only. See definition of `StateFlags::kAbortCommitForTest`. + void SetForceAbortCommitForTest(bool abortCommitForTest) + { + (void) abortCommitForTest; +#if CONFIG_BUILD_FOR_HOST_UNIT_TEST + if (abortCommitForTest) + { + mStateFlags.Set(StateFlags::kAbortCommitForTest); + } + else + { + mStateFlags.Clear(StateFlags::kAbortCommitForTest); + } +#endif // CONFIG_BUILD_FOR_HOST_UNIT_TEST + } + private: - enum class StateFlags : uint8_t + enum class StateFlags : uint16_t { // If true, we are in the process of a fail-safe and there was at least one // operation that caused partial data in the fabric table. @@ -838,17 +873,25 @@ class DLL_EXPORT FabricTable // True if we allow more than one fabric with same root and fabricId in the fabric table // for test purposes. This disables a collision check. kAreCollidingFabricsIgnored = (1u << 6), + + // If set to true (only possible on test builds), will cause `CommitPendingFabricData()` to early + // return during commit, skipping clean-ups, so that we can validate commit marker fabric removal. + kAbortCommitForTest = (1u << 7), }; - static constexpr size_t IndexInfoTLVMaxSize() + // Stored to indicate a commit is in progress, so that it can be cleaned-up on next boot + // if stopped in the middle. + struct CommitMarker { - // We have a single next-available index and an array of anonymous-tagged - // fabric indices. - // - // The max size of the list is (1 byte control + bytes for actual value) - // times max number of list items, plus one byte for the list terminator. - return TLV::EstimateStructOverhead(sizeof(FabricIndex), CHIP_CONFIG_MAX_FABRICS * (1 + sizeof(FabricIndex)) + 1); - } + CommitMarker() = default; + CommitMarker(FabricIndex fabricIndex_, bool isAddition_) + { + this->fabricIndex = fabricIndex_; + this->isAddition = isAddition_; + } + FabricIndex fabricIndex = kUndefinedFabricIndex; + bool isAddition = false; + }; // Load a FabricInfo metatada item from storage for a given new fabric index. Returns internal error on failure. CHIP_ERROR LoadFromStorage(FabricInfo * fabric, FabricIndex newFabricIndex); @@ -955,6 +998,10 @@ class DLL_EXPORT FabricTable CHIP_ERROR NotifyFabricUpdated(FabricIndex fabricIndex); CHIP_ERROR NotifyFabricCommitted(FabricIndex fabricIndex); + // Commit management clean-up APIs + CHIP_ERROR StoreCommitMarker(const CommitMarker & commitMarker); + CHIP_ERROR GetCommitMarker(CommitMarker & outCommitMarker); + FabricInfo mStates[CHIP_CONFIG_MAX_FABRICS]; // Used for UpdateNOC pending fabric updates FabricInfo mPendingFabric; @@ -970,6 +1017,9 @@ class DLL_EXPORT FabricTable // for which there is currently pending data. FabricIndex mFabricIndexWithPendingState = kUndefinedFabricIndex; + // For when a revert occurs during init, so that more clean-up can be scheduled by caller. + FabricIndex mDeletedFabricIndexFromInit = kUndefinedFabricIndex; + LastKnownGoodTime mLastKnownGoodTime; // We may not have an mNextAvailableFabricIndex if our table is as large as diff --git a/src/credentials/tests/TestFabricTable.cpp b/src/credentials/tests/TestFabricTable.cpp index 8fa0a5aed55aa9..b652dfbfd120a3 100644 --- a/src/credentials/tests/TestFabricTable.cpp +++ b/src/credentials/tests/TestFabricTable.cpp @@ -1742,6 +1742,219 @@ void TestInvalidChaining(nlTestSuite * inSuite, void * inContext) // TODO: Write test } +void TestCommitMarker(nlTestSuite * inSuite, void * inContext) +{ + Crypto::P256PublicKey fIdx1PublicKey; + Crypto::P256PublicKey fIdx2PublicKey; + + Credentials::TestOnlyLocalCertificateAuthority fabricCertAuthority; + + chip::TestPersistentStorageDelegate storage; + + // Log verbosity on this test helps debug significantly + storage.SetLoggingLevel(chip::TestPersistentStorageDelegate::LoggingLevel::kLogMutationAndReads); + + NL_TEST_ASSERT(inSuite, fabricCertAuthority.Init().IsSuccess()); + + constexpr uint16_t kVendorId = 0xFFF1u; + + size_t numStorageKeysAfterFirstAdd = 0; + + // First scope: add 2 fabrics with same root: + // - FabricID 1111, Node ID 55 + // - FabricID 2222, Node ID 66 + // - Abort commit on second fabric + { + // Initialize a FabricTable + ScopedFabricTable fabricTableHolder; + NL_TEST_ASSERT(inSuite, fabricTableHolder.Init(&storage) == CHIP_NO_ERROR); + FabricTable & fabricTable = fabricTableHolder.GetFabricTable(); + + NL_TEST_ASSERT(inSuite, fabricTable.GetDeletedFabricFromCommitMarker() == kUndefinedFabricIndex); + NL_TEST_ASSERT_EQUALS(inSuite, fabricTable.FabricCount(), 0); + + // Add Fabric 1111 Node Id 55 + { + FabricId fabricId = 1111; + NodeId nodeId = 55; + + uint8_t csrBuf[chip::Crypto::kMAX_CSR_Length]; + MutableByteSpan csrSpan{ csrBuf }; + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.AllocatePendingOperationalKey(chip::NullOptional, csrSpan)); + + NL_TEST_ASSERT_SUCCESS( + inSuite, fabricCertAuthority.SetIncludeIcac(true).GenerateNocChain(fabricId, nodeId, csrSpan).GetStatus()); + ByteSpan rcac = fabricCertAuthority.GetRcac(); + ByteSpan icac = fabricCertAuthority.GetIcac(); + ByteSpan noc = fabricCertAuthority.GetNoc(); + + NL_TEST_ASSERT_EQUALS(inSuite, fabricTable.FabricCount(), 0); + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.AddNewPendingTrustedRootCert(rcac)); + FabricIndex newFabricIndex = kUndefinedFabricIndex; + NL_TEST_ASSERT_SUCCESS(inSuite, + fabricTable.AddNewPendingFabricWithOperationalKeystore(noc, icac, kVendorId, &newFabricIndex)); + NL_TEST_ASSERT_EQUALS(inSuite, fabricTable.FabricCount(), 1); + NL_TEST_ASSERT(inSuite, newFabricIndex == 1); + + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.CommitPendingFabricData()); + + // Validate contents + const auto * fabricInfo = fabricTable.FindFabricWithIndex(1); + NL_TEST_ASSERT(inSuite, fabricInfo != nullptr); + if (fabricInfo != nullptr) + { + Credentials::ChipCertificateSet certificates; + NL_TEST_ASSERT_SUCCESS(inSuite, certificates.Init(1)); + NL_TEST_ASSERT_SUCCESS(inSuite, + certificates.LoadCert(rcac, BitFlags(CertDecodeFlags::kIsTrustAnchor))); + Crypto::P256PublicKey rcacPublicKey(certificates.GetCertSet()[0].mPublicKey); + + NL_TEST_ASSERT(inSuite, fabricInfo->GetFabricIndex() == 1); + NL_TEST_ASSERT(inSuite, fabricInfo->GetNodeId() == 55); + NL_TEST_ASSERT(inSuite, fabricInfo->GetFabricId() == 1111); + NL_TEST_ASSERT(inSuite, fabricInfo->GetVendorId() == kVendorId); + NL_TEST_ASSERT(inSuite, fabricInfo->GetFabricLabel().size() == 0); + + Crypto::P256PublicKey rootPublicKeyOfFabric; + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.FetchRootPubkey(newFabricIndex, rootPublicKeyOfFabric)); + NL_TEST_ASSERT(inSuite, rootPublicKeyOfFabric.Matches(rcacPublicKey)); + } + + // Validate that fabric has the correct operational key by verifying a signature + { + Crypto::P256ECDSASignature sig; + uint8_t message[] = { 'm', 's', 'g' }; + + NL_TEST_ASSERT_SUCCESS(inSuite, VerifyCertificateSigningRequest(csrSpan.data(), csrSpan.size(), fIdx1PublicKey)); + + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.SignWithOpKeypair(newFabricIndex, ByteSpan{ message }, sig)); + NL_TEST_ASSERT_SUCCESS(inSuite, fIdx1PublicKey.ECDSA_validate_msg_signature(&message[0], sizeof(message), sig)); + } + } + numStorageKeysAfterFirstAdd = storage.GetNumKeys(); + + NL_TEST_ASSERT(inSuite, numStorageKeysAfterFirstAdd == 7); // Metadata, index, 3 certs, 1 opkey, last known good time + + // The following test requires test methods not available on all builds. + // TODO: Debug why some CI jobs don't set it properly. +#if CONFIG_BUILD_FOR_HOST_UNIT_TEST + + // Add Fabric 2222 Node Id 66, no ICAC *** AND ABORT COMMIT *** + { + FabricId fabricId = 2222; + NodeId nodeId = 66; + + uint8_t csrBuf[chip::Crypto::kMAX_CSR_Length]; + MutableByteSpan csrSpan{ csrBuf }; + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.AllocatePendingOperationalKey(chip::NullOptional, csrSpan)); + + NL_TEST_ASSERT_SUCCESS( + inSuite, fabricCertAuthority.SetIncludeIcac(false).GenerateNocChain(fabricId, nodeId, csrSpan).GetStatus()); + ByteSpan rcac = fabricCertAuthority.GetRcac(); + ByteSpan noc = fabricCertAuthority.GetNoc(); + + NL_TEST_ASSERT_EQUALS(inSuite, fabricTable.FabricCount(), 1); + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.AddNewPendingTrustedRootCert(rcac)); + FabricIndex newFabricIndex = kUndefinedFabricIndex; + NL_TEST_ASSERT_SUCCESS( + inSuite, fabricTable.AddNewPendingFabricWithOperationalKeystore(noc, ByteSpan{}, kVendorId, &newFabricIndex)); + NL_TEST_ASSERT_EQUALS(inSuite, fabricTable.FabricCount(), 2); + NL_TEST_ASSERT(inSuite, newFabricIndex == 2); + + // Validate contents of pending + const auto * fabricInfo = fabricTable.FindFabricWithIndex(2); + NL_TEST_ASSERT(inSuite, fabricInfo != nullptr); + if (fabricInfo != nullptr) + { + Credentials::ChipCertificateSet certificates; + NL_TEST_ASSERT_SUCCESS(inSuite, certificates.Init(1)); + NL_TEST_ASSERT_SUCCESS(inSuite, + certificates.LoadCert(rcac, BitFlags(CertDecodeFlags::kIsTrustAnchor))); + Crypto::P256PublicKey rcacPublicKey(certificates.GetCertSet()[0].mPublicKey); + + NL_TEST_ASSERT(inSuite, fabricInfo->GetFabricIndex() == 2); + NL_TEST_ASSERT(inSuite, fabricInfo->GetNodeId() == 66); + NL_TEST_ASSERT(inSuite, fabricInfo->GetFabricId() == 2222); + NL_TEST_ASSERT(inSuite, fabricInfo->GetVendorId() == kVendorId); + NL_TEST_ASSERT(inSuite, fabricInfo->GetFabricLabel().size() == 0); + + Crypto::P256PublicKey rootPublicKeyOfFabric; + NL_TEST_ASSERT_SUCCESS(inSuite, fabricTable.FetchRootPubkey(newFabricIndex, rootPublicKeyOfFabric)); + NL_TEST_ASSERT(inSuite, rootPublicKeyOfFabric.Matches(rcacPublicKey)); + } + + // Make sure no additional storage yet + NL_TEST_ASSERT(inSuite, storage.GetNumKeys() == numStorageKeysAfterFirstAdd); + + // --> FORCE AN ERROR ON COMMIT that will BYPASS commit clean-up (similar to reboot during commit) + fabricTable.SetForceAbortCommitForTest(true); + NL_TEST_ASSERT(inSuite, fabricTable.CommitPendingFabricData() == CHIP_ERROR_INTERNAL); + + // Check that there are more keys now, partially committed: at least a Commit Marker (+1) + // and some more keys from the aborted process. + NL_TEST_ASSERT(inSuite, storage.GetNumKeys() > (numStorageKeysAfterFirstAdd + 1)); + } +#endif // CONFIG_BUILD_FOR_HOST_UNIT_TEST + } + +#if CONFIG_BUILD_FOR_HOST_UNIT_TEST + { + storage.DumpKeys(); + + // Initialize a FabricTable again. Make sure it succeeds in initing. + ScopedFabricTable fabricTableHolder; + + NL_TEST_ASSERT(inSuite, storage.GetNumKeys() > (numStorageKeysAfterFirstAdd + 1)); + + NL_TEST_ASSERT(inSuite, fabricTableHolder.Init(&storage) == CHIP_NO_ERROR); + FabricTable & fabricTable = fabricTableHolder.GetFabricTable(); + + // Make sure that after init, the fabricTable has only 1 fabric + NL_TEST_ASSERT_EQUALS(inSuite, fabricTable.FabricCount(), 1); + + // Make sure it caught the last partially committed fabric + NL_TEST_ASSERT(inSuite, fabricTable.GetDeletedFabricFromCommitMarker() == 2); + + // Second read must return kUndefinedFabricIndex + NL_TEST_ASSERT(inSuite, fabricTable.GetDeletedFabricFromCommitMarker() == kUndefinedFabricIndex); + + { + // Here we would do other clean-ups (e.g. see Server.cpp that uses the above) and then + // clear the commit marker after. + fabricTable.ClearCommitMarker(); + } + + // Make sure that all other pending storage got deleted + NL_TEST_ASSERT(inSuite, storage.GetNumKeys() == numStorageKeysAfterFirstAdd); + + // Verify we can only see 1 fabric with the iterator + { + size_t numFabricsIterated = 0; + bool saw1 = false; + bool saw2 = false; + for (const auto & iterFabricInfo : fabricTable) + { + ++numFabricsIterated; + if (iterFabricInfo.GetFabricIndex() == 1) + { + NL_TEST_ASSERT(inSuite, iterFabricInfo.GetNodeId() == 55); + NL_TEST_ASSERT(inSuite, iterFabricInfo.GetFabricId() == 1111); + saw1 = true; + } + if (iterFabricInfo.GetFabricIndex() == 2) + { + saw2 = true; + } + } + + NL_TEST_ASSERT(inSuite, numFabricsIterated == 1); + NL_TEST_ASSERT(inSuite, saw1 == true); + NL_TEST_ASSERT(inSuite, saw2 == false); + } + } +#endif // CONFIG_BUILD_FOR_HOST_UNIT_TEST +} + // Test Suite /** @@ -1763,6 +1976,7 @@ static const nlTest sTests[] = NL_TEST_DEF("Test compressed fabric ID is properly generated", TestCompressedFabricId), NL_TEST_DEF("Test AddNOC root collision", TestAddNocRootCollision), NL_TEST_DEF("Test invalid chaining in AddNOC and UpdateNOC", TestInvalidChaining), + NL_TEST_DEF("Test proper detection of Commit Marker on init", TestCommitMarker), NL_TEST_SENTINEL() }; diff --git a/src/include/platform/ConfigurationManager.h b/src/include/platform/ConfigurationManager.h index 5d71e89e1eca51..fae1475030eb11 100644 --- a/src/include/platform/ConfigurationManager.h +++ b/src/include/platform/ConfigurationManager.h @@ -29,8 +29,8 @@ #include #include #include -#include #include +#include #include namespace chip { diff --git a/src/include/platform/DeviceControlServer.h b/src/include/platform/DeviceControlServer.h index 865211e016ad86..34c91401e4e5a5 100644 --- a/src/include/platform/DeviceControlServer.h +++ b/src/include/platform/DeviceControlServer.h @@ -23,7 +23,6 @@ #pragma once #include -#include #include namespace chip { @@ -38,14 +37,11 @@ class DeviceControlServer final CHIP_ERROR SetRegulatoryConfig(uint8_t location, const CharSpan & countryCode); CHIP_ERROR PostConnectedToOperationalNetworkEvent(ByteSpan networkID); - FailSafeContext & GetFailSafeContext() { return mFailSafeContext; } - static DeviceControlServer & DeviceControlSvr(); private: // ===== Members for internal use by the following friends. static DeviceControlServer sInstance; - FailSafeContext mFailSafeContext; // ===== Private members reserved for use by this class only. diff --git a/src/lib/support/DefaultStorageKeyAllocator.h b/src/lib/support/DefaultStorageKeyAllocator.h index b17b0b3d6f905b..efddd0f1f743f6 100644 --- a/src/lib/support/DefaultStorageKeyAllocator.h +++ b/src/lib/support/DefaultStorageKeyAllocator.h @@ -50,8 +50,8 @@ class DefaultStorageKeyAllocator const char * FabricMetadata(FabricIndex fabric) { return Format("f/%x/m", fabric); } const char * FabricOpKey(FabricIndex fabric) { return Format("f/%x/o", fabric); } - // FailSafeContext - const char * FailSafeContextKey() { return Format("g/fs/c"); } + // Fail-safe handling + const char * FailSafeCommitMarkerKey() { return Format("g/fs/c"); } static const char * FailSafeNetworkConfig() { return "g/fs/n"; } // LastKnownGoodTime diff --git a/src/platform/BUILD.gn b/src/platform/BUILD.gn index 4a5cdf0b61d204..fdcc45d0424ac7 100644 --- a/src/platform/BUILD.gn +++ b/src/platform/BUILD.gn @@ -321,7 +321,6 @@ if (chip_device_platform != "none") { "../include/platform/ConnectivityManager.h", "../include/platform/DeviceControlServer.h", "../include/platform/DeviceInstanceInfoProvider.h", - "../include/platform/FailSafeContext.h", "../include/platform/GeneralUtils.h", "../include/platform/KeyValueStoreManager.h", "../include/platform/KvsPersistentStorageDelegate.h", @@ -366,7 +365,6 @@ if (chip_device_platform != "none") { "DeviceInstanceInfoProvider.cpp", "DiagnosticDataProvider.cpp", "Entropy.cpp", - "FailSafeContext.cpp", "GeneralUtils.cpp", "Globals.cpp", "LockTracker.cpp", diff --git a/src/platform/OpenThread/GenericNetworkCommissioningThreadDriver.cpp b/src/platform/OpenThread/GenericNetworkCommissioningThreadDriver.cpp index b48c09164389a1..5b41aac5f2fdf2 100644 --- a/src/platform/OpenThread/GenericNetworkCommissioningThreadDriver.cpp +++ b/src/platform/OpenThread/GenericNetworkCommissioningThreadDriver.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -86,6 +87,8 @@ CHIP_ERROR GenericThreadDriver::RevertConfiguration() ReturnErrorCodeIf(error == CHIP_ERROR_PERSISTED_STORAGE_VALUE_NOT_FOUND, CHIP_NO_ERROR); ReturnErrorOnFailure(error); + ChipLogError(NetworkProvisioning, "Found Thread configuration backup: reverting configuration"); + // Not all KVS implementations support zero-length values, so handle a special value representing an empty dataset. ByteSpan dataset(datasetBytes, datasetLength); @@ -97,6 +100,7 @@ CHIP_ERROR GenericThreadDriver::RevertConfiguration() ReturnErrorOnFailure(mStagingNetwork.Init(dataset)); ReturnErrorOnFailure(DeviceLayer::ThreadStackMgrImpl().AttachToThreadNetwork(mStagingNetwork, /* callback */ nullptr)); + // TODO: What happens on errors above? Why do we not remove the failsafe? return KeyValueStoreMgr().Delete(DefaultStorageKeyAllocator::FailSafeNetworkConfig()); } diff --git a/src/platform/tests/BUILD.gn b/src/platform/tests/BUILD.gn index 9f447b6b4c5aa0..d34c8b00c88ce4 100644 --- a/src/platform/tests/BUILD.gn +++ b/src/platform/tests/BUILD.gn @@ -88,10 +88,7 @@ if (chip_device_platform != "none" && chip_device_platform != "fake") { } if (chip_device_platform == "linux") { - test_sources += [ - "TestConnectivityMgr.cpp", - "TestFailSafeContext.cpp", - ] + test_sources += [ "TestConnectivityMgr.cpp" ] } } } else { diff --git a/src/test_driver/nrfconnect/main/include/CHIPProjectConfig.h b/src/test_driver/nrfconnect/main/include/CHIPProjectConfig.h index 3a25601cfd884e..3c3df5b97d593a 100644 --- a/src/test_driver/nrfconnect/main/include/CHIPProjectConfig.h +++ b/src/test_driver/nrfconnect/main/include/CHIPProjectConfig.h @@ -30,4 +30,7 @@ // Enable support functions for parsing command-line arguments #define CHIP_CONFIG_ENABLE_ARG_PARSER 1 +// Enable unit-test only features +#define CONFIG_BUILD_FOR_HOST_UNIT_TEST 1 + #endif // CHIP_PROJECT_CONFIG_H