Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly handle crashes/reboots during FabricTable commit #20010

Merged
2 changes: 2 additions & 0 deletions src/app/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ static_library("app") {
"DeviceProxy.h",
"EventManagement.cpp",
"EventPathParams.h",
"FailSafeContext.cpp",
"FailSafeContext.h",
"GlobalAttributes.h",
"InteractionModelEngine.cpp",
"InteractionModelRevision.h",
Expand Down
113 changes: 10 additions & 103 deletions src/platform/FailSafeContext.cpp → src/app/FailSafeContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,15 @@
* Provides the implementation of the FailSafeContext object.
*/

#include <platform/FailSafeContext.h>

#include <lib/support/DefaultStorageKeyAllocator.h>
#include <lib/support/SafeInt.h>
#include <platform/ConfigurationManager.h>
#include <platform/internal/CHIPDeviceLayerInternal.h>

namespace chip {
namespace DeviceLayer {
#include "FailSafeContext.h"

using namespace chip::DeviceLayer;

namespace {
constexpr TLV::Tag kFabricIndexTag = TLV::ContextTag(0);
constexpr TLV::Tag kAddNocCommandTag = TLV::ContextTag(1);
constexpr TLV::Tag kUpdateNocCommandTag = TLV::ContextTag(2);
} // anonymous namespace
namespace chip {
namespace app {

void FailSafeContext::HandleArmFailSafeTimer(System::Layer * layer, void * aAppState)
{
Expand Down Expand Up @@ -89,7 +84,7 @@ void FailSafeContext::ScheduleFailSafeCleanup(FabricIndex fabricIndex, bool addN
PlatformMgr().ScheduleWork(HandleDisarmFailSafe, reinterpret_cast<intptr_t>(this));
}

CHIP_ERROR FailSafeContext::ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Timeout expiryLength)
CHIP_ERROR FailSafeContext::ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Seconds16 expiryLengthSeconds)
{
CHIP_ERROR err = CHIP_NO_ERROR;
bool cancelTimersIfError = false;
Expand All @@ -100,9 +95,8 @@ CHIP_ERROR FailSafeContext::ArmFailSafe(FabricIndex accessingFabricIndex, System
cancelTimersIfError = true;
}

SuccessOrExit(err = DeviceLayer::SystemLayer().StartTimer(expiryLength, HandleArmFailSafeTimer, this));
SuccessOrExit(err = CommitToStorage());
SuccessOrExit(err = ConfigurationMgr().SetFailSafeArmed(true));
SuccessOrExit(
err = DeviceLayer::SystemLayer().StartTimer(System::Clock::Seconds16(expiryLengthSeconds), HandleArmFailSafeTimer, this));

mFailSafeArmed = true;
mFabricIndex = accessingFabricIndex;
Expand All @@ -124,96 +118,9 @@ void FailSafeContext::DisarmFailSafe()

ResetState();

if (ConfigurationMgr().SetFailSafeArmed(false) != CHIP_NO_ERROR)
{
ChipLogError(FailSafe, "Failed to set FailSafeArmed config to false");
}

if (DeleteFromStorage() != CHIP_NO_ERROR)
{
ChipLogError(FailSafe, "Failed to delete FailSafeContext from configuration");
}

ChipLogProgress(FailSafe, "Fail-safe cleanly disarmed");
}

CHIP_ERROR FailSafeContext::SetAddNocCommandInvoked(FabricIndex nocFabricIndex)
{
mAddNocCommandHasBeenInvoked = true;
mFabricIndex = nocFabricIndex;

ReturnErrorOnFailure(CommitToStorage());

return CHIP_NO_ERROR;
}

CHIP_ERROR FailSafeContext::SetUpdateNocCommandInvoked()
{
mUpdateNocCommandHasBeenInvoked = true;

ReturnErrorOnFailure(CommitToStorage());

return CHIP_NO_ERROR;
}

CHIP_ERROR FailSafeContext::CommitToStorage()
{
DefaultStorageKeyAllocator keyAlloc;
uint8_t buf[FailSafeContextTLVMaxSize()];
TLV::TLVWriter writer;
writer.Init(buf);

TLV::TLVType outerType;
ReturnErrorOnFailure(writer.StartContainer(TLV::AnonymousTag(), TLV::kTLVType_Structure, outerType));
ReturnErrorOnFailure(writer.Put(kFabricIndexTag, mFabricIndex));

// TODO: Stop storing this, and just make the fail-safe context volatile and sweep-up stale data next boot on partial commits
ReturnErrorOnFailure(writer.Put(kAddNocCommandTag, mAddNocCommandHasBeenInvoked));
ReturnErrorOnFailure(writer.Put(kUpdateNocCommandTag, mUpdateNocCommandHasBeenInvoked));
ReturnErrorOnFailure(writer.EndContainer(outerType));

const auto failSafeContextTLVLength = writer.GetLengthWritten();
VerifyOrReturnError(CanCastTo<uint16_t>(failSafeContextTLVLength), CHIP_ERROR_BUFFER_TOO_SMALL);

return PersistedStorage::KeyValueStoreMgr().Put(keyAlloc.FailSafeContextKey(), buf,
static_cast<uint16_t>(failSafeContextTLVLength));
}

CHIP_ERROR FailSafeContext::LoadFromStorage(FabricIndex & fabricIndex, bool & addNocCommandInvoked, bool & updateNocCommandInvoked)
{
DefaultStorageKeyAllocator keyAlloc;
uint8_t buf[FailSafeContextTLVMaxSize()];
ReturnErrorOnFailure(PersistedStorage::KeyValueStoreMgr().Get(keyAlloc.FailSafeContextKey(), buf, sizeof(buf)));

TLV::ContiguousBufferTLVReader reader;
reader.Init(buf, sizeof(buf));
ReturnErrorOnFailure(reader.Next(TLV::kTLVType_Structure, TLV::AnonymousTag()));

TLV::TLVType containerType;
ReturnErrorOnFailure(reader.EnterContainer(containerType));

ReturnErrorOnFailure(reader.Next(kFabricIndexTag));
ReturnErrorOnFailure(reader.Get(fabricIndex));

ReturnErrorOnFailure(reader.Next(kAddNocCommandTag));
ReturnErrorOnFailure(reader.Get(addNocCommandInvoked));

ReturnErrorOnFailure(reader.Next(kUpdateNocCommandTag));
ReturnErrorOnFailure(reader.Get(updateNocCommandInvoked));

ReturnErrorOnFailure(reader.VerifyEndOfContainer());
ReturnErrorOnFailure(reader.ExitContainer(containerType));

return CHIP_NO_ERROR;
}

CHIP_ERROR FailSafeContext::DeleteFromStorage()
{
DefaultStorageKeyAllocator keyAlloc;

return PersistedStorage::KeyValueStoreMgr().Delete(keyAlloc.FailSafeContextKey());
}

void FailSafeContext::ForceFailSafeTimerExpiry()
{
if (!IsFailSafeArmed())
Expand All @@ -228,5 +135,5 @@ void FailSafeContext::ForceFailSafeTimerExpiry()
FailSafeTimerExpired();
}

} // namespace DeviceLayer
} // namespace app
} // namespace chip
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@

#pragma once

#include <lib/core/CHIPError.h>
#include <lib/core/DataModelTypes.h>
#include <platform/internal/CHIPDeviceLayerInternal.h>
#include <system/SystemClock.h>

namespace chip {
namespace DeviceLayer {
namespace app {

class FailSafeContext
{
Expand All @@ -39,14 +42,18 @@ class FailSafeContext
* when the fail-safe timer is currently armed, the currently-running fail-safe timer will
* first be cancelled, then the fail-safe timer will be re-armed.
*/
CHIP_ERROR ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Timeout expiryLength);
CHIP_ERROR ArmFailSafe(FabricIndex accessingFabricIndex, System::Clock::Seconds16 expiryLengthSeconds);

/**
* @brief Cleanly disarm failsafe timer, such as on CommissioningComplete
*/
void DisarmFailSafe();
CHIP_ERROR SetAddNocCommandInvoked(FabricIndex nocFabricIndex);
CHIP_ERROR SetUpdateNocCommandInvoked();
void SetAddNocCommandInvoked(FabricIndex nocFabricIndex)
{
mAddNocCommandHasBeenInvoked = true;
tcarmelveilleux marked this conversation as resolved.
Show resolved Hide resolved
mFabricIndex = nocFabricIndex;
}
void SetUpdateNocCommandInvoked() { mUpdateNocCommandHasBeenInvoked = true; }
void SetAddTrustedRootCertInvoked() { mAddTrustedRootCertHasBeenInvoked = true; }
void SetCsrRequestForUpdateNoc(bool isForUpdateNoc) { mIsCsrRequestForUpdateNoc = isForUpdateNoc; }

Expand Down Expand Up @@ -90,9 +97,6 @@ class FailSafeContext
// If the failsafe is not armed, this is a no-op.
void ForceFailSafeTimerExpiry();

static CHIP_ERROR LoadFromStorage(FabricIndex & fabricIndex, bool & addNocCommandInvoked, bool & updateNocCommandInvoked);
static CHIP_ERROR DeleteFromStorage();

private:
bool mFailSafeArmed = false;
bool mFailSafeBusy = false;
Expand All @@ -103,13 +107,6 @@ class FailSafeContext
bool mIsCsrRequestForUpdateNoc = false;
FabricIndex mFabricIndex = kUndefinedFabricIndex;

// TODO:: Track the state of what was mutated during fail-safe.

static constexpr size_t FailSafeContextTLVMaxSize()
{
return TLV::EstimateStructOverhead(sizeof(FabricIndex), sizeof(bool), sizeof(bool));
}

/**
* @brief
* The callback function to be called when "fail-safe timer" expires.
Expand Down Expand Up @@ -146,5 +143,5 @@ class FailSafeContext
CHIP_ERROR CommitToStorage();
};

} // namespace DeviceLayer
} // namespace app
} // namespace chip
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ bool emberAfAdministratorCommissioningClusterOpenCommissioningWindowCallback(

FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex();
FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex);
auto & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();
auto & failSafeContext = Server::GetInstance().GetFailSafeContext();
auto & commissionMgr = Server::GetInstance().GetCommissioningWindowManager();

VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR));
Expand Down Expand Up @@ -169,7 +169,7 @@ bool emberAfAdministratorCommissioningClusterOpenBasicCommissioningWindowCallbac

FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex();
FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex);
auto & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();
auto & failSafeContext = Server::GetInstance().GetFailSafeContext();
auto & commissionMgr = Server::GetInstance().GetCommissioningWindowManager();

VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ bool emberAfGeneralCommissioningClusterArmFailSafeCallback(app::CommandHandler *
const Commands::ArmFailSafe::DecodableType & commandData)
{
MATTER_TRACE_EVENT_SCOPE("ArmFailSafe", "GeneralCommissioning");
FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();
auto & failSafeContext = Server::GetInstance().GetFailSafeContext();
Commands::ArmFailSafeResponse::Type response;

ChipLogProgress(FailSafe, "GeneralCommissioning: Received ArmFailSafe (%us)",
Expand Down Expand Up @@ -219,9 +219,9 @@ bool emberAfGeneralCommissioningClusterCommissioningCompleteCallback(
{
MATTER_TRACE_EVENT_SCOPE("CommissioningComplete", "GeneralCommissioning");

DeviceControlServer * server = &DeviceLayer::DeviceControlServer::DeviceControlSvr();
auto & failSafe = server->GetFailSafeContext();
auto & fabricTable = Server::GetInstance().GetFabricTable();
DeviceControlServer * devCtrl = &DeviceLayer::DeviceControlServer::DeviceControlSvr();
auto & failSafe = Server::GetInstance().GetFailSafeContext();
auto & fabricTable = Server::GetInstance().GetFabricTable();

ChipLogProgress(FailSafe, "GeneralCommissioning: Received CommissioningComplete");

Expand Down Expand Up @@ -267,7 +267,7 @@ bool emberAfGeneralCommissioningClusterCommissioningCompleteCallback(
*/
failSafe.DisarmFailSafe();
CheckSuccess(
server->PostCommissioningCompleteEvent(handle->AsSecureSession()->GetPeerNodeId(), handle->GetFabricIndex()),
devCtrl->PostCommissioningCompleteEvent(handle->AsSecureSession()->GetPeerNodeId(), handle->GetFabricIndex()),
Failure);

Breadcrumb::Set(commandPath.mEndpointId, 0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <app/CommandHandlerInterface.h>
#include <app/InteractionModelEngine.h>
#include <app/clusters/general-commissioning-server/general-commissioning-server.h>
#include <app/server/Server.h>
#include <app/util/attribute-storage.h>
#include <lib/support/SafeInt.h>
#include <lib/support/ThreadOperationalDataset.h>
Expand Down Expand Up @@ -301,7 +302,7 @@ void FillDebugTextAndNetworkIndex(Commands::NetworkConfigResponse::Type & respon

bool CheckFailSafeArmed(CommandHandlerInterface::HandlerContext & ctx)
{
DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();
auto & failSafeContext = chip::Server::GetInstance().GetFailSafeContext();

if (failSafeContext.IsFailSafeArmed(ctx.mCommandHandler.GetAccessingFabricIndex()))
{
Expand Down
Loading