Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix UpdateNOC session invalidation and opcreds timing #20461

Merged
merged 4 commits into from
Jul 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/chip-tool/commands/tests/TestCommand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ CHIP_ERROR TestCommand::WaitForCommissionee(const char * identity,
// or is just starting out fresh outright. Let's make sure we're not re-using any cached CASE sessions
// that will now be stale and mismatched with the peer, causing subsequent interactions to fail.
//
GetCommissioner(identity).SessionMgr()->ExpireAllPairings(chip::ScopedNodeId(value.nodeId, fabricIndex));
GetCommissioner(identity).SessionMgr()->ExpireAllSessions(chip::ScopedNodeId(value.nodeId, fabricIndex));

SetIdentity(identity);
return GetCommissioner(identity).GetConnectedDevice(value.nodeId, &mOnDeviceConnectedCallback,
Expand Down
19 changes: 19 additions & 0 deletions src/app/CommandHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,25 @@ class CommandHandler
*/
Messaging::ExchangeContext * GetExchangeContext() const { return mpExchangeCtx; }

/**
* @brief Flush acks right away for a slow command
*
* Some commands that do heavy lifting of storage/crypto should
* ack right away to improve reliability and reduce needless retries. This
* method can be manually called in commands that are especially slow to
* immediately schedule an acknowledgement (if needed) since the delayed
* stand-alone ack timer may actually not hit soon enough due to blocking command
* execution.
*
*/
void FlushAcksRightAwayOnSlowCommand()
{
VerifyOrReturn(mpExchangeCtx != nullptr);
auto * msgContext = mpExchangeCtx->GetReliableMessageContext();
VerifyOrReturn(msgContext != nullptr);
msgContext->FlushAcks();
}

Access::SubjectDescriptor GetSubjectDescriptor() const { return mpExchangeCtx->GetSessionHandle()->GetSubjectDescriptor(); }

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ CHIP_ERROR DeleteFabricFromTable(FabricIndex fabricIndex)
void CleanupSessionsForFabric(SessionManager & sessionMgr, FabricIndex fabricIndex)
{
InteractionModelEngine::GetInstance()->CloseTransactionsFromFabricIndex(fabricIndex);
sessionMgr.ExpireAllPairingsForFabric(fabricIndex);
sessionMgr.ExpireAllSessionsForFabric(fabricIndex);
}

void FailSafeCleanup(const chip::DeviceLayer::ChipDeviceEvent * event)
Expand All @@ -278,13 +278,16 @@ void FailSafeCleanup(const chip::DeviceLayer::ChipDeviceEvent * event)
// Session Context at the Server.
if (event->FailSafeTimerExpired.addNocCommandHasBeenInvoked || event->FailSafeTimerExpired.updateNocCommandHasBeenInvoked)
{
CASESessionManager * caseSessionManager = Server::GetInstance().GetCASESessionManager();
if (caseSessionManager)
// TODO(#19259): The following scope will no longer need to exist after #19259 is fixed
{
const FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex);
VerifyOrReturn(fabricInfo != nullptr);
CASESessionManager * caseSessionManager = Server::GetInstance().GetCASESessionManager();
if (caseSessionManager)
{
const FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex);
VerifyOrReturn(fabricInfo != nullptr);

caseSessionManager->ReleaseSessionsForFabric(fabricInfo->GetFabricIndex());
caseSessionManager->ReleaseSessionsForFabric(fabricInfo->GetFabricIndex());
}
}

SessionManager & sessionMgr = Server::GetInstance().GetSecureSessionManager();
Expand Down Expand Up @@ -416,6 +419,8 @@ bool emberAfOperationalCredentialsClusterRemoveFabricCallback(app::CommandHandle
return true;
}

commandObj->FlushAcksRightAwayOnSlowCommand();
tcarmelveilleux marked this conversation as resolved.
Show resolved Hide resolved

CHIP_ERROR err = DeleteFabricFromTable(fabricBeingRemoved);
SuccessOrExit(err);

Expand Down Expand Up @@ -638,6 +643,9 @@ bool emberAfOperationalCredentialsClusterAddNOCCallback(app::CommandHandler * co
// Internal error that would prevent IPK from being added
VerifyOrExit(groupDataProvider != nullptr, nonDefaultStatus = Status::Failure);

// Flush acks before really slow work
commandObj->FlushAcksRightAwayOnSlowCommand();

// TODO: Add support for calling AddNOC without a prior AddTrustedRootCertificate if
// the root properly matches an existing one.

Expand Down Expand Up @@ -803,6 +811,9 @@ bool emberAfOperationalCredentialsClusterUpdateNOCCallback(app::CommandHandler *
VerifyOrExit(fabricInfo != nullptr, nocResponse = ConvertToNOCResponseStatus(CHIP_ERROR_INSUFFICIENT_PRIVILEGE));
fabricIndex = fabricInfo->GetFabricIndex();

// Flush acks before really slow work
commandObj->FlushAcksRightAwayOnSlowCommand();

err = fabricTable.UpdatePendingFabricWithOperationalKeystore(fabricIndex, NOCValue, ICACValue.ValueOr(ByteSpan{}));
VerifyOrExit(err == CHIP_NO_ERROR, nocResponse = ConvertToNOCResponseStatus(err));

Expand Down Expand Up @@ -830,6 +841,17 @@ bool emberAfOperationalCredentialsClusterUpdateNOCCallback(app::CommandHandler *
else
{
ChipLogProgress(Zcl, "OpCreds: UpdateNOC successful.");

// On success, revoke all CASE sessions on the fabric hosting the exchange.
// From spec:
//
// All internal data reflecting the prior operational identifier of the Node within the Fabric
// SHALL be revoked and removed, to an outcome equivalent to the disappearance of the prior Node,
// except for the ongoing CASE session context, which SHALL temporarily remain valid until the
// `NOCResponse` has been successfully delivered or until the next transport-layer error, so
// that the response can be received by the Administrator invoking the command.

commandObj->GetExchangeContext()->AbortAllOtherCommunicationOnFabric();
}
}
// No NOC response - Failed constraints
Expand Down Expand Up @@ -921,6 +943,10 @@ bool emberAfOperationalCredentialsClusterAttestationRequestCallback(app::Command
const ByteSpan kEmptyFirmwareInfo;

ChipLogProgress(Zcl, "OpCreds: Received an AttestationRequest command");

// Flush acks before really slow work
commandObj->FlushAcksRightAwayOnSlowCommand();

Credentials::DeviceAttestationCredentialsProvider * dacProvider = Credentials::GetDeviceAttestationCredentialsProvider();

VerifyOrExit(attestationNonce.size() == Credentials::kExpectedAttestationNonceSize, finalStatus = Status::InvalidCommand);
Expand Down Expand Up @@ -1020,6 +1046,9 @@ bool emberAfOperationalCredentialsClusterCSRRequestCallback(app::CommandHandler
VerifyOrExit(failSafeContext.IsFailSafeArmed(commandObj->GetAccessingFabricIndex()), finalStatus = Status::FailsafeRequired);
VerifyOrExit(!failSafeContext.NocCommandHasBeenInvoked(), finalStatus = Status::ConstraintError);

// Flush acks before really slow work
commandObj->FlushAcksRightAwayOnSlowCommand();

// Prepare NOCSRElements structure
{
constexpr size_t csrLength = Crypto::kMAX_CSR_Length;
Expand Down Expand Up @@ -1143,10 +1172,12 @@ bool emberAfOperationalCredentialsClusterAddTrustedRootCertificateCallback(
// be useful in the context.
VerifyOrExit(!failSafeContext.NocCommandHasBeenInvoked(), finalStatus = Status::ConstraintError);

// TODO: Handle checking for byte-to-byte match with existing fabrics
// before allowing the add
// Flush acks before really slow work
commandObj->FlushAcksRightAwayOnSlowCommand();

// TODO(#17208): Handle checking for byte-to-byte match with existing fabrics before allowing the add

// TODO: Validate cert signature prior to setting.
// TODO(#17208): Validate cert signature prior to setting.
err = fabricTable.AddNewPendingTrustedRootCert(rootCertificate);
VerifyOrExit(err != CHIP_ERROR_NO_MEMORY, finalStatus = Status::ResourceExhausted);

Expand Down
2 changes: 1 addition & 1 deletion src/app/server/CommissioningWindowManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void CommissioningWindowManager::OnPlatformEvent(const DeviceLayer::ChipDeviceEv
DeviceLayer::SystemLayer().CancelTimer(HandleCommissioningWindowTimeout, this);
mCommissioningTimeoutTimerArmed = false;
Cleanup();
mServer->GetSecureSessionManager().ExpireAllPASEPairings();
mServer->GetSecureSessionManager().ExpireAllPASESessions();
// That should have cleared out mPASESession.
#if CONFIG_NETWORK_LAYER_BLE
mServer->GetBleLayerObject()->CloseAllBleConnections();
Expand Down
2 changes: 1 addition & 1 deletion src/controller/CHIPDeviceController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ void DeviceController::Shutdown()
// sessions. It just shuts down any ongoing CASE session establishment
// we're in the middle of as initiator. Maybe it should shut down
// existing sessions too?
mSystemState->SessionMgr()->ExpireAllPairingsForFabric(mFabricIndex);
mSystemState->SessionMgr()->ExpireAllSessionsForFabric(mFabricIndex);

FabricTable * fabricTable = mSystemState->Fabrics();
if (fabricTable != nullptr)
Expand Down
3 changes: 3 additions & 0 deletions src/controller/CHIPDeviceController.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,11 +288,14 @@ class DLL_EXPORT DeviceController : public AbstractDnssdDiscoveryController
return mSystemState->Fabrics();
}

// TODO(#20452): This should be removed/renamed once #20452 is fixed
void ReleaseOperationalDevice(NodeId remoteNodeId);

OperationalCredentialsDelegate * GetOperationalCredentialsDelegate() { return mOperationalCredentialsDelegate; }

/**
* TODO(#20452): This needs to be refactored to reflect what it actually does (which is not disconnecting anything)
*
* TEMPORARY - DO NOT USE or if you use please request review on why/how to
* officially support such an API.
*
Expand Down
11 changes: 11 additions & 0 deletions src/controller/python/ChipDeviceController-ScriptBinding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ ChipError::StorageType pychip_GetConnectedDeviceByNodeId(chip::Controller::Devic
DeviceAvailableFunc callback);
ChipError::StorageType pychip_GetDeviceBeingCommissioned(chip::Controller::DeviceCommissioner * devCtrl, chip::NodeId nodeId,
CommissioneeDeviceProxy ** proxy);
ChipError::StorageType pychip_ExpireSessions(chip::Controller::DeviceCommissioner * devCtrl, chip::NodeId nodeId);

uint64_t pychip_GetCommandSenderHandle(chip::DeviceProxy * device);

chip::ChipError::StorageType pychip_InteractionModel_ShutdownSubscription(SubscriptionId subscriptionId);
Expand Down Expand Up @@ -646,6 +648,15 @@ ChipError::StorageType pychip_GetDeviceBeingCommissioned(chip::Controller::Devic
return devCtrl->GetDeviceBeingCommissioned(nodeId, proxy).AsInteger();
}

// This is a method called VERY seldom, just for RemoveFabric/UpdateNOC
ChipError::StorageType pychip_ExpireSessions(chip::Controller::DeviceCommissioner * devCtrl, chip::NodeId nodeId)
{
VerifyOrReturnError((devCtrl != nullptr) && (devCtrl->SessionMgr() != nullptr), CHIP_ERROR_INVALID_ARGUMENT.AsInteger());
(void) devCtrl->ReleaseOperationalDevice(nodeId);
devCtrl->SessionMgr()->ExpireAllSessions(ScopedNodeId(nodeId, devCtrl->GetFabricIndex()));
return CHIP_NO_ERROR.AsInteger();
}

ChipError::StorageType pychip_DeviceCommissioner_CloseBleConnection(chip::Controller::DeviceCommissioner * devCtrl)
{
#if CONFIG_NETWORK_LAYER_BLE
Expand Down
18 changes: 18 additions & 0 deletions src/controller/python/chip/ChipDeviceCtrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,21 @@ def CloseBLEConnection(self):
self.devCtrl)
)

def ExpireSessions(self, nodeid):
"""Close all sessions with `nodeid` (if any existed) so that sessions get re-established.
This is needed to properly handle operations that invalidate a node's state, such as
UpdateNOC.
WARNING: ONLY CALL THIS IF YOU UNDERSTAND THE SIDE-EFFECTS
"""
self.CheckIsActive()

res = self._ChipStack.Call(lambda: self._dmLib.pychip_ExpireSessions(self.devCtrl, nodeid))
if res != 0:
raise self._ChipStack.ErrorToException(res)

# TODO: This needs to be called MarkSessionDefunct
def CloseSession(self, nodeid):
self.CheckIsActive()

Expand Down Expand Up @@ -1106,6 +1121,9 @@ def _InitLib(self):
c_void_p, c_uint64, c_void_p]
self._dmLib.pychip_GetDeviceBeingCommissioned.restype = c_uint32

self._dmLib.pychip_ExpireSessions.argtypes = [c_void_p, c_uint64]
self._dmLib.pychip_ExpireSessions.restype = c_uint32

self._dmLib.pychip_DeviceCommissioner_CloseBleConnection.argtypes = [
c_void_p]
self._dmLib.pychip_DeviceCommissioner_CloseBleConnection.restype = c_uint32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ async def UpdateNOC(devCtrl, existingNodeId, newNodeId):
await devCtrl.SendCommand(existingNodeId, 0, generalCommissioning.Commands.ArmFailSafe(0), timedRequestTimeoutMs=1000)
return False

# Forget our session since the peer deleted it
devCtrl.ExpireSessions(existingNodeId)

resp = await devCtrl.SendCommand(newNodeId, 0, generalCommissioning.Commands.CommissioningComplete())
if resp.errorCode is not generalCommissioning.Enums.CommissioningError.kOk:
# Expiring the failsafe timer in an attempt to clean up.
Expand Down
2 changes: 1 addition & 1 deletion src/messaging/ExchangeContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ void ExchangeContext::AbortAllOtherCommunicationOnFabric()

SetIgnoreSessionRelease(true);

GetExchangeMgr()->GetSessionManager()->ExpireAllPairingsForFabric(mSession->GetFabricIndex());
GetExchangeMgr()->GetSessionManager()->ExpireAllSessionsForFabric(mSession->GetFabricIndex());

mSession.GrabExpiredSession(session.Value());

Expand Down
14 changes: 7 additions & 7 deletions src/transport/SessionManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ void SessionManager::Shutdown()

/**
* @brief Notification that a fabric was removed.
* This function doesn't call ExpireAllPairingsForFabric
* This function doesn't call ExpireAllSessionsForFabric
* since the CASE session might still be open to send a response
* on the removed fabric.
*/
Expand Down Expand Up @@ -342,7 +342,7 @@ CHIP_ERROR SessionManager::SendPreparedMessage(const SessionHandle & sessionHand
return CHIP_ERROR_INCORRECT_STATE;
}

void SessionManager::ExpireAllPairings(const ScopedNodeId & node)
void SessionManager::ExpireAllSessions(const ScopedNodeId & node)
{
mSecureSessions.ForEachSession([&](auto session) {
if (session->GetPeer() == node)
Expand All @@ -353,21 +353,21 @@ void SessionManager::ExpireAllPairings(const ScopedNodeId & node)
});
}

void SessionManager::ExpireAllPairingsForFabric(FabricIndex fabric)
void SessionManager::ExpireAllSessionsForFabric(FabricIndex fabricIndex)
{
ChipLogDetail(Inet, "Expiring all connections for fabric %d!!", fabric);
ChipLogDetail(Inet, "Expiring all sessions for fabric 0x%x!!", static_cast<unsigned>(fabricIndex));
tcarmelveilleux marked this conversation as resolved.
Show resolved Hide resolved
mSecureSessions.ForEachSession([&](auto session) {
if (session->GetFabricIndex() == fabric)
if (session->GetFabricIndex() == fabricIndex)
{
session->MarkForEviction();
}
return Loop::Continue;
});
}

void SessionManager::ExpireAllPASEPairings()
void SessionManager::ExpireAllPASESessions()
{
ChipLogDetail(Inet, "Expiring all PASE pairings");
ChipLogDetail(Inet, "Expiring all PASE sessions");
mSecureSessions.ForEachSession([&](auto session) {
if (session->GetSecureSessionType() == Transport::SecureSession::Type::kPASE)
{
Expand Down
6 changes: 3 additions & 3 deletions src/transport/SessionManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,9 @@ class DLL_EXPORT SessionManager : public TransportMgrDelegate, public FabricTabl
Optional<SessionHandle> AllocateSession(Transport::SecureSession::Type secureSessionType,
const ScopedNodeId & sessionEvictionHint);

void ExpireAllPairings(const ScopedNodeId & node);
void ExpireAllPairingsForFabric(FabricIndex fabric);
void ExpireAllPASEPairings();
void ExpireAllSessions(const ScopedNodeId & node);
void ExpireAllSessionsForFabric(FabricIndex fabricIndex);
void ExpireAllPASESessions();

/**
* @brief
Expand Down