Skip to content

Commit

Permalink
Failsafe fixes (project-chip#16973)
Browse files Browse the repository at this point in the history
* Failsafe checks

Do not allow CASE connections to arm the failsafe the first time
if there is a commissioning window open.

* Disarm failsafe immediately if we get 0 timeout.

* Do not allow commissioning window to be opened if failsafe is held

* Force expire failsafe on Commissioning window close

* Run test in cirque

* Arm failsafe on PASE session establishment complete

* Failsafe fixes from other PR

* Restyled by autopep8

* Remove newly removed argument

* manually cancel timer.

* Fixes from review.

Co-authored-by: Restyled.io <[email protected]>
  • Loading branch information
2 people authored and chencheung committed Apr 6, 2022
1 parent 9863865 commit c4fde73
Show file tree
Hide file tree
Showing 7 changed files with 360 additions and 5 deletions.
1 change: 1 addition & 0 deletions scripts/tests/cirque_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ OT_SIMULATION_CACHE_STAMP_FILE="$CIRQUE_CACHE_PATH/ot-simulation.commit"
CIRQUE_TESTS=(
"EchoTest"
"EchoOverTcpTest"
"FailsafeTest"
"MobileDeviceTest"
"CommissioningTest"
"InteractionModelTest"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <lib/support/CodeUtils.h>
#include <lib/support/logging/CHIPLogging.h>
#include <platform/CommissionableDataProvider.h>
#include <platform/DeviceControlServer.h>
#include <protocols/interaction_model/Constants.h>
#include <setup_payload/SetupPayload.h>
#include <system/SystemClock.h>
Expand Down Expand Up @@ -104,9 +105,12 @@ bool emberAfAdministratorCommissioningClusterOpenCommissioningWindowCallback(

ChipLogProgress(Zcl, "Received command to open commissioning window");

FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex();
FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex);
FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex();
FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex);
DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();

VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR));
VerifyOrExit(!failSafeContext.IsFailSafeArmed(), status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_BUSY));

VerifyOrExit(Server::GetInstance().GetCommissioningWindowManager().CommissioningWindowStatus() ==
CommissioningWindowStatus::kWindowNotOpen,
Expand Down Expand Up @@ -165,11 +169,15 @@ bool emberAfAdministratorCommissioningClusterOpenBasicCommissioningWindowCallbac

FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex();
FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex);
chip::DeviceLayer::FailSafeContext & failSafeContext =
DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();

VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR));

VerifyOrExit(Server::GetInstance().GetCommissioningWindowManager().CommissioningWindowStatus() ==
CommissioningWindowStatus::kWindowNotOpen,
status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_BUSY));
VerifyOrExit(!failSafeContext.IsFailSafeArmed(), status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_BUSY));
VerifyOrExit(commissioningTimeout <= CommissioningWindowManager::MaxCommissioningTimeout(),
globalStatus = InteractionModel::Status::InvalidCommand);
VerifyOrExit(commissioningTimeout >= CommissioningWindowManager::MinCommissioningTimeout(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include <app-common/zap-generated/cluster-objects.h>
#include <app/CommandHandler.h>
#include <app/ConcreteCommandPath.h>
#include <app/server/CommissioningWindowManager.h>
#include <app/server/Server.h>
#include <app/util/af.h>
#include <app/util/attribute-storage.h>
#include <lib/support/Span.h>
Expand Down Expand Up @@ -162,22 +164,36 @@ bool emberAfGeneralCommissioningClusterArmFailSafeCallback(app::CommandHandler *

FabricIndex accessingFabricIndex = commandObj->GetAccessingFabricIndex();

// We do not allow CASE connections to arm the failsafe for the first time while the commissioning window is open in order
// to allow commissioners the opportunity to obtain this failsafe for the purpose of commissioning
if (!failSafeContext.IsFailSafeBusy() &&
(!failSafeContext.IsFailSafeArmed() || failSafeContext.MatchesFabricIndex(accessingFabricIndex)))
{
if (commandData.expiryLengthSeconds == 0)
// We do not allow CASE connections to arm the failsafe for the first time while the commissioning window is open in order
// to allow commissioners the opportunity to obtain this failsafe for the purpose of commissioning
if (!failSafeContext.IsFailSafeArmed() &&
Server::GetInstance().GetCommissioningWindowManager().CommissioningWindowStatus() !=
AdministratorCommissioning::CommissioningWindowStatus::kWindowNotOpen &&
commandObj->GetSubjectDescriptor().authMode == Access::AuthMode::kCase)
{
response.errorCode = CommissioningError::kBusyWithOtherAdmin;
commandObj->AddResponse(commandPath, response);
}
else if (commandData.expiryLengthSeconds == 0)
{
// Force the timer to expire immediately.
failSafeContext.ForceFailSafeTimerExpiry();
response.errorCode = CommissioningError::kOk;
commandObj->AddResponse(commandPath, response);
}
else
{
CheckSuccess(
failSafeContext.ArmFailSafe(accessingFabricIndex, System::Clock::Seconds16(commandData.expiryLengthSeconds)),
Failure);
response.errorCode = CommissioningError::kOk;
commandObj->AddResponse(commandPath, response);
}
response.errorCode = CommissioningError::kOk;
commandObj->AddResponse(commandPath, response);
}
else
{
Expand Down
25 changes: 25 additions & 0 deletions src/app/server/CommissioningWindowManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <lib/support/CodeUtils.h>
#include <platform/CHIPDeviceLayer.h>
#include <platform/CommissionableDataProvider.h>
#include <platform/DeviceControlServer.h>

using namespace chip::app::Clusters;
using namespace chip::System::Clock;
Expand Down Expand Up @@ -94,6 +95,11 @@ void CommissioningWindowManager::ResetState()
// Tears down the commissioning window: stops advertising, force-expires the
// failsafe timer if it is currently armed, then resets the manager's state.
void CommissioningWindowManager::Cleanup()
{
    StopAdvertisement(/* aShuttingDown = */ false);

    // Closing the window must not leave an armed failsafe behind, so expire it now.
    auto & failSafe = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();
    const bool failSafeIsArmed = failSafe.IsFailSafeArmed();
    if (failSafeIsArmed)
    {
        failSafe.ForceFailSafeTimerExpiry();
    }

    ResetState();
}
Expand Down Expand Up @@ -161,6 +167,23 @@ void CommissioningWindowManager::OnSessionEstablished()
DeviceLayer::PlatformMgr().AddEventHandler(OnPlatformEventWrapper, reinterpret_cast<intptr_t>(this));

StopAdvertisement(/* aShuttingDown = */ false);

DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();
// This should never be armed because we don't allow CASE sessions to arm the failsafe when the commissioning window is open and
// we check that the failsafe is not armed before opening the commissioning window. Nonetheless, it is good to double-check.
if (failSafeContext.IsFailSafeArmed())
{
ChipLogError(AppServer, "Error - arm failsafe is already armed on PASE session establishment completion");
}
else
{
err = failSafeContext.ArmFailSafe(kUndefinedFabricId, System::Clock::Seconds16(60));
if (err != CHIP_NO_ERROR)
{
ChipLogError(AppServer, "Error arming failsafe on PASE session establishment completion");
}
}

ChipLogProgress(AppServer, "Device completed Rendezvous process");
}

Expand All @@ -169,6 +192,8 @@ CHIP_ERROR CommissioningWindowManager::OpenCommissioningWindow(Seconds16 commiss
VerifyOrReturnError(commissioningTimeout <= MaxCommissioningTimeout() &&
commissioningTimeout >= mMinCommissioningTimeoutOverride.ValueOr(MinCommissioningTimeout()),
CHIP_ERROR_INVALID_ARGUMENT);
DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext();
VerifyOrReturnError(!failSafeContext.IsFailSafeArmed(), CHIP_ERROR_INCORRECT_STATE);

ReturnErrorOnFailure(DeviceLayer::SystemLayer().StartTimer(commissioningTimeout, HandleCommissioningWindowTimeout, this));

Expand Down
78 changes: 78 additions & 0 deletions src/controller/python/test/test_scripts/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from chip.ChipStack import *
import chip.FabricAdmin
import copy
import secrets

logger = logging.getLogger('PythonMatterControllerTEST')
logger.setLevel(logging.INFO)
Expand Down Expand Up @@ -269,6 +270,83 @@ def TestKeyExchange(self, ip: str, setuppin: int, nodeid: int):
def TestUsedTestCommissioner(self):
return self.devCtrl.GetTestCommissionerUsed()

def TestFailsafe(self, nodeid: int):
    """Check how the failsafe timer and the commissioning window exclude each other.

    The sequence, all sent to ``nodeid`` through ``self.devCtrl``:

    1. Arm the failsafe (60 s) and expect ``kOk``.
    2. Try to open a basic and then an enhanced commissioning window; both
       must fail while the failsafe is armed.
    3. Disarm the failsafe (expiry length 0) and expect ``kOk``.
    4. Open a basic commissioning window; this must now succeed.
    5. Try to arm the failsafe again; the device must answer
       ``kBusyWithOtherAdmin`` because the window is open.

    Returns:
        True when every step behaves as described above, False otherwise.
    """
    commissioning_error = Clusters.GeneralCommissioning.Enums.CommissioningError
    admin_commands = Clusters.AdministratorCommissioning.Commands

    self.logger.info("Testing arm failsafe")

    self.logger.info("Setting failsafe on CASE connection")
    err, resp = self.devCtrl.ZCLSend("GeneralCommissioning", "ArmFailSafe", nodeid,
                                     0, 0, dict(expiryLengthSeconds=60, breadcrumb=1), blocking=True)
    if err != 0:
        self.logger.error(
            "Failed to send arm failsafe command error is {} with im response{}".format(err, resp))
        return False

    # Compare by value, not identity, so a raw integer error code is also handled.
    if resp.errorCode != commissioning_error.kOk:
        self.logger.error(
            "Incorrect response received from arm failsafe - wanted OK, received {}".format(resp))
        return False

    self.logger.info(
        "Attempting to open basic commissioning window - this should fail since the failsafe is armed")
    try:
        asyncio.run(self.devCtrl.SendCommand(
            nodeid, 0, admin_commands.OpenBasicCommissioningWindow(180), timedRequestTimeoutMs=10000))
        # We actually want the exception here because we want to see a failure, so return False here.
        self.logger.error(
            'Incorrectly succeeded in opening basic commissioning window')
        return False
    except Exception as ex:
        # Expected path: record why it failed so a wrong failure reason is visible in the log.
        self.logger.info(
            "Opening basic commissioning window failed as expected: {}".format(ex))

    # TODO: pipe through the commissioning window opener so we can test enhanced properly. The pake verifier is just
    # garbage because none of the functions to calculate it or serialize it are available right now. However, this
    # command should fail BEFORE that becomes an issue.
    discriminator = 1111
    salt = secrets.token_bytes(16)
    iterations = 2000
    # Not the right size or the right contents, but it won't matter.
    verifier = secrets.token_bytes(32)
    self.logger.info(
        "Attempting to open enhanced commissioning window - this should fail since the failsafe is armed")
    try:
        asyncio.run(self.devCtrl.SendCommand(nodeid, 0, admin_commands.OpenCommissioningWindow(
            commissioningTimeout=180, PAKEVerifier=verifier, discriminator=discriminator, iterations=iterations, salt=salt),
            timedRequestTimeoutMs=10000))
        # We actually want the exception here because we want to see a failure, so return False here.
        self.logger.error(
            'Incorrectly succeeded in opening enhanced commissioning window')
        return False
    except Exception as ex:
        self.logger.info(
            "Opening enhanced commissioning window failed as expected: {}".format(ex))

    self.logger.info("Disarming failsafe on CASE connection")
    err, resp = self.devCtrl.ZCLSend("GeneralCommissioning", "ArmFailSafe", nodeid,
                                     0, 0, dict(expiryLengthSeconds=0, breadcrumb=1), blocking=True)
    if err != 0:
        self.logger.error(
            "Failed to send arm failsafe command error is {} with im response{}".format(err, resp))
        return False

    # A delivered command is not enough: the device must also report that the disarm succeeded.
    if resp.errorCode != commissioning_error.kOk:
        self.logger.error(
            "Incorrect response received from disarming failsafe - wanted OK, received {}".format(resp))
        return False

    self.logger.info(
        "Opening Commissioning Window - this should succeed since the failsafe was just disarmed")
    try:
        asyncio.run(self.devCtrl.SendCommand(
            nodeid, 0, admin_commands.OpenBasicCommissioningWindow(180), timedRequestTimeoutMs=10000))
    except Exception:
        self.logger.error(
            'Failed to open commissioning window after disarming failsafe')
        return False

    self.logger.info(
        "Attempting to arm failsafe over CASE - this should fail since the commissioning window is open")
    err, resp = self.devCtrl.ZCLSend("GeneralCommissioning", "ArmFailSafe", nodeid,
                                     0, 0, dict(expiryLengthSeconds=60, breadcrumb=1), blocking=True)
    if err != 0:
        self.logger.error(
            "Failed to send arm failsafe command error is {} with im response{}".format(err, resp))
        return False

    if resp.errorCode == commissioning_error.kBusyWithOtherAdmin:
        return True

    self.logger.error(
        "Incorrect response received from arm failsafe - wanted BusyWithOtherAdmin, received {}".format(resp))
    return False

async def TestMultiFabric(self, ip: str, setuppin: int, nodeid: int):
self.logger.info("Opening Commissioning Window")

Expand Down
119 changes: 119 additions & 0 deletions src/controller/python/test/test_scripts/failsafe_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3

#
# Copyright (c) 2021 Project CHIP Authors
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Failsafe test.
import os
import sys
from optparse import OptionParser
from base import TestFail, TestTimeout, BaseTestHelper, FailIfNot, logger
from cluster_objects import NODE_ID, ClusterObjectTests
from network_commissioning import NetworkCommissioningTests
import asyncio

# Thread network dataset TLV used for testing, written one T-L-V element per line.
TEST_THREAD_NETWORK_DATASET_TLV = (
    "0e080000000000010000"
    "000300000c"
    "35060004001fffe0"
    "0208fedcba9876543210"
    "0708fd00000000001234"
    "0510ffeeddccbbaa99887766554433221100"
    "030e54657374696e674e6574776f726b"
    "0102d252"
    "041081cb3b2efa781cc778397497ff520fa50c0302a0ff"
)

# Network id of the thread network. Currently a constant value; it will be
# changed to the XPANID of the thread network.
TEST_THREAD_NETWORK_ID = "fedcba9876543210"

# Discriminator passed to the discovery step.
TEST_DISCRIMINATOR = 3840

ENDPOINT_ID = 0
LIGHTING_ENDPOINT_ID = 1
GROUP_ID = 0


def main():
    """Parse the command line and run the failsafe test sequence against one device.

    Steps, each aborting the run through ``FailIfNot`` when it reports failure:
    discovery, setting the network commissioning parameters, key exchange and
    finally the failsafe test itself. The process is then terminated with
    ``os._exit(0)``.
    """
    parser = OptionParser()

    # One row per option: (flags, dest, default, type, help, metavar).
    option_table = (
        (("-t", "--timeout"), "testTimeout", 75, 'int',
         "The program will return with timeout after specified seconds.", "<timeout-second>"),
        (("-a", "--address"), "deviceAddress", '', 'str',
         "Address of the device", "<device-addr>"),
        (("-p", "--paa-trust-store-path"), "paaTrustStorePath", '', 'str',
         "Path that contains valid and trusted PAA Root Certificates.", "<paa-trust-store-path>"),
    )
    for flags, dest, default, value_type, help_text, metavar in option_table:
        parser.add_option(*flags, action="store", dest=dest, default=default,
                          type=value_type, help=help_text, metavar=metavar)

    parsed, _ = parser.parse_args(sys.argv[1:])

    # Watchdog for the whole run, configured from --timeout.
    watchdog = TestTimeout(parsed.testTimeout)
    watchdog.start()

    helper = BaseTestHelper(
        nodeid=112233, paaTrustStorePath=parsed.paaTrustStorePath, testCommissioner=False)

    logger.info("Testing discovery")
    discovered = helper.TestDiscovery(discriminator=TEST_DISCRIMINATOR)
    FailIfNot(discovered, "Failed to discover any devices.")

    network_ready = helper.SetNetworkCommissioningParameters(dataset=TEST_THREAD_NETWORK_DATASET_TLV)
    FailIfNot(network_ready, "Failed to finish network commissioning")

    logger.info("Testing key exchange")
    keys_exchanged = helper.TestKeyExchange(ip=parsed.deviceAddress, setuppin=20202021, nodeid=1)
    FailIfNot(keys_exchanged, "Failed to finish key exchange")

    failsafe_ok = helper.TestFailsafe(nodeid=1)
    FailIfNot(failsafe_ok, "Failed failsafe test")

    watchdog.stop()

    logger.info("Test finished")

    # TODO: The Python device controller sometimes cannot shut down cleanly and blocks on the
    # AsyncDNSResolverSockets shutdown, so force the process to exit with os._exit(0).
    os._exit(0)


if __name__ == "__main__":
    # Script entry point: any exception escaping main() is logged with its
    # traceback and then reported as a test failure.
    try:
        main()
    except Exception as error:
        logger.exception(error)
        TestFail("Exception occurred when running tests.")
Loading

0 comments on commit c4fde73

Please sign in to comment.