From 03cac1bcbbad7c4dce20aa716862eccf9d80750f Mon Sep 17 00:00:00 2001 From: C Freeman Date: Mon, 4 Apr 2022 20:42:06 -0400 Subject: [PATCH] Failsafe fixes (#16973) * Failsafe checks Do not allow CASE connections to arm the failsafe the first time if there is a commissioning window open. * Disarm failsafe immediately if we get 0 timeout. * Do not allow commissioning window to be opened if failsafe is held * Force expire failsafe on Commissioning window close * Run test in cirque * Arm failsafe on PASE session establishment complete * Failsafe fixes from other PR * Restyled by autopep8 * Remove newly removed argument * manually cancel timer. * Fixes from review. Co-authored-by: Restyled.io --- scripts/tests/cirque_tests.sh | 1 + .../administrator-commissioning-server.cpp | 12 +- .../general-commissioning-server.cpp | 22 +++- src/app/server/CommissioningWindowManager.cpp | 25 ++++ .../python/test/test_scripts/base.py | 78 ++++++++++++ .../test/test_scripts/failsafe_tests.py | 119 ++++++++++++++++++ src/test_driver/linux-cirque/FailsafeTest.py | 108 ++++++++++++++++ 7 files changed, 360 insertions(+), 5 deletions(-) create mode 100755 src/controller/python/test/test_scripts/failsafe_tests.py create mode 100755 src/test_driver/linux-cirque/FailsafeTest.py diff --git a/scripts/tests/cirque_tests.sh b/scripts/tests/cirque_tests.sh index 2f7457005c5c76..4d04aae9e0af8d 100755 --- a/scripts/tests/cirque_tests.sh +++ b/scripts/tests/cirque_tests.sh @@ -37,6 +37,7 @@ OT_SIMULATION_CACHE_STAMP_FILE="$CIRQUE_CACHE_PATH/ot-simulation.commit" CIRQUE_TESTS=( "EchoTest" "EchoOverTcpTest" + "FailsafeTest" "MobileDeviceTest" "CommissioningTest" "InteractionModelTest" diff --git a/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp b/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp index 7ae2421b5d5875..a022e26560b6ff 100644 --- 
a/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp +++ b/src/app/clusters/administrator-commissioning-server/administrator-commissioning-server.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -104,9 +105,12 @@ bool emberAfAdministratorCommissioningClusterOpenCommissioningWindowCallback( ChipLogProgress(Zcl, "Received command to open commissioning window"); - FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex(); - FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex); + FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex(); + FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex); + DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR)); + VerifyOrExit(!failSafeContext.IsFailSafeArmed(), status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_BUSY)); VerifyOrExit(Server::GetInstance().GetCommissioningWindowManager().CommissioningWindowStatus() == CommissioningWindowStatus::kWindowNotOpen, @@ -165,11 +169,15 @@ bool emberAfAdministratorCommissioningClusterOpenBasicCommissioningWindowCallbac FabricIndex fabricIndex = commandObj->GetAccessingFabricIndex(); FabricInfo * fabricInfo = Server::GetInstance().GetFabricTable().FindFabricWithIndex(fabricIndex); + chip::DeviceLayer::FailSafeContext & failSafeContext = + DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + VerifyOrExit(fabricInfo != nullptr, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_PAKE_PARAMETER_ERROR)); VerifyOrExit(Server::GetInstance().GetCommissioningWindowManager().CommissioningWindowStatus() == CommissioningWindowStatus::kWindowNotOpen, status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_BUSY)); + 
VerifyOrExit(!failSafeContext.IsFailSafeArmed(), status.Emplace(StatusCode::EMBER_ZCL_STATUS_CODE_BUSY)); VerifyOrExit(commissioningTimeout <= CommissioningWindowManager::MaxCommissioningTimeout(), globalStatus = InteractionModel::Status::InvalidCommand); VerifyOrExit(commissioningTimeout >= CommissioningWindowManager::MinCommissioningTimeout(), diff --git a/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp b/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp index 6330afe765a14d..056519629bf29b 100644 --- a/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp +++ b/src/app/clusters/general-commissioning-server/general-commissioning-server.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -162,22 +164,36 @@ bool emberAfGeneralCommissioningClusterArmFailSafeCallback(app::CommandHandler * FabricIndex accessingFabricIndex = commandObj->GetAccessingFabricIndex(); + // Only handle the request when the fail-safe is not busy and is either not armed yet or matches the accessing fabric index; + // the restriction on CASE sessions while a commissioning window is open is checked inside this branch. if (!failSafeContext.IsFailSafeBusy() && (!failSafeContext.IsFailSafeArmed() || failSafeContext.MatchesFabricIndex(accessingFabricIndex))) { - if (commandData.expiryLengthSeconds == 0) + // We do not allow CASE connections to arm the failsafe for the first time while the commissioning window is open in order + // to allow commissioners the opportunity to obtain this failsafe for the purpose of commissioning + if (!failSafeContext.IsFailSafeArmed() && + Server::GetInstance().GetCommissioningWindowManager().CommissioningWindowStatus() != + AdministratorCommissioning::CommissioningWindowStatus::kWindowNotOpen && + commandObj->GetSubjectDescriptor().authMode == Access::AuthMode::kCase) + { + response.errorCode = CommissioningError::kBusyWithOtherAdmin; +
commandObj->AddResponse(commandPath, response); + } + else if (commandData.expiryLengthSeconds == 0) { // Force the timer to expire immediately. failSafeContext.ForceFailSafeTimerExpiry(); + response.errorCode = CommissioningError::kOk; + commandObj->AddResponse(commandPath, response); } else { CheckSuccess( failSafeContext.ArmFailSafe(accessingFabricIndex, System::Clock::Seconds16(commandData.expiryLengthSeconds)), Failure); + response.errorCode = CommissioningError::kOk; + commandObj->AddResponse(commandPath, response); } - response.errorCode = CommissioningError::kOk; - commandObj->AddResponse(commandPath, response); } else { diff --git a/src/app/server/CommissioningWindowManager.cpp b/src/app/server/CommissioningWindowManager.cpp index b62aef382a75a8..f2169f37835a21 100644 --- a/src/app/server/CommissioningWindowManager.cpp +++ b/src/app/server/CommissioningWindowManager.cpp @@ -22,6 +22,7 @@ #include #include #include +#include using namespace chip::app::Clusters; using namespace chip::System::Clock; @@ -94,6 +95,11 @@ void CommissioningWindowManager::ResetState() void CommissioningWindowManager::Cleanup() { StopAdvertisement(/* aShuttingDown = */ false); + DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + if (failSafeContext.IsFailSafeArmed()) + { + failSafeContext.ForceFailSafeTimerExpiry(); + } ResetState(); } @@ -161,6 +167,23 @@ void CommissioningWindowManager::OnSessionEstablished() DeviceLayer::PlatformMgr().AddEventHandler(OnPlatformEventWrapper, reinterpret_cast(this)); StopAdvertisement(/* aShuttingDown = */ false); + + DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + // This should never be armed because we don't allow CASE sessions to arm the failsafe when the commissioning window is open and + // we check that the failsafe is not armed before opening the commissioning window. 
None the less, it is good to double-check. + if (failSafeContext.IsFailSafeArmed()) + { + ChipLogError(AppServer, "Error - arm failsafe is already armed on PASE session establishment completion"); + } + else + { + err = failSafeContext.ArmFailSafe(kUndefinedFabricId, System::Clock::Seconds16(60)); + if (err != CHIP_NO_ERROR) + { + ChipLogError(AppServer, "Error arming failsafe on PASE session establishment completion"); + } + } + ChipLogProgress(AppServer, "Device completed Rendezvous process"); } @@ -169,6 +192,8 @@ CHIP_ERROR CommissioningWindowManager::OpenCommissioningWindow(Seconds16 commiss VerifyOrReturnError(commissioningTimeout <= MaxCommissioningTimeout() && commissioningTimeout >= mMinCommissioningTimeoutOverride.ValueOr(MinCommissioningTimeout()), CHIP_ERROR_INVALID_ARGUMENT); + DeviceLayer::FailSafeContext & failSafeContext = DeviceLayer::DeviceControlServer::DeviceControlSvr().GetFailSafeContext(); + VerifyOrReturnError(!failSafeContext.IsFailSafeArmed(), CHIP_ERROR_INCORRECT_STATE); ReturnErrorOnFailure(DeviceLayer::SystemLayer().StartTimer(commissioningTimeout, HandleCommissioningWindowTimeout, this)); diff --git a/src/controller/python/test/test_scripts/base.py b/src/controller/python/test/test_scripts/base.py index 874d636127ae6b..ca2f1419dd4abe 100644 --- a/src/controller/python/test/test_scripts/base.py +++ b/src/controller/python/test/test_scripts/base.py @@ -35,6 +35,7 @@ from chip.ChipStack import * import chip.FabricAdmin import copy +import secrets logger = logging.getLogger('PythonMatterControllerTEST') logger.setLevel(logging.INFO) @@ -269,6 +270,83 @@ def TestKeyExchange(self, ip: str, setuppin: int, nodeid: int): def TestUsedTestCommissioner(self): return self.devCtrl.GetTestCommissionerUsed() + def TestFailsafe(self, nodeid: int): + self.logger.info("Testing arm failsafe") + + self.logger.info("Setting failsafe on CASE connection") + err, resp = self.devCtrl.ZCLSend("GeneralCommissioning", "ArmFailSafe", nodeid, + 0, 0, 
dict(expiryLengthSeconds=60, breadcrumb=1), blocking=True) + if err != 0: + self.logger.error( + "Failed to send arm failsafe command error is {} with im response{}".format(err, resp)) + return False + + if resp.errorCode is not Clusters.GeneralCommissioning.Enums.CommissioningError.kOk: + self.logger.error( + "Incorrect response received from arm failsafe - wanted OK, received {}".format(resp)) + return False + + self.logger.info( + "Attempting to open basic commissioning window - this should fail since the failsafe is armed") + try: + res = asyncio.run(self.devCtrl.SendCommand( + nodeid, 0, Clusters.AdministratorCommissioning.Commands.OpenBasicCommissioningWindow(180), timedRequestTimeoutMs=10000)) + # we actually want the exception here because we want to see a failure, so return False here + self.logger.error( + 'Incorrectly succeeded in opening basic commissioning window') + return False + except Exception as ex: + pass + + # TODO: pipe through the commissioning window opener so we can test enhanced properly. The PAKE verifier is just garbage because none of the functions to calculate + # it or serialize it are available right now. However, this command should fail BEFORE that becomes an issue.
+ discriminator = 1111 + salt = secrets.token_bytes(16) + iterations = 2000 + # not the right size or the right contents, but it won't matter + verifier = secrets.token_bytes(32) + self.logger.info( + "Attempting to open enhanced commissioning window - this should fail since the failsafe is armed") + try: + res = asyncio.run(self.devCtrl.SendCommand(nodeid, 0, Clusters.AdministratorCommissioning.Commands.OpenCommissioningWindow( + commissioningTimeout=180, PAKEVerifier=verifier, discriminator=discriminator, iterations=iterations, salt=salt), timedRequestTimeoutMs=10000)) + # we actually want the exception here because we want to see a failure, so return False here + self.logger.error( + 'Incorrectly succeeded in opening enhanced commissioning window') + return False + except Exception as ex: + pass + + self.logger.info("Disarming failsafe on CASE connection") + err, resp = self.devCtrl.ZCLSend("GeneralCommissioning", "ArmFailSafe", nodeid, + 0, 0, dict(expiryLengthSeconds=0, breadcrumb=1), blocking=True) + if err != 0: + self.logger.error( + "Failed to send arm failsafe command error is {} with im response{}".format(err, resp)) + return False + + self.logger.info( + "Opening Commissioning Window - this should succeed since the failsafe was just disarmed") + try: + res = asyncio.run(self.devCtrl.SendCommand( + nodeid, 0, Clusters.AdministratorCommissioning.Commands.OpenBasicCommissioningWindow(180), timedRequestTimeoutMs=10000)) + except Exception as ex: + self.logger.error( + 'Failed to open commissioning window after disarming failsafe') + return False + + self.logger.info( + "Attempting to arm failsafe over CASE - this should fail since the commissioning window is open") + err, resp = self.devCtrl.ZCLSend("GeneralCommissioning", "ArmFailSafe", nodeid, + 0, 0, dict(expiryLengthSeconds=60, breadcrumb=1), blocking=True) + if err != 0: + self.logger.error( + "Failed to send arm failsafe command error is {} with im response{}".format(err, resp)) + return False + if 
resp.errorCode is Clusters.GeneralCommissioning.Enums.CommissioningError.kBusyWithOtherAdmin: + return True + return False + async def TestMultiFabric(self, ip: str, setuppin: int, nodeid: int): self.logger.info("Opening Commissioning Window") diff --git a/src/controller/python/test/test_scripts/failsafe_tests.py b/src/controller/python/test/test_scripts/failsafe_tests.py new file mode 100755 index 00000000000000..fb21f0826b0523 --- /dev/null +++ b/src/controller/python/test/test_scripts/failsafe_tests.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 + +# +# Copyright (c) 2021 Project CHIP Authors +# All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Failsafe test. +import os +import sys +from optparse import OptionParser +from base import TestFail, TestTimeout, BaseTestHelper, FailIfNot, logger +from cluster_objects import NODE_ID, ClusterObjectTests +from network_commissioning import NetworkCommissioningTests +import asyncio + +# The Thread network dataset TLV for testing, split into T-L-V. + +TEST_THREAD_NETWORK_DATASET_TLV = "0e080000000000010000" + \ + "000300000c" + \ + "35060004001fffe0" + \ + "0208fedcba9876543210" + \ + "0708fd00000000001234" + \ + "0510ffeeddccbbaa99887766554433221100" + \ + "030e54657374696e674e6574776f726b" + \ + "0102d252" + \ + "041081cb3b2efa781cc778397497ff520fa50c0302a0ff" +# Network id for the Thread network; currently a constant value, will be changed to the XPANID of the Thread network.
+TEST_THREAD_NETWORK_ID = "fedcba9876543210" +TEST_DISCRIMINATOR = 3840 + +ENDPOINT_ID = 0 +LIGHTING_ENDPOINT_ID = 1 +GROUP_ID = 0 + + +def main(): + optParser = OptionParser() + optParser.add_option( + "-t", + "--timeout", + action="store", + dest="testTimeout", + default=75, + type='int', + help="The program will return with timeout after specified seconds.", + metavar="", + ) + optParser.add_option( + "-a", + "--address", + action="store", + dest="deviceAddress", + default='', + type='str', + help="Address of the device", + metavar="", + ) + optParser.add_option( + "-p", + "--paa-trust-store-path", + action="store", + dest="paaTrustStorePath", + default='', + type='str', + help="Path that contains valid and trusted PAA Root Certificates.", + metavar="" + ) + + (options, remainingArgs) = optParser.parse_args(sys.argv[1:]) + + timeoutTicker = TestTimeout(options.testTimeout) + timeoutTicker.start() + + test = BaseTestHelper( + nodeid=112233, paaTrustStorePath=options.paaTrustStorePath, testCommissioner=False) + + logger.info("Testing discovery") + FailIfNot(test.TestDiscovery(discriminator=TEST_DISCRIMINATOR), + "Failed to discover any devices.") + + FailIfNot(test.SetNetworkCommissioningParameters(dataset=TEST_THREAD_NETWORK_DATASET_TLV), + "Failed to finish network commissioning") + + logger.info("Testing key exchange") + FailIfNot(test.TestKeyExchange(ip=options.deviceAddress, + setuppin=20202021, + nodeid=1), + "Failed to finish key exchange") + + FailIfNot(test.TestFailsafe(nodeid=1), "Failed failsafe test") + + timeoutTicker.stop() + + logger.info("Test finished") + + # TODO: The Python device controller sometimes cannot be shut down cleanly and will block on AsyncDNSResolverSockets shutdown. + # Call os._exit(0) to force close it.
+ os._exit(0) + + +if __name__ == "__main__": + try: + main() + except Exception as ex: + logger.exception(ex) + TestFail("Exception occurred when running tests.") diff --git a/src/test_driver/linux-cirque/FailsafeTest.py b/src/test_driver/linux-cirque/FailsafeTest.py new file mode 100755 index 00000000000000..7a6fd503b7ad8c --- /dev/null +++ b/src/test_driver/linux-cirque/FailsafeTest.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +Copyright (c) 2021 Project CHIP Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import logging +import os +import pprint +import time +import sys + +from helper.CHIPTestBase import CHIPVirtualHome + +logger = logging.getLogger('MobileDeviceTest') +logger.setLevel(logging.INFO) + +sh = logging.StreamHandler() +sh.setFormatter( + logging.Formatter( + '%(asctime)s [%(name)s] %(levelname)s %(message)s')) +logger.addHandler(sh) + +CHIP_PORT = 5540 + +CIRQUE_URL = "http://localhost:5000" +CHIP_REPO = os.path.join(os.path.abspath( + os.path.dirname(__file__)), "..", "..", "..") +TEST_EXTPANID = "fedcba9876543210" +TEST_DISCRIMINATOR = 3840 +MATTER_DEVELOPMENT_PAA_ROOT_CERTS = "credentials/development/paa-root-certs" + +DEVICE_CONFIG = { + 'device0': { + 'type': 'MobileDevice', + 'base_image': 'connectedhomeip/chip-cirque-device-base', + 'capability': ['TrafficControl', 'Mount'], + 'rcp_mode': True, + 'docker_network': 'Ipv6', + 'traffic_control': {'latencyMs': 100}, + "mount_pairs": [[CHIP_REPO, CHIP_REPO]], + }, + 'device1': { + 'type': 'CHIPEndDevice', + 'base_image': 'connectedhomeip/chip-cirque-device-base', + 'capability': ['Thread', 'TrafficControl', 'Mount'], + 'rcp_mode': True, + 'docker_network': 'Ipv6', + 'traffic_control': {'latencyMs': 100}, + "mount_pairs": [[CHIP_REPO, CHIP_REPO]], + } +} + + +class TestFailsafe(CHIPVirtualHome): + def __init__(self, device_config): + super().__init__(CIRQUE_URL, device_config) + self.logger = logger + + def setup(self): + self.initialize_home() + + def test_routine(self): + self.run_controller_test() + + def run_controller_test(self): + ethernet_ip = [device['description']['ipv6_addr'] for device in self.non_ap_devices + if device['type'] == 'CHIPEndDevice'][0] + server_ids = [device['id'] for device in self.non_ap_devices + if device['type'] == 'CHIPEndDevice'] + req_ids = [device['id'] for device in self.non_ap_devices + if device['type'] == 'MobileDevice'] + + for server in server_ids: + self.execute_device_cmd(server, "CHIPCirqueDaemon.py -- run gdb -return-child-result -q -ex \"set 
pagination off\" -ex run -ex \"bt 25\" --args {} --thread --discriminator {}".format( + os.path.join(CHIP_REPO, "out/debug/standalone/chip-all-clusters-app"), TEST_DISCRIMINATOR)) + + self.reset_thread_devices(server_ids) + + req_device_id = req_ids[0] + + self.execute_device_cmd(req_device_id, "pip3 install {}".format(os.path.join( + CHIP_REPO, "out/debug/linux_x64_gcc/controller/python/chip-0.0-cp37-abi3-linux_x86_64.whl"))) + + command = "gdb -return-child-result -q -ex run -ex bt --args python3 {} -t 150 -a {} --paa-trust-store-path {}".format( + os.path.join( + CHIP_REPO, "src/controller/python/test/test_scripts/failsafe_tests.py"), + ethernet_ip, + os.path.join(CHIP_REPO, MATTER_DEVELOPMENT_PAA_ROOT_CERTS)) + ret = self.execute_device_cmd(req_device_id, command) + + self.assertEqual(ret['return_code'], '0', + "Test failed: non-zero return code") + + +if __name__ == "__main__": + sys.exit(TestFailsafe(DEVICE_CONFIG).run_test())