diff --git a/.github/workflows/darwin.yaml b/.github/workflows/darwin.yaml index 053eb96af69796..a7d8f31a819124 100644 --- a/.github/workflows/darwin.yaml +++ b/.github/workflows/darwin.yaml @@ -155,7 +155,7 @@ jobs: TEST_RUNNER_ASAN_OPTIONS=__CURRENT_VALUE__:detect_stack_use_after_return=1 xcodebuild test -target "Matter" -scheme "Matter Framework Tests" -sdk macosx -enableAddressSanitizer YES -enableUndefinedBehaviorSanitizer YES OTHER_CFLAGS='${inherited} -Werror -Wconversion -Wno-incomplete-umbrella -Wno-unguarded-availability-new' CHIP_IS_UBSAN=YES > >(tee /tmp/darwin/framework-tests/darwin-tests-asan.log) 2> >(tee /tmp/darwin/framework-tests/darwin-tests-asan-err.log >&2) # -enableThreadSanitizer instruments the code in Matter.framework, # but to instrument the code in the underlying libCHIP we need to pass CHIP_IS_TSAN=YES - #xcodebuild test -target "Matter" -scheme "Matter Framework Tests" -sdk macosx -enableThreadSanitizer YES OTHER_CFLAGS='${inherited} -Werror -Wconversion -Wno-incomplete-umbrella -Wno-unguarded-availability-new' CHIP_IS_TSAN=YES > >(tee /tmp/darwin/framework-tests/darwin-tests-tsan.log) 2> >(tee /tmp/darwin/framework-tests/darwin-tests-tsan-err.log >&2) + xcodebuild test -target "Matter" -scheme "Matter Framework Tests" -sdk macosx -enableThreadSanitizer YES OTHER_CFLAGS='${inherited} -Werror -Wconversion -Wno-incomplete-umbrella -Wno-unguarded-availability-new' CHIP_IS_TSAN=YES > >(tee /tmp/darwin/framework-tests/darwin-tests-tsan.log) 2> >(tee /tmp/darwin/framework-tests/darwin-tests-tsan-err.log >&2) working-directory: src/darwin/Framework - name: Uploading log files uses: actions/upload-artifact@v3 diff --git a/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm b/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm index 93f7abeb91a979..5bd050b0df028e 100644 --- a/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm +++ b/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm @@ -34,6 +34,8 @@ #import "MTRPersistentStorageDelegateBridge.h" #import "NSDataSpanConversion.h" +#import + #include #include #include @@ -88,6 +90,22 @@ @interface MTRDeviceControllerFactory () @property () chip::Credentials::DeviceAttestationVerifier * deviceAttestationVerifier; @property (readonly) BOOL advertiseOperational; @property (nonatomic, readonly) Credentials::IgnoreCertificateValidityPeriodPolicy * certificateValidityPolicy; +// Lock used to serialize access to the "controllers" array, since it needs to +// be touched from both whatever queue is starting controllers and from the +// Matter queue. The way this lock is used assumes that: +// +// 1) The only mutating accesses to the controllers array happen when the +// current queue is not the Matter queue. This is a good assumption, because +// the implementation of the fucntions that mutate the array do sync dispatch +// to the Matter queue, which would deadlock if they were called when that +// queue was the current queue. +// 2) It's our API consumer's responsibility to serialize access to us from +// outside. +// +// This means that we only take the lock around mutations of the array and +// accesses to the array that are from code running on the Matter queue. + +@property (nonatomic, readonly) os_unfair_lock controllersLock; - (BOOL)findMatchingFabric:(FabricTable &)fabricTable params:(MTRDeviceControllerStartupParams *)params @@ -123,6 +141,7 @@ - (instancetype)init _running = NO; _chipWorkQueue = DeviceLayer::PlatformMgrImpl().GetWorkQueue(); _controllerFactory = &DeviceControllerFactory::GetInstance(); + _controllersLock = OS_UNFAIR_LOCK_INIT; _sessionKeystore = new chip::Crypto::RawKeySessionKeystore(); if ([self checkForInitError:(_sessionKeystore != nullptr) logMsg:kErrorSessionKeystoreInit]) { @@ -166,8 +185,15 @@ - (void)dealloc [self cleanupInitObjects]; } +- (void)_assertCurrentQueueIsNotMatterQueue +{ + VerifyOrDie(!DeviceLayer::PlatformMgrImpl().IsWorkQueueCurrentQueue()); +} + - (BOOL)checkIsRunning:(NSError * __autoreleasing *)error { + [self _assertCurrentQueueIsNotMatterQueue]; + if ([self isRunning]) { return YES; } @@ -255,6 +281,8 @@ - (void)cleanupStartupObjects - (nullable NSArray *)knownFabrics { + [self _assertCurrentQueueIsNotMatterQueue]; + if (!self.isRunning) { return nil; } @@ -303,6 +331,8 @@ - (void)cleanupStartupObjects - (BOOL)startControllerFactory:(MTRDeviceControllerFactoryParams *)startupParams error:(NSError * __autoreleasing *)error; { + [self _assertCurrentQueueIsNotMatterQueue]; + if ([self isRunning]) { MTR_LOG_DEBUG("Ignoring duplicate call to startup, Matter controller factory already started..."); return YES; @@ -501,6 +531,8 @@ - (BOOL)startControllerFactory:(MTRDeviceControllerFactoryParams *)startupParams - (void)stopControllerFactory { + [self _assertCurrentQueueIsNotMatterQueue]; + if (![self isRunning]) { return; } @@ -524,6 +556,8 @@ - (void)stopControllerFactory - (MTRDeviceController * _Nullable)createControllerOnExistingFabric:(MTRDeviceControllerStartupParams *)startupParams error:(NSError * __autoreleasing *)error { + [self _assertCurrentQueueIsNotMatterQueue]; + if (![self checkIsRunning:error]) { MTR_LOG_ERROR("Trying to start controller while Matter controller factory is not running"); return nil; @@ -615,6 +649,8 @@ - (MTRDeviceController * _Nullable)createControllerOnExistingFabric:(MTRDeviceCo - (MTRDeviceController * _Nullable)createControllerOnNewFabric:(MTRDeviceControllerStartupParams *)startupParams error:(NSError * __autoreleasing *)error { + [self _assertCurrentQueueIsNotMatterQueue]; + if (![self isRunning]) { MTR_LOG_ERROR("Trying to start controller while Matter controller factory is not running"); return nil; @@ -699,6 +735,8 @@ - (MTRDeviceController * _Nullable)createControllerOnNewFabric:(MTRDeviceControl - (MTRDeviceController * _Nullable)createController { + [self _assertCurrentQueueIsNotMatterQueue]; + MTRDeviceController * controller = [[MTRDeviceController alloc] initWithFactory:self queue:_chipWorkQueue]; if (controller == nil) { MTR_LOG_ERROR("Failed to init controller"); @@ -716,7 +754,9 @@ - (MTRDeviceController * _Nullable)createController // Add the controller to _controllers now, so if we fail partway through its // startup we will still do the right cleanups. + os_unfair_lock_lock(&_controllersLock); [_controllers addObject:controller]; + os_unfair_lock_unlock(&_controllersLock); return controller; } @@ -733,6 +773,8 @@ - (BOOL)findMatchingFabric:(FabricTable &)fabricTable params:(MTRDeviceControllerStartupParams *)params fabric:(const FabricInfo * _Nullable * _Nonnull)fabric { + assertChipStackLockedByCurrentThread(); + CHIP_ERROR err = fabricTable.Init( { .storage = _persistentStorageDelegateBridge, .operationalKeystore = _keystore, .opCertStore = _opCertStore }); if (err != CHIP_NO_ERROR) { @@ -768,6 +810,8 @@ - (BOOL)findMatchingFabric:(FabricTable &)fabricTable // - (MTRDeviceController * _Nullable)maybeInitializeOTAProvider:(MTRDeviceController * _Nonnull)controller { + [self _assertCurrentQueueIsNotMatterQueue]; + VerifyOrReturnValue(_otaProviderDelegateBridge != nil, controller); VerifyOrReturnValue([_controllers count] == 1, controller); @@ -791,6 +835,8 @@ @implementation MTRDeviceControllerFactory (InternalMethods) - (void)controllerShuttingDown:(MTRDeviceController *)controller { + [self _assertCurrentQueueIsNotMatterQueue]; + if (![_controllers containsObject:controller]) { MTR_LOG_ERROR("Controller we don't know about shutting down"); return; @@ -808,7 +854,9 @@ - (void)controllerShuttingDown:(MTRDeviceController *)controller }); } + os_unfair_lock_lock(&_controllersLock); [_controllers removeObject:controller]; + os_unfair_lock_unlock(&_controllersLock); if ([_controllers count] == 0) { dispatch_sync(_chipWorkQueue, ^{ @@ -841,7 +889,13 @@ - (void)controllerShuttingDown:(MTRDeviceController *)controller - (nullable MTRDeviceController *)runningControllerForFabricIndex:(chip::FabricIndex)fabricIndex { - for (MTRDeviceController * existing in _controllers) { + assertChipStackLockedByCurrentThread(); + + os_unfair_lock_lock(&_controllersLock); + NSArray * controllersCopy = [_controllers copy]; + os_unfair_lock_unlock(&_controllersLock); + + for (MTRDeviceController * existing in controllersCopy) { if ([existing fabricIndex] == fabricIndex) { return existing; } @@ -852,7 +906,13 @@ - (nullable MTRDeviceController *)runningControllerForFabricIndex:(chip::FabricI - (void)operationalInstanceAdded:(chip::PeerId &)operationalID { - for (MTRDeviceController * controller in _controllers) { + assertChipStackLockedByCurrentThread(); + + os_unfair_lock_lock(&_controllersLock); + NSArray * controllersCopy = [_controllers copy]; + os_unfair_lock_unlock(&_controllersLock); + + for (MTRDeviceController * controller in controllersCopy) { auto * compressedFabricId = controller.compressedFabricID; if (compressedFabricId != nil && compressedFabricId.unsignedLongLongValue == operationalID.GetCompressedFabricId()) { ChipLogProgress(Controller, "Notifying controller at fabric index %u about new operational node 0x" ChipLogFormatX64, diff --git a/src/platform/Darwin/PlatformManagerImpl.cpp b/src/platform/Darwin/PlatformManagerImpl.cpp index 6e78a8195b004f..f1d5e4841e761b 100644 --- a/src/platform/Darwin/PlatformManagerImpl.cpp +++ b/src/platform/Darwin/PlatformManagerImpl.cpp @@ -88,7 +88,7 @@ CHIP_ERROR PlatformManagerImpl::_StopEventLoopTask() if (!mIsWorkQueueSuspended && !mIsWorkQueueSuspensionPending) { mIsWorkQueueSuspensionPending = true; - if (dispatch_get_current_queue() != mWorkQueue) + if (!IsWorkQueueCurrentQueue()) { // dispatch_sync is used in order to guarantee serialization of the caller with // respect to any tasks that might already be on the queue, or running. @@ -157,10 +157,15 @@ bool PlatformManagerImpl::_IsChipStackLockedByCurrentThread() const { // If we have no work queue, or it's suspended, then we assume our caller // knows what they are doing in terms of their own concurrency. - return !mWorkQueue || mIsWorkQueueSuspended || dispatch_get_current_queue() == mWorkQueue; + return !mWorkQueue || mIsWorkQueueSuspended || IsWorkQueueCurrentQueue(); }; #endif +bool PlatformManagerImpl::IsWorkQueueCurrentQueue() const +{ + return dispatch_get_current_queue() == mWorkQueue; +} + CHIP_ERROR PlatformManagerImpl::StartBleScan(BleScannerDelegate * delegate) { #if CONFIG_NETWORK_LAYER_BLE diff --git a/src/platform/Darwin/PlatformManagerImpl.h b/src/platform/Darwin/PlatformManagerImpl.h index a0aff2ab21e439..79da67a4991d2f 100644 --- a/src/platform/Darwin/PlatformManagerImpl.h +++ b/src/platform/Darwin/PlatformManagerImpl.h @@ -56,6 +56,8 @@ class PlatformManagerImpl final : public PlatformManager, public Internal::Gener return mWorkQueue; } + bool IsWorkQueueCurrentQueue() const; + CHIP_ERROR StartBleScan(BleScannerDelegate * delegate = nullptr); CHIP_ERROR StopBleScan(); CHIP_ERROR PrepareCommissioning();