Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ThreadSanitizer failure in controller factory. #26824

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/darwin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ jobs:
TEST_RUNNER_ASAN_OPTIONS=__CURRENT_VALUE__:detect_stack_use_after_return=1 xcodebuild test -target "Matter" -scheme "Matter Framework Tests" -sdk macosx -enableAddressSanitizer YES -enableUndefinedBehaviorSanitizer YES OTHER_CFLAGS='${inherited} -Werror -Wconversion -Wno-incomplete-umbrella -Wno-unguarded-availability-new' CHIP_IS_UBSAN=YES > >(tee /tmp/darwin/framework-tests/darwin-tests-asan.log) 2> >(tee /tmp/darwin/framework-tests/darwin-tests-asan-err.log >&2)
# -enableThreadSanitizer instruments the code in Matter.framework,
# but to instrument the code in the underlying libCHIP we need to pass CHIP_IS_TSAN=YES
#xcodebuild test -target "Matter" -scheme "Matter Framework Tests" -sdk macosx -enableThreadSanitizer YES OTHER_CFLAGS='${inherited} -Werror -Wconversion -Wno-incomplete-umbrella -Wno-unguarded-availability-new' CHIP_IS_TSAN=YES > >(tee /tmp/darwin/framework-tests/darwin-tests-tsan.log) 2> >(tee /tmp/darwin/framework-tests/darwin-tests-tsan-err.log >&2)
xcodebuild test -target "Matter" -scheme "Matter Framework Tests" -sdk macosx -enableThreadSanitizer YES OTHER_CFLAGS='${inherited} -Werror -Wconversion -Wno-incomplete-umbrella -Wno-unguarded-availability-new' CHIP_IS_TSAN=YES > >(tee /tmp/darwin/framework-tests/darwin-tests-tsan.log) 2> >(tee /tmp/darwin/framework-tests/darwin-tests-tsan-err.log >&2)
working-directory: src/darwin/Framework
- name: Uploading log files
uses: actions/upload-artifact@v3
Expand Down
64 changes: 62 additions & 2 deletions src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#import "MTRPersistentStorageDelegateBridge.h"
#import "NSDataSpanConversion.h"

#import <os/lock.h>

#include <controller/CHIPDeviceControllerFactory.h>
#include <credentials/CHIPCert.h>
#include <credentials/FabricTable.h>
Expand Down Expand Up @@ -88,6 +90,22 @@ @interface MTRDeviceControllerFactory ()
@property () chip::Credentials::DeviceAttestationVerifier * deviceAttestationVerifier;
@property (readonly) BOOL advertiseOperational;
@property (nonatomic, readonly) Credentials::IgnoreCertificateValidityPeriodPolicy * certificateValidityPolicy;
// Lock used to serialize access to the "controllers" array, since it needs to
// be touched from both whatever queue is starting controllers and from the
// Matter queue. The way this lock is used assumes that:
//
// 1) The only mutating accesses to the controllers array happen when the
// current queue is not the Matter queue. This is a good assumption, because
// the implementation of the fucntions that mutate the array do sync dispatch
// to the Matter queue, which would deadlock if they were called when that
// queue was the current queue.
// 2) It's our API consumer's responsibility to serialize access to us from
// outside.
//
// This means that we only take the lock around mutations of the array and
// accesses to the array that are from code running on the Matter queue.

@property (nonatomic, readonly) os_unfair_lock controllersLock;

- (BOOL)findMatchingFabric:(FabricTable &)fabricTable
params:(MTRDeviceControllerStartupParams *)params
Expand Down Expand Up @@ -123,6 +141,7 @@ - (instancetype)init
_running = NO;
_chipWorkQueue = DeviceLayer::PlatformMgrImpl().GetWorkQueue();
_controllerFactory = &DeviceControllerFactory::GetInstance();
_controllersLock = OS_UNFAIR_LOCK_INIT;

_sessionKeystore = new chip::Crypto::RawKeySessionKeystore();
if ([self checkForInitError:(_sessionKeystore != nullptr) logMsg:kErrorSessionKeystoreInit]) {
Expand Down Expand Up @@ -166,8 +185,15 @@ - (void)dealloc
[self cleanupInitObjects];
}

- (void)_assertCurrentQueueIsNotMatterQueue
{
VerifyOrDie(!DeviceLayer::PlatformMgrImpl().IsWorkQueueCurrentQueue());
}

- (BOOL)checkIsRunning:(NSError * __autoreleasing *)error
{
[self _assertCurrentQueueIsNotMatterQueue];

if ([self isRunning]) {
return YES;
}
Expand Down Expand Up @@ -255,6 +281,8 @@ - (void)cleanupStartupObjects

- (nullable NSArray<MTRFabricInfo *> *)knownFabrics
{
[self _assertCurrentQueueIsNotMatterQueue];

if (!self.isRunning) {
return nil;
}
Expand Down Expand Up @@ -303,6 +331,8 @@ - (void)cleanupStartupObjects

- (BOOL)startControllerFactory:(MTRDeviceControllerFactoryParams *)startupParams error:(NSError * __autoreleasing *)error;
{
[self _assertCurrentQueueIsNotMatterQueue];

if ([self isRunning]) {
MTR_LOG_DEBUG("Ignoring duplicate call to startup, Matter controller factory already started...");
return YES;
Expand Down Expand Up @@ -501,6 +531,8 @@ - (BOOL)startControllerFactory:(MTRDeviceControllerFactoryParams *)startupParams

- (void)stopControllerFactory
{
[self _assertCurrentQueueIsNotMatterQueue];

if (![self isRunning]) {
return;
}
Expand All @@ -524,6 +556,8 @@ - (void)stopControllerFactory
- (MTRDeviceController * _Nullable)createControllerOnExistingFabric:(MTRDeviceControllerStartupParams *)startupParams
error:(NSError * __autoreleasing *)error
{
[self _assertCurrentQueueIsNotMatterQueue];

if (![self checkIsRunning:error]) {
MTR_LOG_ERROR("Trying to start controller while Matter controller factory is not running");
return nil;
Expand Down Expand Up @@ -615,6 +649,8 @@ - (MTRDeviceController * _Nullable)createControllerOnExistingFabric:(MTRDeviceCo
- (MTRDeviceController * _Nullable)createControllerOnNewFabric:(MTRDeviceControllerStartupParams *)startupParams
error:(NSError * __autoreleasing *)error
{
[self _assertCurrentQueueIsNotMatterQueue];

if (![self isRunning]) {
MTR_LOG_ERROR("Trying to start controller while Matter controller factory is not running");
return nil;
Expand Down Expand Up @@ -699,6 +735,8 @@ - (MTRDeviceController * _Nullable)createControllerOnNewFabric:(MTRDeviceControl

- (MTRDeviceController * _Nullable)createController
{
[self _assertCurrentQueueIsNotMatterQueue];

MTRDeviceController * controller = [[MTRDeviceController alloc] initWithFactory:self queue:_chipWorkQueue];
if (controller == nil) {
MTR_LOG_ERROR("Failed to init controller");
Expand All @@ -716,7 +754,9 @@ - (MTRDeviceController * _Nullable)createController

// Add the controller to _controllers now, so if we fail partway through its
// startup we will still do the right cleanups.
os_unfair_lock_lock(&_controllersLock);
[_controllers addObject:controller];
os_unfair_lock_unlock(&_controllersLock);

return controller;
}
Expand All @@ -733,6 +773,8 @@ - (BOOL)findMatchingFabric:(FabricTable &)fabricTable
params:(MTRDeviceControllerStartupParams *)params
fabric:(const FabricInfo * _Nullable * _Nonnull)fabric
{
assertChipStackLockedByCurrentThread();

CHIP_ERROR err = fabricTable.Init(
{ .storage = _persistentStorageDelegateBridge, .operationalKeystore = _keystore, .opCertStore = _opCertStore });
if (err != CHIP_NO_ERROR) {
Expand Down Expand Up @@ -768,6 +810,8 @@ - (BOOL)findMatchingFabric:(FabricTable &)fabricTable
//
- (MTRDeviceController * _Nullable)maybeInitializeOTAProvider:(MTRDeviceController * _Nonnull)controller
{
[self _assertCurrentQueueIsNotMatterQueue];

VerifyOrReturnValue(_otaProviderDelegateBridge != nil, controller);
VerifyOrReturnValue([_controllers count] == 1, controller);

Expand All @@ -791,6 +835,8 @@ @implementation MTRDeviceControllerFactory (InternalMethods)

- (void)controllerShuttingDown:(MTRDeviceController *)controller
{
[self _assertCurrentQueueIsNotMatterQueue];

if (![_controllers containsObject:controller]) {
MTR_LOG_ERROR("Controller we don't know about shutting down");
return;
Expand All @@ -808,7 +854,9 @@ - (void)controllerShuttingDown:(MTRDeviceController *)controller
});
}

os_unfair_lock_lock(&_controllersLock);
[_controllers removeObject:controller];
os_unfair_lock_unlock(&_controllersLock);

if ([_controllers count] == 0) {
dispatch_sync(_chipWorkQueue, ^{
Expand Down Expand Up @@ -841,7 +889,13 @@ - (void)controllerShuttingDown:(MTRDeviceController *)controller

- (nullable MTRDeviceController *)runningControllerForFabricIndex:(chip::FabricIndex)fabricIndex
{
for (MTRDeviceController * existing in _controllers) {
assertChipStackLockedByCurrentThread();

os_unfair_lock_lock(&_controllersLock);
NSArray<MTRDeviceController *> * controllersCopy = [_controllers copy];
os_unfair_lock_unlock(&_controllersLock);

for (MTRDeviceController * existing in controllersCopy) {
if ([existing fabricIndex] == fabricIndex) {
return existing;
}
Expand All @@ -852,7 +906,13 @@ - (nullable MTRDeviceController *)runningControllerForFabricIndex:(chip::FabricI

- (void)operationalInstanceAdded:(chip::PeerId &)operationalID
{
for (MTRDeviceController * controller in _controllers) {
assertChipStackLockedByCurrentThread();
bzbarsky-apple marked this conversation as resolved.
Show resolved Hide resolved

os_unfair_lock_lock(&_controllersLock);
NSArray<MTRDeviceController *> * controllersCopy = [_controllers copy];
os_unfair_lock_unlock(&_controllersLock);

for (MTRDeviceController * controller in controllersCopy) {
auto * compressedFabricId = controller.compressedFabricID;
if (compressedFabricId != nil && compressedFabricId.unsignedLongLongValue == operationalID.GetCompressedFabricId()) {
ChipLogProgress(Controller, "Notifying controller at fabric index %u about new operational node 0x" ChipLogFormatX64,
Expand Down
9 changes: 7 additions & 2 deletions src/platform/Darwin/PlatformManagerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ CHIP_ERROR PlatformManagerImpl::_StopEventLoopTask()
if (!mIsWorkQueueSuspended && !mIsWorkQueueSuspensionPending)
{
mIsWorkQueueSuspensionPending = true;
if (dispatch_get_current_queue() != mWorkQueue)
if (!IsWorkQueueCurrentQueue())
{
// dispatch_sync is used in order to guarantee serialization of the caller with
// respect to any tasks that might already be on the queue, or running.
Expand Down Expand Up @@ -157,10 +157,15 @@ bool PlatformManagerImpl::_IsChipStackLockedByCurrentThread() const
{
// If we have no work queue, or it's suspended, then we assume our caller
// knows what they are doing in terms of their own concurrency.
return !mWorkQueue || mIsWorkQueueSuspended || dispatch_get_current_queue() == mWorkQueue;
return !mWorkQueue || mIsWorkQueueSuspended || IsWorkQueueCurrentQueue();
};
#endif

bool PlatformManagerImpl::IsWorkQueueCurrentQueue() const
{
return dispatch_get_current_queue() == mWorkQueue;
}

CHIP_ERROR PlatformManagerImpl::StartBleScan(BleScannerDelegate * delegate)
{
#if CONFIG_NETWORK_LAYER_BLE
Expand Down
2 changes: 2 additions & 0 deletions src/platform/Darwin/PlatformManagerImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class PlatformManagerImpl final : public PlatformManager, public Internal::Gener
return mWorkQueue;
}

bool IsWorkQueueCurrentQueue() const;

CHIP_ERROR StartBleScan(BleScannerDelegate * delegate = nullptr);
CHIP_ERROR StopBleScan();
CHIP_ERROR PrepareCommissioning();
Expand Down