Skip to content

Commit

Permalink
chip-repl hits the Code is unsafe/racy assert when BLE commissioning …
Browse files Browse the repository at this point in the history
…is started (#26338)

* chip-repl hits the Code is unsafe/racy assert when BLE commissioning is started

* Add lock/unlock to CancelTimer

* Fix a few other issue

* Restyle

* Small fix

* Last few fixes
  • Loading branch information
tehampson authored and pull[bot] committed Apr 9, 2024
1 parent 0ee0e5f commit 2517501
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/controller/python/chip/ble/library_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from ctypes import c_bool, c_char_p, c_uint32, c_void_p, py_object

import chip.native
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback, ScanErrorCallback


# This prevents python auto-casting c_void_p to integers and
Expand Down Expand Up @@ -58,7 +58,7 @@ def _GetBleLibraryHandle() -> ctypes.CDLL:
VoidPointer, [VoidPointer])

setter.Set('pychip_ble_start_scanning', VoidPointer, [
py_object, VoidPointer, c_uint32, DeviceScannedCallback, ScanDoneCallback
py_object, VoidPointer, c_uint32, DeviceScannedCallback, ScanDoneCallback, ScanErrorCallback
])

return handle
23 changes: 19 additions & 4 deletions src/controller/python/chip/ble/scan_devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from typing import Generator

from chip.ble.library_handle import _GetBleLibraryHandle
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback, ScanErrorCallback


@DeviceScannedCallback
Expand All @@ -34,7 +34,12 @@ def ScanDoneCallback(closure):
closure.ScanCompleted()


def DiscoverAsync(timeoutMs: int, scanCallback, doneCallback, adapter=None):
@ScanErrorCallback
def ScanErrorCallback(closure, errorCode: int):
closure.ScanErrorCallback(errorCode)


def DiscoverAsync(timeoutMs: int, scanCallback, doneCallback, errorCallback, adapter=None):
"""Initiate a BLE discovery of devices with the given timeout.
NOTE: devices are not guaranteed to be unique. New entries are returned
Expand All @@ -44,6 +49,7 @@ def DiscoverAsync(timeoutMs: int, scanCallback, doneCallback, adapter=None):
timeoutMs: scan will complete after this time
scanCallback: callback when a device is found
doneCallback: callback when the scan is complete
errorCallback: callback when error occurred during scan
adapter: what adapter to choose. Either an AdapterInfo object or
a string with the adapter address. If None, the first
adapter on the system is used.
Expand Down Expand Up @@ -72,14 +78,17 @@ def ScanCompleted(self, *args):
doneCallback(*args)
ctypes.pythonapi.Py_DecRef(ctypes.py_object(self))

def ScanErrorCallback(self, *args):
errorCallback(*args)

closure = ScannerClosure()
ctypes.pythonapi.Py_IncRef(ctypes.py_object(closure))

scanner = handle.pychip_ble_start_scanning(
ctypes.py_object(closure),
handle.pychip_ble_adapter_list_get_raw_adapter(
nativeList), timeoutMs,
ScanFoundCallback, ScanDoneCallback)
ScanFoundCallback, ScanDoneCallback, ScanErrorCallback)

if scanner == 0:
raise Exception('Failed to initiate scan')
Expand Down Expand Up @@ -113,6 +122,12 @@ def DeviceFound(self, address, discriminator, vendor, product):
def ScanCompleted(self):
self.queue.put(None)

def ScanError(self, errorCode):
# TODO need to determine what we do with this error. Most of the time this
# error is just a timeout introduced in PR #24873, right before we get a
# ScanCompleted.
pass


def DiscoverSync(timeoutMs: int, adapter=None) -> Generator[DeviceInfo, None, None]:
"""Discover BLE devices over the specified period of time.
Expand All @@ -131,7 +146,7 @@ def DiscoverSync(timeoutMs: int, adapter=None) -> Generator[DeviceInfo, None, No

receiver = _DeviceInfoReceiver()
DiscoverAsync(timeoutMs, receiver.DeviceFound,
receiver.ScanCompleted, adapter)
receiver.ScanCompleted, receiver.ScanError, adapter)

while True:
data = receiver.queue.get()
Expand Down
2 changes: 2 additions & 0 deletions src/controller/python/chip/ble/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@
c_uint16, c_uint16)

ScanDoneCallback = CFUNCTYPE(None, py_object)

ScanErrorCallback = CFUNCTYPE(None, py_object, c_uint16)
28 changes: 20 additions & 8 deletions src/platform/Linux/bluez/ChipDeviceScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,16 @@ ChipDeviceScanner::~ChipDeviceScanner()
{
StopScan();

// In case the timeout timer is still active
chip::DeviceLayer::SystemLayer().CancelTimer(TimerExpiredCallback, this);
// mTimerExpired should only be set to true in the TimerExpiredCallback, which means we are in that callback
// right now so there is no need to cancel the timer. Doing so would result in deadlock trying to aquire the
// chip stack lock which we already currently have.
if (!mTimerExpired)
{
// In case the timeout timer is still active
DeviceLayer::PlatformMgr().LockChipStack();
chip::DeviceLayer::SystemLayer().CancelTimer(TimerExpiredCallback, this);
DeviceLayer::PlatformMgr().UnlockChipStack();
}

g_object_unref(mManager);
g_object_unref(mCancellable);
Expand Down Expand Up @@ -136,21 +144,25 @@ CHIP_ERROR ChipDeviceScanner::StartScan(System::Clock::Timeout timeout)
return CHIP_ERROR_INTERNAL;
}

DeviceLayer::PlatformMgr().LockChipStack();
CHIP_ERROR err = chip::DeviceLayer::SystemLayer().StartTimer(timeout, TimerExpiredCallback, static_cast<void *>(this));
DeviceLayer::PlatformMgr().UnlockChipStack();

if (err != CHIP_NO_ERROR)
{
ChipLogError(Ble, "Failed to schedule scan timeout.");
StopScan();
return err;
}
mTimerExpired = false;

return CHIP_NO_ERROR;
}

void ChipDeviceScanner::TimerExpiredCallback(chip::System::Layer * layer, void * appState)
{
ChipDeviceScanner * chipDeviceScanner = static_cast<ChipDeviceScanner *>(appState);
chipDeviceScanner->MarkTimerExpired();
chipDeviceScanner->mDelegate->OnScanError(CHIP_ERROR_TIMEOUT);
chipDeviceScanner->StopScan();
}
Expand Down Expand Up @@ -180,6 +192,11 @@ CHIP_ERROR ChipDeviceScanner::StopScan()
return CHIP_ERROR_INTERNAL;
}

ChipDeviceScannerDelegate * delegate = this->mDelegate;
// callback is explicitly allowed to delete the scanner (hence no more
// references to 'self' here)
delegate->OnScanComplete();

return CHIP_NO_ERROR;
}

Expand All @@ -192,12 +209,7 @@ CHIP_ERROR ChipDeviceScanner::MainLoopStopScan(ChipDeviceScanner * self)
ChipLogError(Ble, "Failed to stop discovery %s", error->message);
g_error_free(error);
}
ChipDeviceScannerDelegate * delegate = self->mDelegate;
self->mIsScanning = false;

// callback is explicitly allowed to delete the scanner (hence no more
// references to 'self' here)
delegate->OnScanComplete();
self->mIsScanning = false;

return CHIP_NO_ERROR;
}
Expand Down
5 changes: 5 additions & 0 deletions src/platform/Linux/bluez/ChipDeviceScanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class ChipDeviceScanner
/// Stop any currently running scan
CHIP_ERROR StopScan();

/// Should only be called by TimerExpiredCallback.
void MarkTimerExpired() { mTimerExpired = true; }

/// Create a new device scanner
///
/// Convenience method to allocate any required variables.
Expand Down Expand Up @@ -101,6 +104,8 @@ class ChipDeviceScanner
gulong mInterfaceChangedSignal = 0;
bool mIsScanning = false;
bool mIsStopping = false;
/// Used to track if timer has alread expired and doesn't need to be canceled.
bool mTimerExpired = false;
};

} // namespace Internal
Expand Down

0 comments on commit 2517501

Please sign in to comment.