Skip to content

Commit

Permalink
[Darwin] MTRDevice in-memory cache needs to be resilient to data stor…
Browse files Browse the repository at this point in the history
…e returning nil (project-chip#34182)

* [Darwin] MTRDevice in-memory cache needs to be resilient to data store returning nil

* Added tests and address PR comment

* Fixed log string

* Additional logging

* Fixed SubscriptionCallback object handling on invalidate and subscription reset

* Fixed error string for MTRErrorTests/testErrorSourcePaths
  • Loading branch information
jtung-apple authored Jul 22, 2024
1 parent 27c640c commit d666567
Show file tree
Hide file tree
Showing 5 changed files with 255 additions and 38 deletions.
88 changes: 84 additions & 4 deletions src/darwin/Framework/CHIP/MTRDevice.mm
Original file line number Diff line number Diff line change
Expand Up @@ -908,12 +908,17 @@ - (void)invalidate
_reattemptingSubscription = NO;

[_deviceController asyncDispatchToMatterQueue:^{
MTR_LOG("%@ invalidate disconnecting ReadClient and SubscriptionCallback", self);

// Destroy the read client and callback (has to happen on the Matter
// queue, to avoid deleting objects that are being referenced), to
// tear down the subscription. We will get no more callbacks from
// the subscrption after this point.
std::lock_guard lock(self->_lock);
self->_currentReadClient = nullptr;
if (self->_currentSubscriptionCallback) {
delete self->_currentSubscriptionCallback;
}
self->_currentSubscriptionCallback = nullptr;

[self _changeInternalState:MTRInternalDeviceStateUnsubscribed];
Expand All @@ -940,6 +945,7 @@ - (void)nodeMayBeAdvertisingOperational
// whether it might be.
- (void)_triggerResubscribeWithReason:(NSString *)reason nodeLikelyReachable:(BOOL)nodeLikelyReachable
{
MTR_LOG("%@ _triggerResubscribeWithReason called with reason %@", self, reason);
assertChipStackLockedByCurrentThread();

// We might want to trigger a resubscribe on our existing ReadClient. Do
Expand Down Expand Up @@ -1235,6 +1241,12 @@ - (void)_handleSubscriptionEstablished
- (void)_handleSubscriptionError:(NSError *)error
{
std::lock_guard lock(_lock);
[self _doHandleSubscriptionError:error];
}

- (void)_doHandleSubscriptionError:(NSError *)error
{
os_unfair_lock_assert_owner(&_lock);

[self _changeInternalState:MTRInternalDeviceStateUnsubscribed];
_unreportedEvents = nil;
Expand Down Expand Up @@ -1400,6 +1412,12 @@ - (void)_handleResubscriptionNeededWithDelay:(NSNumber *)resubscriptionDelayMs
- (void)_handleSubscriptionReset:(NSNumber * _Nullable)retryDelay
{
std::lock_guard lock(_lock);
[self _doHandleSubscriptionReset:retryDelay];
}

- (void)_doHandleSubscriptionReset:(NSNumber * _Nullable)retryDelay
{
os_unfair_lock_assert_owner(&_lock);

// If we are here, then either we failed to establish initial CASE, or we
// failed to send the initial SubscribeRequest message, or our ReadClient
Expand Down Expand Up @@ -1471,7 +1489,7 @@ - (void)_reattemptSubscriptionNowIfNeededWithReason:(NSString *)reason
return;
}

MTR_LOG("%@ reattempting subscription", self);
MTR_LOG("%@ reattempting subscription with reason %@", self, reason);
self.reattemptingSubscription = NO;
[self _setupSubscriptionWithReason:reason];
}
Expand Down Expand Up @@ -2100,6 +2118,22 @@ - (void)unitTestClearClusterData
}
#endif

- (void)_reconcilePersistedClustersWithStorage
{
os_unfair_lock_assert_owner(&self->_lock);

NSMutableSet * clusterPathsToRemove = [NSMutableSet set];
for (MTRClusterPath * clusterPath in _persistedClusters) {
MTRDeviceClusterData * data = [_deviceController.controllerDataStore getStoredClusterDataForNodeID:_nodeID endpointID:clusterPath.endpoint clusterID:clusterPath.cluster];
if (!data) {
[clusterPathsToRemove addObject:clusterPath];
}
}

MTR_LOG_ERROR("%@ Storage missing %lu / %lu clusters - reconciling in-memory records", self, static_cast<unsigned long>(clusterPathsToRemove.count), static_cast<unsigned long>(_persistedClusters.count));
[_persistedClusters minusSet:clusterPathsToRemove];
}

- (nullable MTRDeviceClusterData *)_clusterDataForPath:(MTRClusterPath *)clusterPath
{
os_unfair_lock_assert_owner(&self->_lock);
Expand Down Expand Up @@ -2132,8 +2166,16 @@ - (nullable MTRDeviceClusterData *)_clusterDataForPath:(MTRClusterPath *)cluster

// Page in the stored value for the data.
MTRDeviceClusterData * data = [_deviceController.controllerDataStore getStoredClusterDataForNodeID:_nodeID endpointID:clusterPath.endpoint clusterID:clusterPath.cluster];
MTR_LOG("%@ cluster path %@ cache miss - load from storage success %@", self, clusterPath, YES_NO(data));
if (data != nil) {
[_persistedClusterData setObject:data forKey:clusterPath];
} else {
// If clusterPath is in _persistedClusters and the data store returns nil for it, then the in-memory cache is now not dependable, and subscription should be reset and reestablished to reload cache from device

// First make sure _persistedClusters is consistent with storage, so repeated calls don't immediately re-trigger this
[self _reconcilePersistedClustersWithStorage];

[self _resetSubscriptionWithReasonString:[NSString stringWithFormat:@"Data store has no data for cluster %@", clusterPath]];
}

return data;
Expand Down Expand Up @@ -2303,13 +2345,43 @@ - (void)_stopConnectivityMonitoring
}
}

- (void)_resetSubscriptionWithReasonString:(NSString *)reasonString
{
os_unfair_lock_assert_owner(&self->_lock);
MTR_LOG_ERROR("%@ %@ - resetting subscription", self, reasonString);

[_deviceController asyncDispatchToMatterQueue:^{
MTR_LOG("%@ subscription reset disconnecting ReadClient and SubscriptionCallback", self);

std::lock_guard lock(self->_lock);
self->_currentReadClient = nullptr;
if (self->_currentSubscriptionCallback) {
delete self->_currentSubscriptionCallback;
}
self->_currentSubscriptionCallback = nullptr;

[self _doHandleSubscriptionError:nil];
// Use nil reset delay so that this keeps existing backoff timing
[self _doHandleSubscriptionReset:nil];
}
errorHandler:nil];
}

#ifdef DEBUG
- (void)unitTestResetSubscription
{
std::lock_guard lock(self->_lock);
[self _resetSubscriptionWithReasonString:@"Unit test reset subscription"];
}
#endif

// assume lock is held
- (void)_setupSubscriptionWithReason:(NSString *)reason
{
os_unfair_lock_assert_owner(&self->_lock);

if (![self _subscriptionsAllowed]) {
MTR_LOG("%@ _setupSubscription: Subscriptions not allowed. Do not set up subscription", self);
MTR_LOG("%@ _setupSubscription: Subscriptions not allowed. Do not set up subscription (reason: %@)", self, reason);
return;
}

Expand All @@ -2328,6 +2400,7 @@ - (void)_setupSubscriptionWithReason:(NSString *)reason

// for now just subscribe once
if (!NeedToStartSubscriptionSetup(_internalDeviceState)) {
MTR_LOG("%@ setupSubscription: no need to subscribe due to internal state %lu (reason: %@)", self, static_cast<unsigned long>(_internalDeviceState), reason);
return;
}

Expand Down Expand Up @@ -3758,14 +3831,21 @@ - (void)_storePersistedDeviceData
}

#ifdef DEBUG
- (MTRDeviceClusterData *)_getClusterDataForPath:(MTRClusterPath *)path
- (MTRDeviceClusterData *)unitTestGetClusterDataForPath:(MTRClusterPath *)path
{
std::lock_guard lock(_lock);

return [[self _clusterDataForPath:path] copy];
}

- (BOOL)_clusterHasBeenPersisted:(MTRClusterPath *)path
- (NSSet<MTRClusterPath *> *)unitTestGetPersistedClusters
{
std::lock_guard lock(_lock);

return [_persistedClusters copy];
}

- (BOOL)unitTestClusterHasBeenPersisted:(MTRClusterPath *)path
{
std::lock_guard lock(_lock);

Expand Down
Loading

0 comments on commit d666567

Please sign in to comment.