From 136016035be7e00aa194208c1fde1aa55769bbdd Mon Sep 17 00:00:00 2001 From: "Irene Siu (Apple)" Date: Thu, 26 May 2022 10:32:33 -0700 Subject: [PATCH] [OTA-R] Fix retries on Query Image failure due to invalid session (#18765) * Fixed this scenerio: * Requestor does one successful QueryImage (CASE established) * Provider killed * Provider relaunched * Requestor tries QueryImage again and gets the timeout error -> requestor invalidates the CASE -> requestor tries queryImage again and this time succeeds * - Add retry cap - Remove debug messages * - Consolidate code for resetting mInvalidSessionRetryCount - scripts/helpers/restyle-diff.sh * Code review changes. * Code review changes: - Set kMaxInvalidSessionRetries to 1 - Remove mInvalidSessionRetryCount and use mProviderRetryCount instead * Code review changes. - Consolidate the code for updating mCurrentUpdateState. * Reinstate mInvalidSessionRetryCount. * Remove debug code. * Code review changes. - Update comments. --- .../ota-requestor/DefaultOTARequestor.cpp | 12 ++++---- .../DefaultOTARequestorDriver.cpp | 29 +++++++++++++++---- .../ota-requestor/DefaultOTARequestorDriver.h | 5 +++- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp b/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp index e7647d768b1bd5..86a9076678d22d 100644 --- a/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp +++ b/src/app/clusters/ota-requestor/DefaultOTARequestor.cpp @@ -668,19 +668,19 @@ void DefaultOTARequestor::RecordNewUpdateState(OTAUpdateStateEnum newState, OTAC } OtaRequestorServerOnStateTransition(mCurrentUpdateState, newState, reason, targetSoftwareVersion); - if ((newState == OTAUpdateStateEnum::kIdle) && (mCurrentUpdateState != OTAUpdateStateEnum::kIdle)) + OTAUpdateStateEnum prevState = mCurrentUpdateState; + // Update the new state before handling the state transition + mCurrentUpdateState = newState; + + if ((newState == OTAUpdateStateEnum::kIdle) && (prevState != OTAUpdateStateEnum::kIdle)) { IdleStateReason idleStateReason = MapErrorToIdleStateReason(error); - - // Inform the driver that the core logic has entered the Idle state mOtaRequestorDriver->HandleIdleStateEnter(idleStateReason); } - else if ((mCurrentUpdateState == OTAUpdateStateEnum::kIdle) && (newState != OTAUpdateStateEnum::kIdle)) + else if ((prevState == OTAUpdateStateEnum::kIdle) && (newState != OTAUpdateStateEnum::kIdle)) { mOtaRequestorDriver->HandleIdleStateExit(); } - - mCurrentUpdateState = newState; } void DefaultOTARequestor::RecordErrorUpdateState(CHIP_ERROR error, OTAChangeReasonEnum reason) diff --git a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp index 52d5ad0798be2e..b3d8544e85198d 100644 --- a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp +++ b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.cpp @@ -45,6 +45,7 @@ namespace { using namespace app::Clusters::OtaSoftwareUpdateRequestor; using namespace app::Clusters::OtaSoftwareUpdateRequestor::Structs; +constexpr uint8_t kMaxInvalidSessionRetries = 1; // Max # of query image retries to perform on invalid session error constexpr uint32_t kDelayQueryUponCommissioningSec = 30; // Delay before sending the initial image query after commissioning constexpr uint32_t kImmediateStartDelaySec = 1; // Delay before sending a query in response to UrgentUpdateAvailable constexpr System::Clock::Seconds32 kDefaultDelayedActionTime = System::Clock::Seconds32(120); @@ -58,9 +59,10 @@ DefaultOTARequestorDriver * ToDriver(void * context) void DefaultOTARequestorDriver::Init(OTARequestorInterface * requestor, OTAImageProcessorInterface * processor) { - mRequestor = requestor; - mImageProcessor = processor; - mProviderRetryCount = 0; + mRequestor = requestor; + mImageProcessor = processor; + mProviderRetryCount = 0; + mInvalidSessionRetryCount = 0; if (mImageProcessor->IsFirstImageRun()) { @@ -127,6 +129,11 @@ void DefaultOTARequestorDriver::HandleIdleStateExit() void DefaultOTARequestorDriver::HandleIdleStateEnter(IdleStateReason reason) { + if (reason != IdleStateReason::kInvalidSession) + { + mInvalidSessionRetryCount = 0; + } + switch (reason) { case IdleStateReason::kUnknown: @@ -138,8 +145,20 @@ void DefaultOTARequestorDriver::HandleIdleStateEnter(IdleStateReason reason) StartSelectedTimer(SelectedTimer::kPeriodicQueryTimer); break; case IdleStateReason::kInvalidSession: - // An invalid session is detected which may be temporary so try to query the same provider again - SendQueryImage(); + if (mInvalidSessionRetryCount < kMaxInvalidSessionRetries) + { + // An invalid session is detected which may be temporary (such as provider being restarted) + // so try to query the same provider again. Since the session has already been disconnected prior to + // getting here, this new query should trigger an attempt to re-establish CASE. If that subsequently fails, + // we conclusively know the provider is not available, and will fall into the else clause below on that attempt. + SendQueryImage(); + mInvalidSessionRetryCount++; + } + else + { + mInvalidSessionRetryCount = 0; + StartSelectedTimer(SelectedTimer::kPeriodicQueryTimer); + } break; } } diff --git a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h index 1fa0ba07f8f780..0d76af626d6dc6 100644 --- a/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h +++ b/src/app/clusters/ota-requestor/DefaultOTARequestorDriver.h @@ -109,7 +109,10 @@ class DefaultOTARequestorDriver : public OTARequestorDriver uint16_t maxDownloadBlockSize = 1024; // Maximum number of times to retry a BUSY OTA provider before moving to the next available one static constexpr uint8_t kMaxBusyProviderRetryCount = 3; - uint8_t mProviderRetryCount; // Track retry count for the current provider + // Track retry count for the current provider + uint8_t mProviderRetryCount = 0; + // Track query image retry count on invalid session error + uint8_t mInvalidSessionRetryCount = 0; }; } // namespace DeviceLayer