From 2f4baf2d646dfb9efc3cf978a0446537f77472cc Mon Sep 17 00:00:00 2001 From: Nivedita Sarkar Date: Thu, 2 Feb 2023 15:02:07 -0800 Subject: [PATCH] Add a timer to track whether we have received BDX init after a query image was successful - Currently if an ota requester has a successful query image and an image it available but if for any reason, the ota requester doesn't send BDX init, the provider will be stuck until we reboot the resident. In order to have a fail safe, we are adding a timer that starts after query image returns image available and waits for a BDX init to come. In case BDX init doesn't come, it times out and resets state - Add code to reset state if any API fails on the provider once we prepare for BDX transfer - Stop polling when BDX transfer reset is called - Return QueryImageResponse status busy instead of general failure if the sdk is busy and gets a sexond query image so accessory can handle the error correctly and retry until the sdk is done - When we send a message, handle the case where if an error happens in sending the message close the exchange context and reset state. If the message is a status report and it succeeds, null out the exchange and reset state --- .../CHIP/MTROTAProviderDelegateBridge.mm | 126 ++++++++++++++---- src/protocols/bdx/TransferFacilitator.cpp | 1 + 2 files changed, 99 insertions(+), 28 deletions(-) diff --git a/src/darwin/Framework/CHIP/MTROTAProviderDelegateBridge.mm b/src/darwin/Framework/CHIP/MTROTAProviderDelegateBridge.mm index 8592a645982d89..245b05c3d4af22 100644 --- a/src/darwin/Framework/CHIP/MTROTAProviderDelegateBridge.mm +++ b/src/darwin/Framework/CHIP/MTROTAProviderDelegateBridge.mm @@ -42,6 +42,14 @@ // TODO Expose a method onto the delegate to make that configurable. constexpr uint32_t kMaxBdxBlockSize = 1024; constexpr uint32_t kMaxBDXURILen = 256; + +// Since the BDX timeout is 5 minutes and we are starting this after query image is available and before the BDX init comes, +// we just double the timeout to give enough time for the BDX init to come in a reasonable amount of time. +constexpr System::Clock::Timeout kBdxInitReceivedTimeout = System::Clock::Seconds16(10 * 60); + +// Time in seconds after which the requestor should retry calling query image if busy status is receieved +constexpr uint32_t kDelayedActionTime = 120; + constexpr System::Clock::Timeout kBdxTimeout = System::Clock::Seconds16(5 * 60); // OTA Spec mandates >= 5 minutes constexpr System::Clock::Timeout kBdxPollIntervalMs = System::Clock::Milliseconds32(50); constexpr bdx::TransferRole kBdxRole = bdx::TransferRole::kSender; @@ -89,13 +97,12 @@ CHIP_ERROR Shutdown() VerifyOrReturnError(mExchangeMgr != nullptr, CHIP_ERROR_INCORRECT_STATE); mExchangeMgr->UnregisterUnsolicitedMessageHandlerForProtocol(Protocols::BDX::Id); + ResetState(); mExchangeMgr = nullptr; mSystemLayer = nullptr; mDelegateNotificationQueue = nil; - ResetState(); - return CHIP_NO_ERROR; } @@ -120,7 +127,39 @@ void SetDelegate(id delegate, dispatch_queue_t delegateN } } + void ResetState() + { + assertChipStackLockedByCurrentThread(); + if (mSystemLayer) { + mSystemLayer->CancelTimer(HandleBdxInitReceivedTimeoutExpired, this); + } + // TODO: Check if this can be removed. It seems like we can close the exchange context and reset transfer regardless. + if (!mInitialized) { + return; + } + Responder::ResetTransfer(); + ++mTransferGeneration; + mFabricIndex.ClearValue(); + mNodeId.ClearValue(); + + if (mExchangeCtx != nullptr) { + mExchangeCtx->Close(); + mExchangeCtx = nullptr; + } + + mInitialized = false; + } + private: + /** + * Timer callback called when we don't receive a BDX init within a reasonable time after a successful QueryImage response. + */ + static void HandleBdxInitReceivedTimeoutExpired(chip::System::Layer * systemLayer, void * state) + { + VerifyOrReturn(state != nullptr); + static_cast(state)->ResetState(); + } + CHIP_ERROR OnMessageToSend(TransferSession::OutputEvent & event) { assertChipStackLockedByCurrentThread(); @@ -137,12 +176,40 @@ CHIP_ERROR OnMessageToSend(TransferSession::OutputEvent & event) } auto & msgTypeData = event.msgTypeData; - return mExchangeCtx->SendMessage(msgTypeData.ProtocolId, msgTypeData.MessageType, std::move(event.MsgData), sendFlags); + // If there's an error sending the message, close the exchange and call ResetState. + // TODO: If we can remove the !mInitialized check in ResetState(), just calling ResetState() will suffice here. + CHIP_ERROR err + = mExchangeCtx->SendMessage(msgTypeData.ProtocolId, msgTypeData.MessageType, std::move(event.MsgData), sendFlags); + if (err != CHIP_NO_ERROR) { + mExchangeCtx->Close(); + mExchangeCtx = nullptr; + ResetState(); + } + // If we sent a status report and it was successful, set the Exchange context to null and call ResetState. + if (event.msgTypeData.HasMessageType(Protocols::SecureChannel::MsgType::StatusReport) && err == CHIP_NO_ERROR) { + mExchangeCtx = nullptr; + ResetState(); + } + return err; + } + + bdx::StatusCode GetBdxStatusCodeFromChipError(CHIP_ERROR err) { + if (err == CHIP_ERROR_INCORRECT_STATE) { + return bdx::StatusCode::kUnexpectedMessage; + } + if (err == CHIP_ERROR_INVALID_ARGUMENT) { + return bdx::StatusCode::kBadMessageContents; + } + return bdx::StatusCode::kUnknown; } CHIP_ERROR OnTransferSessionBegin(TransferSession::OutputEvent & event) { assertChipStackLockedByCurrentThread(); + // Once we receive the BDX init, cancel the BDX Init timeout and start the BDX session + if (mSystemLayer) { + mSystemLayer->CancelTimer(HandleBdxInitReceivedTimeoutExpired, this); + } VerifyOrReturnError(mFabricIndex.HasValue(), CHIP_ERROR_INCORRECT_STATE); VerifyOrReturnError(mNodeId.HasValue(), CHIP_ERROR_INCORRECT_STATE); @@ -169,8 +236,9 @@ CHIP_ERROR OnTransferSessionBegin(TransferSession::OutputEvent & event) } if (error != nil) { - LogErrorOnFailure([MTRError errorToCHIPErrorCode:error]); - LogErrorOnFailure(mTransfer.AbortTransfer(bdx::StatusCode::kUnknown)); + CHIP_ERROR err = [MTRError errorToCHIPErrorCode:error]; + LogErrorOnFailure(err); + LogErrorOnFailure(mTransfer.AbortTransfer(GetBdxStatusCodeFromChipError(err))); return; } @@ -328,6 +396,9 @@ void HandleTransferSessionOutput(TransferSession::OutputEvent & event) override switch (event.EventType) { case TransferSession::OutputEventType::kInitReceived: err = OnTransferSessionBegin(event); + if (err != CHIP_NO_ERROR) { + LogErrorOnFailure(mTransfer.AbortTransfer(GetBdxStatusCodeFromChipError(err))); + } break; case TransferSession::OutputEventType::kStatusReceived: ChipLogError(BDX, "Got StatusReport %x", static_cast(event.statusData.statusCode)); @@ -370,6 +441,10 @@ CHIP_ERROR ConfigureState(chip::FabricIndex fabricIndex, chip::NodeId nodeId) ResetState(); } + // Start a timer to track whether we receive a BDX init after a successful query image in a reasonable amount of time + CHIP_ERROR err = mSystemLayer->StartTimer(kBdxInitReceivedTimeout, HandleBdxInitReceivedTimeoutExpired, this); + VerifyOrReturnError(err == CHIP_NO_ERROR, CHIP_ERROR_INCORRECT_STATE); + mFabricIndex.SetValue(fabricIndex); mNodeId.SetValue(nodeId); @@ -378,27 +453,6 @@ CHIP_ERROR ConfigureState(chip::FabricIndex fabricIndex, chip::NodeId nodeId) return CHIP_NO_ERROR; } - void ResetState() - { - assertChipStackLockedByCurrentThread(); - - if (!mInitialized) { - return; - } - - Responder::ResetTransfer(); - ++mTransferGeneration; - mFabricIndex.ClearValue(); - mNodeId.ClearValue(); - - if (mExchangeCtx != nullptr) { - mExchangeCtx->Close(); - mExchangeCtx = nullptr; - } - - mInitialized = false; - } - bool mInitialized = false; Optional mFabricIndex; Optional mNodeId; @@ -574,11 +628,19 @@ bool GetPeerNodeInfo(CommandHandler * commandHandler, const ConcreteCommandPath CHIP_ERROR err = gOtaSender.PrepareForTransfer(fabricIndex, nodeId); if (CHIP_NO_ERROR != err) { LogErrorOnFailure(err); + if (err == CHIP_ERROR_BUSY) { + Commands::QueryImageResponse::Type busyResponse; + busyResponse.status = static_cast(MTROTASoftwareUpdateProviderOTAQueryStatusBusy); + busyResponse.delayedActionTime.SetValue(response.delayedActionTime.ValueOr(kDelayedActionTime)); + handler->AddResponse(cachedCommandPath, busyResponse); + handle.Release(); + return; + } handler->AddStatus(cachedCommandPath, Protocols::InteractionModel::Status::Failure); handle.Release(); + gOtaSender.ResetState(); return; } - auto targetNodeId = handler->GetExchangeContext()->GetSessionHandle()->AsSecureSession()->GetLocalScopedNodeId(); @@ -589,6 +651,7 @@ bool GetPeerNodeInfo(CommandHandler * commandHandler, const ConcreteCommandPath LogErrorOnFailure(err); handler->AddStatus(cachedCommandPath, Protocols::InteractionModel::Status::Failure); handle.Release(); + gOtaSender.ResetState(); return; } @@ -597,9 +660,16 @@ bool GetPeerNodeInfo(CommandHandler * commandHandler, const ConcreteCommandPath handle.Release(); return; } - + if (!isBDXProtocolSupported) { + Commands::QueryImageResponse::Type protocolNotSupportedResponse; + protocolNotSupportedResponse.status = static_cast(MTROTASoftwareUpdateProviderOTAQueryStatusBusy); + handler->AddResponse(cachedCommandPath, protocolNotSupportedResponse); + } else { + handler->AddResponse(cachedCommandPath, response); + } handler->AddResponse(cachedCommandPath, response); handle.Release(); + gOtaSender.ResetState(); } errorHandler:^(NSError *) { diff --git a/src/protocols/bdx/TransferFacilitator.cpp b/src/protocols/bdx/TransferFacilitator.cpp index 65ab653420c2cd..767d16313994e4 100644 --- a/src/protocols/bdx/TransferFacilitator.cpp +++ b/src/protocols/bdx/TransferFacilitator.cpp @@ -112,6 +112,7 @@ CHIP_ERROR Responder::PrepareForTransfer(System::Layer * layer, TransferRole rol void Responder::ResetTransfer() { mTransfer.Reset(); + mStopPolling = true; } CHIP_ERROR Initiator::InitiateTransfer(System::Layer * layer, TransferRole role, const TransferSession::TransferInitData & initData,