From 40962881d79a3660d01a889112d41dbd7d77bef6 Mon Sep 17 00:00:00 2001 From: Shubham Patil Date: Tue, 4 Jul 2023 22:40:43 +0530 Subject: [PATCH] CASE: Handle failure if unable to schedule handle/send sigma3c (#27226) * CASE: Handle failure if unable to schedule handle/send sigma3c - We are no longer unregistering the unsolicit message hander for sigma1. Based on handshake state and failure to schedule work, decide what to do. 1. If in middle of handshake but it's zombie, tear down the handshake. 2. If still in middle of handshake, return without responding. 3. Otherwise, jsut do a new handshake. - Added APIs in helper to check if it fails to schedule after work callback and to re run it from foreground thread. - Added wrapper around the APIs for HandleSigma3 and SendSigma3 cases. * Restyled by clang-format * Address review comments Added the accessor for CASESession state. Now, all the logic for checking and resetting stays with CASESession * Fix some API docs * Addressed review comments * Update src/protocols/secure_channel/CASESession.cpp Co-authored-by: Marc Lepage <67919234+mlepage-google@users.noreply.github.com> * Update src/protocols/secure_channel/CASEServer.cpp Co-authored-by: Marc Lepage <67919234+mlepage-google@users.noreply.github.com> * Moved status check before setting atomic variable * fix build failures --------- Co-authored-by: Restyled.io Co-authored-by: Justin Wood Co-authored-by: Marc Lepage <67919234+mlepage-google@users.noreply.github.com> --- src/protocols/secure_channel/CASEServer.cpp | 17 ++++++- src/protocols/secure_channel/CASESession.cpp | 48 ++++++++++++++++++++ src/protocols/secure_channel/CASESession.h | 11 ++++- 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/src/protocols/secure_channel/CASEServer.cpp b/src/protocols/secure_channel/CASEServer.cpp index b0d74694a5196b..082be87c0a5f0e 100644 --- a/src/protocols/secure_channel/CASEServer.cpp +++ b/src/protocols/secure_channel/CASEServer.cpp @@ -73,6 +73,21 @@ CHIP_ERROR CASEServer::OnUnsolicitedMessageReceived(const PayloadHeader & payloa CHIP_ERROR CASEServer::OnMessageReceived(Messaging::ExchangeContext * ec, const PayloadHeader & payloadHeader, System::PacketBufferHandle && payload) { + if (GetSession().GetState() != CASESession::State::kInitialized) + { + // We are in the middle of CASE handshake + + // Invoke watchdog to fix any stuck handshakes + bool watchdogFired = GetSession().InvokeBackgroundWorkWatchdog(); + if (!watchdogFired) + { + // Handshake wasn't stuck, let it continue its work + // TODO: Send Busy response, #27473 + ChipLogError(Inet, "CASE session is in establishing state, returning without responding"); + return CHIP_NO_ERROR; + } + } + if (!ec->GetSessionHandle()->IsUnauthenticatedSession()) { ChipLogError(Inet, "CASE Server received Sigma1 message %s EC %p", "over encrypted session. Ignoring.", ec); @@ -86,8 +101,6 @@ CHIP_ERROR CASEServer::OnMessageReceived(Messaging::ExchangeContext * ec, const // TODO - Enable multiple concurrent CASE session establishment // https://github.com/project-chip/connectedhomeip/issues/8342 - ChipLogProgress(Inet, "CASE Server disabling CASE session setups"); - mExchangeManager->UnregisterUnsolicitedMessageHandlerForType(Protocols::SecureChannel::MsgType::CASE_Sigma1); err = GetSession().OnMessageReceived(ec, payloadHeader, std::move(payload)); SuccessOrExit(err); diff --git a/src/protocols/secure_channel/CASESession.cpp b/src/protocols/secure_channel/CASESession.cpp index def2ce11cf0c08..35597cde87241c 100644 --- a/src/protocols/secure_channel/CASESession.cpp +++ b/src/protocols/secure_channel/CASESession.cpp @@ -171,6 +171,9 @@ class CASESession::WorkHelper // No scheduling, no outstanding work, no shared lifetime management. CHIP_ERROR DoWork() { + // Ensure that this function is being called from main Matter thread + assertChipStackLockedByCurrentThread(); + VerifyOrReturnError(mSession && mWorkCallback && mAfterWorkCallback, CHIP_ERROR_INCORRECT_STATE); auto * helper = this; bool cancel = false; @@ -203,6 +206,17 @@ class CASESession::WorkHelper bool IsCancelled() const { return mSession.load() == nullptr; } + // This API returns true when background thread fails to schedule the AfterWorkCallback + bool UnableToScheduleAfterWorkCallback() { return mScheduleAfterWorkFailed.load(); } + + // Do after work immediately. + // No scheduling, no outstanding work, no shared lifetime management. + void DoAfterWork() + { + VerifyOrDie(UnableToScheduleAfterWorkCallback()); + AfterWorkHandler(reinterpret_cast(this)); + } + private: // Create a work helper using the specified session, work callback, after work callback, and data (template arg). // Lifetime is not managed, see `Create` for that option. @@ -226,6 +240,12 @@ class CASESession::WorkHelper auto status = DeviceLayer::PlatformMgr().ScheduleWork(AfterWorkHandler, reinterpret_cast(helper)); if (status != CHIP_NO_ERROR) { + // We failed to schedule after work callback, so setting mScheduleAfterWorkFailed flag to true + // This can be checked from foreground thread and after work callback can be retried + helper->mStatus = status; + ChipLogError(SecureChannel, "Failed to Schedule the AfterWorkCallback on foreground thread"); + helper->mScheduleAfterWorkFailed.store(true); + // Release strong ptr since scheduling failed helper->mStrongPtr.reset(); } @@ -234,6 +254,9 @@ class CASESession::WorkHelper // Handler for the after work callback. static void AfterWorkHandler(intptr_t arg) { + // Ensure that this function is being called from main Matter thread + assertChipStackLockedByCurrentThread(); + auto * helper = reinterpret_cast(arg); // Hold strong ptr while work is handled auto strongPtr(std::move(helper->mStrongPtr)); @@ -263,6 +286,10 @@ class CASESession::WorkHelper // Return value of `mWorkCallback`, passed to `mAfterWorkCallback`. CHIP_ERROR mStatus; + // If background thread fails to schedule AfterWorkCallback then this flag is set to true + // and CASEServer then can check this one and run the AfterWorkCallback for us. + std::atomic mScheduleAfterWorkFailed{ false }; + public: // Data passed to `mWorkCallback` and `mAfterWorkCallback`. DATA mData; @@ -2179,4 +2206,25 @@ System::Clock::Timeout CASESession::ComputeSigma2ResponseTimeout(const ReliableM kExpectedHighProcessingTime; } +bool CASESession::InvokeBackgroundWorkWatchdog() +{ + bool watchdogFired = false; + + if (mSendSigma3Helper && mSendSigma3Helper->UnableToScheduleAfterWorkCallback()) + { + ChipLogError(SecureChannel, "SendSigma3Helper was unable to schedule the AfterWorkCallback"); + mSendSigma3Helper->DoAfterWork(); + watchdogFired = true; + } + + if (mHandleSigma3Helper && mHandleSigma3Helper->UnableToScheduleAfterWorkCallback()) + { + ChipLogError(SecureChannel, "HandleSigma3Helper was unable to schedule the AfterWorkCallback"); + mHandleSigma3Helper->DoAfterWork(); + watchdogFired = true; + } + + return watchdogFired; +} + } // namespace chip diff --git a/src/protocols/secure_channel/CASESession.h b/src/protocols/secure_channel/CASESession.h index f0caa1675f7845..7453b6b5002dc4 100644 --- a/src/protocols/secure_channel/CASESession.h +++ b/src/protocols/secure_channel/CASESession.h @@ -191,8 +191,6 @@ class DLL_EXPORT CASESession : public Messaging::UnsolicitedMessageHandler, **/ void Clear(); -private: - friend class TestCASESession; enum class State : uint8_t { kInitialized = 0, @@ -207,6 +205,15 @@ class DLL_EXPORT CASESession : public Messaging::UnsolicitedMessageHandler, kHandleSigma3Pending = 9, }; + State GetState() { return mState; } + + // Returns true if the CASE session handshake was stuck due to failing to schedule work on the Matter thread. + // If this function returns true, the CASE session has been reset and is ready for a new session establishment. + bool InvokeBackgroundWorkWatchdog(); + +private: + friend class TestCASESession; + /* * Initialize the object given a reference to the SessionManager, certificate validity policy and a delegate which will be * notified of any further progress on this session.