Skip to content

Commit

Permalink
[mrp] Fix MRP after a recent refactoring (project-chip#13413)
Browse files Browse the repository at this point in the history
PR 11988 has broken StartTimer call in ReliableMessageMgr
by passing a timestamp instead of a delay. Consequently,
retransmissions stopped working.

Also, it was not caught because unit tests call the timeout
function explicitly instead of relying on the system layer.

Fix the issue and enhance unit tests. Additionally, fix
a crash on access to AsSecureSession() if a retransmission
occurs during PASE or CASE.
  • Loading branch information
Damian-Nordic authored and step0035 committed Feb 8, 2022
1 parent 33d5198 commit 37ca8d7
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 37 deletions.
1 change: 1 addition & 0 deletions src/controller/CHIPDeviceController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ void DeviceController::OnFirstMessageDeliveryFailed(const SessionHandle & sessio
{
VerifyOrReturn(mState == State::Initialized,
ChipLogError(Controller, "OnFirstMessageDeliveryFailed was called in incorrect state"));
VerifyOrReturn(session->GetSessionType() == Transport::Session::SessionType::kSecure);
UpdateDevice(session->AsSecureSession()->GetPeerNodeId());
}

Expand Down
7 changes: 6 additions & 1 deletion src/messaging/ReliableMessageMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include <messaging/Flags.h>
#include <messaging/ReliableMessageContext.h>

using namespace chip::System::Clock::Literals;

namespace chip {
namespace Messaging {

Expand Down Expand Up @@ -308,7 +310,10 @@ void ReliableMessageMgr::StartTimer()
#endif

StopTimer();
VerifyOrDie(mSystemLayer->StartTimer(nextWakeTime, Timeout, this) == CHIP_NO_ERROR);

const System::Clock::Timestamp now = System::SystemClock().GetMonotonicTimestamp();
const auto nextWakeDelay = (nextWakeTime > now) ? nextWakeTime - now : 0_ms;
VerifyOrDie(mSystemLayer->StartTimer(nextWakeDelay, Timeout, this) == CHIP_NO_ERROR);
}
else
{
Expand Down
2 changes: 2 additions & 0 deletions src/messaging/tests/MessagingContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ class LoopbackMessagingContext : public MessagingContext

TransportMgrBase & GetTransportMgr() { return mTransportManager; }

IOContext & GetIOContext() { return mIOContext; }

/*
* For unit-tests that simulate end-to-end transmission and reception of messages in loopback mode,
* this mode better replicates a real-functioning stack that correctly handles the processing
Expand Down
66 changes: 32 additions & 34 deletions src/messaging/tests/TestReliableMessageProtocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,8 @@ void CheckResendApplicationMessage(nlTestSuite * inSuite, void * inContext)
NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 1);
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);

// sleep 65 ms to trigger first re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the first re-transmit (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; });
ctx.DrainAndServiceIO();

// Ensure the retransmit message was dropped, and is still there in the retransmit table
Expand All @@ -238,9 +237,8 @@ void CheckResendApplicationMessage(nlTestSuite * inSuite, void * inContext)
NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 2);
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);

// sleep another 65 ms to trigger second re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the second re-transmit (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; });
ctx.DrainAndServiceIO();

// Ensure the retransmit message was NOT dropped, and the retransmit table is empty, as we should have gotten an ack
Expand Down Expand Up @@ -290,9 +288,8 @@ void CheckCloseExchangeAndResendApplicationMessage(nlTestSuite * inSuite, void *
NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 1);
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);

// sleep 65 ms to trigger first re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the first re-transmit (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; });
ctx.DrainAndServiceIO();

// Ensure the retransmit message was dropped, and is still there in the retransmit table
Expand All @@ -301,9 +298,8 @@ void CheckCloseExchangeAndResendApplicationMessage(nlTestSuite * inSuite, void *
NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 2);
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);

// sleep another 65 ms to trigger second re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the second re-transmit (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; });
ctx.DrainAndServiceIO();

// Ensure the retransmit message was NOT dropped, and the retransmit table is empty, as we should have gotten an ack
Expand Down Expand Up @@ -349,9 +345,8 @@ void CheckFailedMessageRetainOnSend(nlTestSuite * inSuite, void * inContext)
// Ensure the message was dropped
NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 1);

// sleep 65 ms to trigger first re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the first re-transmit (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; });
ctx.DrainAndServiceIO();

// Ensure the retransmit table is empty, as we did not provide a message to retain
Expand Down Expand Up @@ -441,9 +436,8 @@ void CheckResendApplicationMessageWithPeerExchange(nlTestSuite * inSuite, void *
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
NL_TEST_ASSERT(inSuite, !mockReceiver.IsOnMessageReceivedCalled);

// sleep 65 ms to trigger first re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the first re-transmit (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; });
ctx.DrainAndServiceIO();

// Ensure the retransmit message was not dropped, and is no longer in the retransmit table
Expand Down Expand Up @@ -513,9 +507,8 @@ void CheckDuplicateMessageClosedExchange(nlTestSuite * inSuite, void * inContext
err = ctx.GetExchangeManager().UnregisterUnsolicitedMessageHandlerForType(Echo::MsgType::EchoRequest);
NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR);

// sleep 65 ms to trigger first re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the first re-transmit and ack (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; });
ctx.DrainAndServiceIO();

// Ensure the retransmit message was sent and the ack was sent
Expand Down Expand Up @@ -574,9 +567,8 @@ void CheckResendSessionEstablishmentMessageWithPeerExchange(nlTestSuite * inSuit
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1);
NL_TEST_ASSERT(inSuite, !mockReceiver.IsOnMessageReceivedCalled);

// sleep 65 ms to trigger first re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the first re-transmit (should take 64ms)
inctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; });
inctx.DrainAndServiceIO();

// Ensure the retransmit message was not dropped, and is no longer in the retransmit table
Expand Down Expand Up @@ -649,9 +641,8 @@ void CheckDuplicateMessage(nlTestSuite * inSuite, void * inContext)
mockReceiver.mDropAckResponse = false;
mockReceiver.mRetainExchange = false;

// sleep 65 ms to trigger first re-transmit
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for the first re-transmit and ack (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; });
ctx.DrainAndServiceIO();

// Ensure the retransmit message was sent and the ack was sent
Expand Down Expand Up @@ -1186,9 +1177,8 @@ void CheckLostResponseWithPiggyback(nlTestSuite * inSuite, void * inContext)
mockSender.IsOnMessageReceivedCalled = false;
mockSender.mReceivedPiggybackAck = false;

// sleep 65 ms to trigger re-transmit from sender
chip::test_utils::SleepMillis(65);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for re-transmit from sender and ack (should take 64ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 4; });
ctx.DrainAndServiceIO();

// We resent our first message, which did not make it to the app-level
Expand Down Expand Up @@ -1216,9 +1206,8 @@ void CheckLostResponseWithPiggyback(nlTestSuite * inSuite, void * inContext)
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0);
}

// sleep 65*3 ms to trigger re-transmit from receiver
chip::test_utils::SleepMillis(65 * 3);
ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm);
// Wait for re-transmit from receiver (should take 256ms)
ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 6; });
ctx.DrainAndServiceIO();

// And now we've definitely resent our response message, which should show
Expand Down Expand Up @@ -1355,6 +1344,14 @@ void CheckLostStandaloneAck(nlTestSuite * inSuite, void * inContext)
NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0);
}

int InitializeTestCase(void * inContext)
{
TestContext & ctx = *static_cast<TestContext *>(inContext);
ctx.GetSessionAliceToBob()->AsSecureSession()->SetMRPConfig(gDefaultMRPConfig);
ctx.GetSessionBobToAlice()->AsSecureSession()->SetMRPConfig(gDefaultMRPConfig);
return SUCCESS;
}

/**
* TODO: A test that we should have but can't write with the existing
* infrastructure we have:
Expand Down Expand Up @@ -1403,7 +1400,8 @@ nlTestSuite sSuite =
"Test-CHIP-ReliableMessageProtocol",
&sTests[0],
TestContext::InitializeAsync,
TestContext::Finalize
TestContext::Finalize,
InitializeTestCase,
};
// clang-format on

Expand Down
4 changes: 2 additions & 2 deletions src/transport/raw/tests/NetworkTestHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ CHIP_ERROR IOContext::Shutdown()

void IOContext::DriveIO()
{
// Set the select timeout to 100ms
constexpr uint32_t kSleepTimeMilliseconds = 100;
// Set the select timeout to 10ms
constexpr uint32_t kSleepTimeMilliseconds = 10;
ServiceEvents(kSleepTimeMilliseconds);
}

Expand Down

0 comments on commit 37ca8d7

Please sign in to comment.