[3/3] Spec-compliant CASE eviction policy algorithm (#19903)
* Session Eviction Policy Algorithm.

This implements a reasonably complete session eviction policy algorithm
that meets the spec minima for CASE sessions per fabric and ensures those
minima can be upheld at all times on a given fabric regardless of activity
on other fabrics. It also adds a more nuanced selection algorithm to
correctly balance the multiple factors that go into selecting a session
for eviction.
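As a back-of-the-envelope illustration of the capacity math behind those minima (the constant names below are hypothetical stand-ins, not the SDK's actual configuration macros), the spec requires at least 5 fabrics with 3 CASE sessions each, which is where the default pool size of 17 mentioned later in this diff comes from:

```cpp
#include <cstddef>

// Hypothetical names for illustration; the SDK configures this via its own
// macros (e.g. CHIP_CONFIG_SECURE_SESSION_POOL_SIZE, default 17).
constexpr size_t kMinSupportedFabrics      = 5; // spec minimum number of fabrics
constexpr size_t kMinCASESessionsPerFabric = 3; // spec minimum CASE sessions per fabric
constexpr size_t kExtraSlots               = 2; // assumed headroom (e.g. PASE / in-progress CASE)

// To uphold every fabric's minimum regardless of activity on other fabrics,
// the pool must be able to hold all per-fabric minima simultaneously.
constexpr size_t kSessionPoolSize = kMinSupportedFabrics * kMinCASESessionsPerFabric + kExtraSlots;

static_assert(kSessionPoolSize == 17, "5 * 3 plus 2 slots of assumed headroom");
```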

* Made the temporary SortableSession list a stack-allocated array instead of
allocating it out of the heap.

* Review feedback

* Review feedback

* Apply suggestions from code review

Co-authored-by: Boris Zbarsky <[email protected]>

2 people authored and pull[bot] committed Jul 24, 2023
1 parent ea41b72 commit 3d2abe3
Showing 7 changed files with 692 additions and 47 deletions.
91 changes: 85 additions & 6 deletions src/controller/python/test/test_scripts/base.py
@@ -370,25 +370,104 @@ async def TestCaseEviction(self, nodeid: int):
# of eviction, just that allocation and CASE session establishment proceeds successfully on both
# the controller and target.
#
-for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 3):
for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 2):
self.devCtrl.CloseSession(nodeid)
await self.devCtrl.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)])

self.logger.info("Testing CASE defunct logic")

#
-# This tests establishing a subscription on a given CASE session, then mark it defunct (to simulate
# This test establishes a subscription on a given CASE session, then marks it defunct (to simulate
# encountering a transport timeout on the session).
#
# At the max interval, we should still have a valid subscription.
# Then, we write to the subscribed attribute from a *different* fabric and check to ensure we still
# get a report on the previously established subscription. Since the session was just marked defunct,
# it should return to being active and a report should get delivered.
#
sawValueChange = False

def OnValueChange(path: Attribute.TypedAttributePath, transaction: Attribute.SubscriptionTransaction) -> None:
nonlocal sawValueChange
self.logger.info("Saw value change!")
if (path.AttributeType == Clusters.TestCluster.Attributes.Int8u and path.Path.EndpointId == 1):
sawValueChange = True

self.logger.info("Testing CASE defunct logic")

sub = await self.devCtrl.ReadAttribute(nodeid, [(Clusters.TestCluster.Attributes.Int8u)], reportInterval=(0, 1))
sub.SetAttributeUpdateCallback(OnValueChange)

#
# This marks the session defunct.
#
sub = await self.devCtrl.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)], reportInterval=(0, 2))
await asyncio.sleep(2)
self.devCtrl.CloseSession(nodeid)
await asyncio.sleep(4)

#
# Now write the attribute from fabric2, give it some time before checking if the report
# was received.
#
await self.devCtrl2.WriteAttribute(nodeid, [(1, Clusters.TestCluster.Attributes.Int8u(4))])
time.sleep(2)

sub.Shutdown()

if sawValueChange is False:
self.logger.error("Didn't see value change in time, likely because sub got terminated due to unexpected session eviction!")
return False
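
For readers unfamiliar with the "defunct" state this test exercises, here is a minimal sketch of the intended lifecycle, using simplified names (the SDK's SecureSession tracks more states and does this inside its transport layer):

```cpp
#include <cstdint>

// Simplified model of the defunct/active transition the test above relies on.
enum class State : uint8_t
{
    kActive,  // healthy session
    kDefunct, // suspected dead (e.g. after a transport timeout), but not torn down
};

struct SessionSketch
{
    State state = State::kActive;

    // A transport timeout demotes the session instead of destroying it...
    void MarkAsDefunct() { state = State::kDefunct; }

    // ...and any received message (such as the subscription report the test
    // waits for) promotes it back to active.
    void NoteReceivedMessage()
    {
        if (state == State::kDefunct)
        {
            state = State::kActive;
        }
    }
};
```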

#
# In this test, we're going to setup a subscription on fabric1 through devCtl, then, constantly keep
# evicting sessions on fabric2 (devCtl2) by cycling through closing sessions followed by issuing a Read. This
# should result in evictions on the server on fabric2, but not affect any sessions on fabric1. To test this,
# we're going to setup a subscription to an attribute prior to the cycling reads, and check at the end of the
# test that it's still valid by writing to an attribute from a *different* fabric, and validating that we see
# the change on the established subscription. That proves that the session from fabric1 is still valid and untouched.
#
self.logger.info("Testing fabric-isolated CASE eviction")

sawValueChange = False
sub = await self.devCtrl.ReadAttribute(nodeid, [(Clusters.TestCluster.Attributes.Int8u)], reportInterval=(0, 1))
sub.SetAttributeUpdateCallback(OnValueChange)

for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 2):
self.devCtrl2.CloseSession(nodeid)
await self.devCtrl2.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)])

#
# Now write the attribute from fabric2, give it some time before checking if the report
# was received.
#
await self.devCtrl2.WriteAttribute(nodeid, [(1, Clusters.TestCluster.Attributes.Int8u(4))])
time.sleep(2)

sub.Shutdown()

if sawValueChange is False:
self.logger.error("Didn't see value change in time, likely because sub got terminated due to the other fabric (fabric2) evicting sessions!")
return False

#
# Do the same test again, but reversing the roles of fabric1 and fabric2.
#
self.logger.info("Testing fabric-isolated CASE eviction (reverse)")

sawValueChange = False
sub = await self.devCtrl2.ReadAttribute(nodeid, [(Clusters.TestCluster.Attributes.Int8u)], reportInterval=(0, 1))
sub.SetAttributeUpdateCallback(OnValueChange)

for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 2):
self.devCtrl.CloseSession(nodeid)
await self.devCtrl.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)])

await self.devCtrl.WriteAttribute(nodeid, [(1, Clusters.TestCluster.Attributes.Int8u(4))])
time.sleep(2)

sub.Shutdown()

if sawValueChange is False:
self.logger.error("Didn't see value change in time, likely because sub got terminated due to the other fabric (fabric1) evicting sessions!")
return False

return True

async def TestMultiFabric(self, ip: str, setuppin: int, nodeid: int):
16 changes: 3 additions & 13 deletions src/controller/python/test/test_scripts/mobile-device-test.py
@@ -76,24 +76,14 @@ def ethernet_commissioning(test: BaseTestHelper, discriminator: int, setup_pin:
nodeid=device_nodeid),
"Failed to finish key exchange")

-#
-# Run this before the MultiFabric test, since it will result in the resultant CASE session
-# on fabric2 to be evicted (due to the stressful nature of that test) on the target.
-#
-# It doesn't actually evict the CASE session for fabric2 on the client, since we prioritize
-# defunct sessions for eviction first, which means our CASE session on fabric2 remains preserved
-# throughout the stress test. This results in a mis-match later.
-#
-# TODO: Once we implement fabric-adjusted LRU, we should see if this issue remains (it shouldn't)
-#
-logger.info("Testing CASE Eviction")
-FailIfNot(asyncio.run(test.TestCaseEviction(device_nodeid)), "Failed TestCaseEviction")

ok = asyncio.run(test.TestMultiFabric(ip=address,
setuppin=20202021,
nodeid=1))
FailIfNot(ok, "Failed to commission multi-fabric")

logger.info("Testing CASE Eviction")
FailIfNot(asyncio.run(test.TestCaseEviction(device_nodeid)), "Failed TestCaseEviction")

logger.info("Testing closing sessions")
FailIfNot(test.TestCloseSession(nodeid=device_nodeid), "Failed to close sessions")

2 changes: 2 additions & 0 deletions src/transport/SecureSession.h
@@ -292,6 +292,8 @@ class SecureSession : public Session, public ReferenceCounted<SecureSession, Sec
void MoveToState(State targetState);

friend class SecureSessionDeleter;
friend class TestSecureSessionTable;

SecureSessionTable & mTable;
State mState;
const Type mSecureSessionType;
144 changes: 123 additions & 21 deletions src/transport/SecureSessionTable.cpp
@@ -71,7 +71,7 @@ Optional<SessionHandle> SecureSessionTable::CreateNewSecureSession(SecureSession
// to run the eviction algorithm to get a free slot. We shall ALWAYS be guaranteed to evict
// an existing session in the table in normal operating circumstances.
//
-if (mEntries.Allocated() < CHIP_CONFIG_SECURE_SESSION_POOL_SIZE)
if (mEntries.Allocated() < GetMaxSessionTableSize())
{
allocated = mEntries.CreateObject(*this, secureSessionType, sessionId.Value());
}
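
The rest of this function is collapsed in the diff, but the shape of the allocate-or-evict flow is simple. Here is a self-contained toy version under assumed types (not the SDK's actual implementation):

```cpp
#include <array>
#include <cstddef>

struct Slot { bool inUse = false; }; // stand-in for SecureSession

template <size_t N>
class SessionPoolSketch
{
public:
    Slot * AllocateOrEvict()
    {
        Slot * slot = Allocate();                 // fast path: a slot is free
        return slot ? slot : EvictAndAllocate();  // slow path: make room
    }

private:
    Slot * Allocate()
    {
        for (auto & s : mSlots)
        {
            if (!s.inUse) { s.inUse = true; return &s; }
        }
        return nullptr;
    }

    Slot * EvictAndAllocate()
    {
        // In normal operation the eviction policy is guaranteed to find a
        // victim, which is why the real code dies if eviction ever fails.
        mSlots[0].inUse = false; // victim selection elided; see DefaultEvictionPolicy below
        return Allocate();
    }

    std::array<Slot, N> mSlots{};
};
```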
@@ -102,47 +102,92 @@ SecureSession * SecureSessionTable::EvictAndAllocate(uint16_t localSessionId, Se
ChipLogProgress(SecureChannel, "Evicting a slot for session with LSID: %d, type: %u", localSessionId,
(uint8_t) secureSessionType);

-VerifyOrDie(mEntries.Allocated() <= CHIP_CONFIG_SECURE_SESSION_POOL_SIZE);
VerifyOrDie(mEntries.Allocated() <= GetMaxSessionTableSize());

//
// Create a temporary list of objects, each of which points to a session in the existing
// session table but is swappable. This allows them to be used with a sorting algorithm
// without affecting the sessions in the table itself.
//
-Platform::ScopedMemoryBufferWithSize<SortableSession> sortableSessions;
-sortableSessions.Calloc(mEntries.Allocated());
-if (!sortableSessions)
-{
-VerifyOrDieWithMsg(false, SecureChannel, "We couldn't allocate a session!");
-return nullptr;
-}
// The size of this shouldn't place significant demands on the stack if using the default
// configuration for CHIP_CONFIG_SECURE_SESSION_POOL_SIZE (17). Each item is
// 8 bytes in size (on a 32-bit platform), and 16 bytes in size (on a 64-bit platform,
// including padding).
//
// Total size of this stack variable = 17 * 8 = 136 bytes (32-bit platform), 272 bytes (64-bit platform).
//
// Even if the define is set to a large value, it's likely not so bad on the sort of platform setup
// that would have that sort of pool size.
//
// We need to sort (as opposed to just a linear search for the smallest/largest item)
// since it is possible that the candidate selected for eviction may not actually be
// released once marked for expiration (see comments below for more details).
//
// Consequently, we may need to walk the candidate list till we find one that is.
// Sorting provides a better overall performance model in this scheme.
//
// (#19967): Investigate doing linear search instead.
//
SortableSession sortableSessions[CHIP_CONFIG_SECURE_SESSION_POOL_SIZE];
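
The size math in the comment above can be sanity-checked with a stand-in struct mirroring the fields this diff shows (the real SortableSession is declared in SecureSessionTable.h and may differ):

```cpp
#include <cstdint>
#include <cstdio>

class SecureSession; // opaque here; only the pointer size matters

struct SortableSessionSketch
{
    SecureSession * mSession;     // 4 bytes on 32-bit, 8 bytes on 64-bit
    uint8_t mNumMatchingOnFabric; // small counters, padded out to
    uint8_t mNumMatchingOnPeer;   // pointer alignment
};

int main()
{
    // Prints 8 on a typical 32-bit target and 16 on a 64-bit one, matching
    // the 17 * 8 = 136 byte and 17 * 16 = 272 byte totals quoted above.
    std::printf("per-entry: %zu bytes, worst case: %zu bytes\n",
                sizeof(SortableSessionSketch), 17 * sizeof(SortableSessionSketch));
    return 0;
}
```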

unsigned int index = 0;

//
// Compute two key stats for each session - the number of other sessions that
// match its fabric, as well as the number of other sessions that match its peer.
//
// This will be used by the session eviction algorithm later.
//
ForEachSession([&index, &sortableSessions, this](auto * session) {
sortableSessions[index].mSession = session;
sortableSessions[index].mNumMatchingOnFabric = 0;
sortableSessions[index].mNumMatchingOnPeer = 0;

ForEachSession([session, index, &sortableSessions](auto * otherSession) {
if (session != otherSession)
{
if (session->GetFabricIndex() == otherSession->GetFabricIndex())
{
sortableSessions[index].mNumMatchingOnFabric++;

if (session->GetPeerNodeId() == otherSession->GetPeerNodeId())
{
sortableSessions[index].mNumMatchingOnPeer++;
}
}
}

return Loop::Continue;
});

index++;
return Loop::Continue;
});

-int index = 0;
-ForEachSession([&index, &sortableSessions](auto session) {
-sortableSessions.Get()[index].mSession = session;
-index++;
-return Loop::Continue;
-});
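
To make the pairwise stats concrete, here is a toy recreation of the counting pass over a hypothetical four-session table (the SDK walks its table via ForEachSession rather than a vector):

```cpp
#include <cstdio>
#include <vector>

struct PeerSketch { int fabric; int node; };

int main()
{
    // Hypothetical table: fabric 1 has three sessions, two of them to node 10.
    std::vector<PeerSketch> sessions = { { 1, 10 }, { 1, 10 }, { 1, 11 }, { 2, 20 } };

    for (size_t i = 0; i < sessions.size(); i++)
    {
        int onFabric = 0, onPeer = 0;
        for (size_t j = 0; j < sessions.size(); j++)
        {
            if (i == j) continue;
            if (sessions[i].fabric == sessions[j].fabric)
            {
                onFabric++;
                if (sessions[i].node == sessions[j].node) onPeer++;
            }
        }
        // Output: sessions 0 and 1 get (2, 1), session 2 gets (2, 0), and
        // session 3 gets (0, 0), so fabric 1's duplicated-peer sessions
        // rank as the best eviction candidates.
        std::printf("session %zu: matchingOnFabric=%d matchingOnPeer=%d\n", i, onFabric, onPeer);
    }
    return 0;
}
```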

-auto sortableSessionSpan = Span<SortableSession>(sortableSessions.Get(), mEntries.Allocated());
auto sortableSessionSpan = Span<SortableSession>(sortableSessions, mEntries.Allocated());
EvictionPolicyContext policyContext(sortableSessionSpan, sessionEvictionHint);

DefaultEvictionPolicy(policyContext);
ChipLogProgress(SecureChannel, "Sorted sessions for eviction...");

-auto numSessions = mEntries.Allocated();
const auto numSessions = mEntries.Allocated();

#if CHIP_DETAIL_LOGGING
ChipLogDetail(SecureChannel, "Sorted Eviction Candidates (ranked from best candidate to worst):");
-for (auto * session = sortableSessions.Get(); session != (sortableSessions.Get() + numSessions); session++)
for (auto * session = sortableSessions; session != (sortableSessions + numSessions); session++)
{
-ChipLogDetail(SecureChannel, "\t%ld: [%p] -- State: '%s', ActivityTime: %lu",
-static_cast<long int>(session - sortableSessions.Get()), session->mSession, session->mSession->GetStateStr(),
ChipLogDetail(SecureChannel,
"\t%ld: [%p] -- Peer: [%u:" ChipLogFormatX64
"] State: '%s', NumMatchingOnFabric: %d NumMatchingOnPeer: %d ActivityTime: %lu",
static_cast<long int>(session - sortableSessions), session->mSession,
session->mSession->GetPeer().GetFabricIndex(), ChipLogValueX64(session->mSession->GetPeer().GetNodeId()),
session->mSession->GetStateStr(), session->mNumMatchingOnFabric, session->mNumMatchingOnPeer,
static_cast<unsigned long>(session->mSession->GetLastActivityTime().count()));
}
#endif

-for (auto * session = sortableSessions.Get(); session != (sortableSessions.Get() + numSessions); session++)
for (auto * session = sortableSessions; session != (sortableSessions + numSessions); session++)
{
if (session->mSession->IsPendingEviction())
{
@@ -183,9 +228,55 @@

void SecureSessionTable::DefaultEvictionPolicy(EvictionPolicyContext & evictionContext)
{
-evictionContext.Sort([](const auto & a, const auto & b) {
-int aStateScore = 0, bStateScore = 0;
//
// This implements a spec-compliant sorting policy that ensures both the per-fabric session
// minima mandated by the spec and fairness in selecting the most appropriate session to
// evict based on multiple criteria.
//
// See the description of this function in the header for more details on each sorting key below.
//
evictionContext.Sort([&evictionContext](const SortableSession & a, const SortableSession & b) -> bool {
//
// Sorting on Key1
//
if (a.mNumMatchingOnFabric != b.mNumMatchingOnFabric)
{
return a.mNumMatchingOnFabric > b.mNumMatchingOnFabric;
}

bool doesAMatchSessionHintFabric =
a.mSession->GetPeer().GetFabricIndex() == evictionContext.GetSessionEvictionHint().GetFabricIndex();
bool doesBMatchSessionHintFabric =
b.mSession->GetPeer().GetFabricIndex() == evictionContext.GetSessionEvictionHint().GetFabricIndex();

//
// Sorting on Key2
//
if (doesAMatchSessionHintFabric != doesBMatchSessionHintFabric)
{
return doesAMatchSessionHintFabric > doesBMatchSessionHintFabric;
}

//
// Sorting on Key3
//
if (a.mNumMatchingOnPeer != b.mNumMatchingOnPeer)
{
return a.mNumMatchingOnPeer > b.mNumMatchingOnPeer;
}

int doesAMatchSessionHint = a.mSession->GetPeer() == evictionContext.GetSessionEvictionHint();
int doesBMatchSessionHint = b.mSession->GetPeer() == evictionContext.GetSessionEvictionHint();

//
// Sorting on Key4
//
if (doesAMatchSessionHint != doesBMatchSessionHint)
{
return doesAMatchSessionHint > doesBMatchSessionHint;
}

int aStateScore = 0, bStateScore = 0;
auto assignStateScore = [](auto & score, const auto & session) {
if (session.IsDefunct())
{
@@ -204,7 +295,18 @@ void SecureSessionTable::DefaultEvictionPolicy(EvictionPolicyContext & evictionC
assignStateScore(aStateScore, *a.mSession);
assignStateScore(bStateScore, *b.mSession);

-return ((aStateScore > bStateScore) ? true : (a->GetLastActivityTime() < b->GetLastActivityTime()));
//
// Sorting on Key5
//
if (aStateScore != bStateScore)
{
return (aStateScore > bStateScore);
}

//
// Sorting on Key6
//
return (a->GetLastActivityTime() < b->GetLastActivityTime());
});
}
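
Putting the six keys together: the policy ranks candidates by (1) most sessions sharing the candidate's fabric, so minima on quieter fabrics are protected; (2) fabric matching the eviction hint; (3) most sessions to the same peer; (4) exact match on the hinted peer; (5) worst session state (defunct before active); and (6) least recent activity. Below is a condensed stand-alone sketch of that comparator over simplified types (the SDK sorts SortableSession entries through EvictionPolicyContext::Sort; the exact sort algorithm is not shown in this diff):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Simplified stand-ins; mirrors the shape of SortableSession, not its layout.
struct Candidate
{
    int fabric, node;        // peer identity
    int numMatchingOnFabric; // other sessions on the same fabric
    int numMatchingOnPeer;   // of those, sessions to the same peer node
    int stateScore;          // higher = better to evict (e.g. defunct > active)
    uint64_t lastActivity;   // monotonic activity timestamp
};

// hintFabric / hintNode model the sessionEvictionHint ScopedNodeId.
void SortForEviction(std::vector<Candidate> & c, int hintFabric, int hintNode)
{
    std::sort(c.begin(), c.end(), [&](const Candidate & a, const Candidate & b) {
        // Key1: evict from the most heavily loaded fabric first.
        if (a.numMatchingOnFabric != b.numMatchingOnFabric)
            return a.numMatchingOnFabric > b.numMatchingOnFabric;

        // Key2: prefer the fabric named in the eviction hint.
        bool aHintFabric = (a.fabric == hintFabric), bHintFabric = (b.fabric == hintFabric);
        if (aHintFabric != bHintFabric)
            return aHintFabric;

        // Key3: prefer peers that already have duplicate sessions.
        if (a.numMatchingOnPeer != b.numMatchingOnPeer)
            return a.numMatchingOnPeer > b.numMatchingOnPeer;

        // Key4: prefer an exact match on the hinted peer.
        bool aHintPeer = aHintFabric && (a.node == hintNode);
        bool bHintPeer = bHintFabric && (b.node == hintNode);
        if (aHintPeer != bHintPeer)
            return aHintPeer;

        // Key5: prefer worse states (e.g. defunct over active).
        if (a.stateScore != b.stateScore)
            return a.stateScore > b.stateScore;

        // Key6: least recently active first (LRU).
        return a.lastActivity < b.lastActivity;
    });
}
```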
