Skip to content

Commit

Permalink
Fix mutex use after destroy upon stack shutdown (project-chip#24191)
Browse files Browse the repository at this point in the history
* Fix mutex use after destroy upon stack shutdown

Shutting down CHIP stack when the event loop is still running might lead
to use after free for objects used in CHIP main event loop. This commit
fixes this by not running pychip_DeviceController_StackShutdown() on the
CHIP thread, but in the main thread of Python application.

* Account for externally managed threads which are joined outside of the
POSIX platform manager implementation class.

* java: Stop IO thread before shutting down commissioner
  • Loading branch information
arkq authored and David Lechner committed Mar 22, 2023
1 parent 66d3ba4 commit ad3b697
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 41 deletions.
34 changes: 26 additions & 8 deletions src/controller/java/CHIPDeviceController-JNI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ using namespace chip::Credentials;
#define CDC_JNI_CALLBACK_LOCAL_REF_COUNT 256

static void * IOThreadMain(void * arg);
static CHIP_ERROR StopIOThread();
static CHIP_ERROR N2J_PaseVerifierParams(JNIEnv * env, jlong setupPincode, jbyteArray pakeVerifier, jobject & outParams);
static CHIP_ERROR N2J_NetworkLocation(JNIEnv * env, jstring ipAddress, jint port, jint interfaceIndex, jobject & outLocation);
static CHIP_ERROR GetChipPathIdValue(jobject chipPathId, uint32_t wildcardValue, uint32_t & outValue);
Expand Down Expand Up @@ -147,14 +148,10 @@ void JNI_OnUnload(JavaVM * jvm, void * reserved)
chip::DeviceLayer::StackLock lock;
ChipLogProgress(Controller, "JNI_OnUnload() called");

// If the IO thread has been started, shut it down and wait for it to exit.
if (sIOThread != PTHREAD_NULL)
{
chip::DeviceLayer::PlatformMgr().StopEventLoopTask();

chip::DeviceLayer::StackUnlock unlock;
pthread_join(sIOThread, NULL);
}
// If the IO thread has not been stopped yet, shut it down now.
// TODO(arkq): Maybe we should just assert here, as the IO thread
// should be stopped before the library is unloaded.
StopIOThread();

sJVM = NULL;

Expand Down Expand Up @@ -1020,6 +1017,10 @@ JNI_METHOD(void, shutdownCommissioning)
(JNIEnv * env, jobject self, jlong handle)
{
chip::DeviceLayer::StackLock lock;

// Stop the IO thread, so that the controller can be safely shut down.
StopIOThread();

AndroidDeviceControllerWrapper * wrapper = AndroidDeviceControllerWrapper::FromJNIHandle(handle);
wrapper->Controller()->Shutdown();
}
Expand Down Expand Up @@ -1410,6 +1411,23 @@ void * IOThreadMain(void * arg)
return NULL;
}

// NOTE: This function SHALL be called with the stack lock held.
CHIP_ERROR StopIOThread()
{
if (sIOThread != PTHREAD_NULL)
{
ChipLogProgress(Controller, "IO thread stopping");
chip::DeviceLayer::StackUnlock unlock;

chip::DeviceLayer::PlatformMgr().StopEventLoopTask();

pthread_join(sIOThread, NULL);
sIOThread = PTHREAD_NULL;
}

return CHIP_NO_ERROR;
}

CHIP_ERROR N2J_PaseVerifierParams(JNIEnv * env, jlong setupPincode, jbyteArray paseVerifier, jobject & outParams)
{
CHIP_ERROR err = CHIP_NO_ERROR;
Expand Down
5 changes: 4 additions & 1 deletion src/controller/python/chip/ChipStack.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,10 @@ def setLogFunct(self, logFunct):
self._ChipStackLib.pychip_Stack_SetLogFunct(logFunct)

def Shutdown(self):
self.Call(lambda: self._ChipStackLib.pychip_DeviceController_StackShutdown()).raise_on_error()
#
# Terminate Matter thread and shutdown the stack.
#
self._ChipStackLib.pychip_DeviceController_StackShutdown()

#
# We only shutdown the persistent storage layer AFTER we've shut down the stack,
Expand Down
19 changes: 7 additions & 12 deletions src/include/platform/internal/GenericPlatformManagerImpl_POSIX.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,24 +55,19 @@ class GenericPlatformManagerImpl_POSIX : public GenericPlatformManagerImpl<ImplC
// OS-specific members (pthread)
pthread_mutex_t mChipStackLock = PTHREAD_MUTEX_INITIALIZER;

enum TaskType
enum class State
{
kExternallyManagedTask = 0,
kInternallyManagedTask = 1
kStopped = 0,
kRunning = 1,
kStopping = 2,
};

pthread_t mChipTask;
bool mHasValidChipTask = false;
TaskType mTaskType;
bool mInternallyManagedChipTask = false;
std::atomic<State> mState{ State::kStopped };
pthread_cond_t mEventQueueStoppedCond;
pthread_mutex_t mStateLock;

//
// TODO: This variable is very similar to mMainLoopIsStarted, track the
// cleanup and consolidation in this issue:
//
bool mEventQueueHasStopped = false;

pthread_attr_t mChipTaskAttr;
struct sched_param mChipTaskSchedParam;

Expand Down Expand Up @@ -109,7 +104,7 @@ class GenericPlatformManagerImpl_POSIX : public GenericPlatformManagerImpl<ImplC
void ProcessDeviceEvents();

DeviceSafeQueue mChipEventQueue;
std::atomic<bool> mShouldRunEventLoop;
std::atomic<bool> mShouldRunEventLoop{ true };
static void * EventLoopTaskMain(void * arg);
};

Expand Down
52 changes: 32 additions & 20 deletions src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,12 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX<ImplClass>::_InitChipStack()
// Call up to the base class _InitChipStack() to perform the bulk of the initialization.
ReturnErrorOnFailure(GenericPlatformManagerImpl<ImplClass>::_InitChipStack());

mShouldRunEventLoop.store(true, std::memory_order_relaxed);

int ret = pthread_cond_init(&mEventQueueStoppedCond, nullptr);
VerifyOrReturnError(ret == 0, CHIP_ERROR_POSIX(ret));

ret = pthread_mutex_init(&mStateLock, nullptr);
VerifyOrReturnError(ret == 0, CHIP_ERROR_POSIX(ret));

mHasValidChipTask = false;

return CHIP_NO_ERROR;
}

Expand Down Expand Up @@ -117,7 +113,11 @@ void GenericPlatformManagerImpl_POSIX<ImplClass>::_UnlockChipStack()
template <class ImplClass>
bool GenericPlatformManagerImpl_POSIX<ImplClass>::_IsChipStackLockedByCurrentThread() const
{
return !mHasValidChipTask || (mChipStackIsLocked && (pthread_equal(pthread_self(), mChipStackLockOwnerThread)));
// If no Matter thread is currently running we do not have to worry about
// locking. Hence, this function always returns true in that case.
if (mState.load(std::memory_order_relaxed) == State::kStopped)
return true;
return mChipStackIsLocked && (pthread_equal(pthread_self(), mChipStackLockOwnerThread));
}
#endif

Expand Down Expand Up @@ -153,18 +153,16 @@ void GenericPlatformManagerImpl_POSIX<ImplClass>::_RunEventLoop()
pthread_mutex_lock(&mStateLock);

//
// If we haven't set mHasValidChipTask by now, it means that the application did not call StartEventLoopTask
// and consequently, are running the event loop from their own, externally managed task.
// Let's track his appropriately since we need this info later when stopping the event queues.
// If we haven't set mInternallyManagedChipTask by now, it means that the application did not call
// StartEventLoopTask and consequently, are running the event loop from their own, externally managed
// task.
//
if (!mHasValidChipTask)
if (!mInternallyManagedChipTask)
{
mHasValidChipTask = true;
mChipTask = pthread_self();
mTaskType = kExternallyManagedTask;
mChipTask = pthread_self();
mState.store(State::kRunning, std::memory_order_relaxed);
}

mEventQueueHasStopped = false;
pthread_mutex_unlock(&mStateLock);

Impl()->LockChipStack();
Expand All @@ -187,14 +185,21 @@ void GenericPlatformManagerImpl_POSIX<ImplClass>::_RunEventLoop()
Impl()->UnlockChipStack();

pthread_mutex_lock(&mStateLock);
mEventQueueHasStopped = true;
mState.store(State::kStopping, std::memory_order_relaxed);
pthread_mutex_unlock(&mStateLock);

//
// Wake up anyone blocked waiting for the event queue to stop in
// StopEventLoopTask().
//
pthread_cond_signal(&mEventQueueStoppedCond);

//
// Mark event loop as truly stopped. After that line, we can not use any
// non-simple type member variables, because they can be destroyed by the
// Shutdown() method.
//
mState.store(State::kStopped, std::memory_order_relaxed);
}

template <class ImplClass>
Expand Down Expand Up @@ -230,8 +235,8 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX<ImplClass>::_StartEventLoopTask()
err = pthread_create(&mChipTask, &mChipTaskAttr, EventLoopTaskMain, this);
if (err == 0)
{
mHasValidChipTask = true;
mTaskType = kInternallyManagedTask;
mInternallyManagedChipTask = true;
mState.store(State::kRunning, std::memory_order_relaxed);
}

pthread_mutex_unlock(&mStateLock);
Expand All @@ -255,7 +260,8 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX<ImplClass>::_StopEventLoopTask()
// If we're calling this from a different thread than the one running chip, then
// we need to wait till the event queue has completely stopped before proceeding.
//
if (mHasValidChipTask && (pthread_equal(pthread_self(), mChipTask) == 0))
auto isRunning = mState.load(std::memory_order_relaxed) == State::kRunning;
if (isRunning && (pthread_equal(pthread_self(), mChipTask) == 0))
{
pthread_mutex_unlock(&mStateLock);

Expand All @@ -269,7 +275,7 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX<ImplClass>::_StopEventLoopTask()

pthread_mutex_lock(&mStateLock);

while (!mEventQueueHasStopped)
while (mState.load(std::memory_order_relaxed) == State::kRunning)
{
err = pthread_cond_wait(&mEventQueueStoppedCond, &mStateLock);
VerifyOrExit(err == 0, );
Expand All @@ -280,7 +286,7 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX<ImplClass>::_StopEventLoopTask()
//
// Wait further for the thread to terminate if we had previously created it.
//
if (mTaskType == kInternallyManagedTask)
if (mInternallyManagedChipTask)
{
err = pthread_join(mChipTask, nullptr);
VerifyOrExit(err == 0, );
Expand All @@ -292,13 +298,19 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX<ImplClass>::_StopEventLoopTask()
}

exit:
mHasValidChipTask = false;
return CHIP_ERROR_POSIX(err);
}

template <class ImplClass>
void GenericPlatformManagerImpl_POSIX<ImplClass>::_Shutdown()
{
//
// We cannot shutdown the stack while the event loop is still running. This can lead
// to use after free errors - here we are destroying mutex and condition variable that
// are still in use by the event loop!
//
VerifyOrDie(mState.load(std::memory_order_relaxed) == State::kStopped);

pthread_mutex_destroy(&mStateLock);
pthread_cond_destroy(&mEventQueueStoppedCond);

Expand Down

0 comments on commit ad3b697

Please sign in to comment.