From e49f781f7d6c5a2b1afd2f0bbac7eed8381ec18f Mon Sep 17 00:00:00 2001 From: Daniele Di Proietto Date: Mon, 21 Oct 2024 09:39:54 +0000 Subject: [PATCH] shared_memory_arbiter: Fix TSAN race on destruction SharedMemoryArbiterImpl should be destroyed (on the task_runner thread) only after `TryShutdown()` returns true. This happens only after all TraceWriters that hold a reference to the SharedMemoryArbiterImpl have been destroyed (they call `ReleaseWriterID()` on destruction). TraceWriters can be owned by different threads. Our TSAN CI sometimes reports a failure on the test `PerfettoApiTest.SystemDisconnectAsyncOnStopNoTracing`. ``` [00:13:55] WARNING: ThreadSanitizer: data race (pid=12265) [00:13:55] Write of size 8 at 0x72440000a4d0 by thread T3: [00:13:55] #0 operator delete(void*) ??:? (perfetto_integrationtests+0x5c18f9) (BuildId: 1cd20ac359351a2f) [00:13:55] #1 perfetto::SharedMemoryArbiterImpl::~SharedMemoryArbiterImpl() shared_memory_arbiter_impl.cc:? (perfetto_integrationtests+0x9da501) (BuildId: 1cd20ac359351a2f) [00:13:55] #2 perfetto::ProducerIPCClientImpl::~ProducerIPCClientImpl() producer_ipc_client_impl.cc:? (perfetto_integrationtests+0xb5f2d5) (BuildId: 1cd20ac359351a2f) [00:13:55] #3 perfetto::ProducerIPCClientImpl::~ProducerIPCClientImpl() producer_ipc_client_impl.cc:? (perfetto_integrationtests+0xb5f3f9) (BuildId: 1cd20ac359351a2f) [00:13:55] #4 std::__Cr::__shared_ptr_pointer >)::$_0, std::__Cr::allocator >::__on_zero_shared() tracing_muxer_impl.cc:? (perfetto_integrationtests+0x94767d) (BuildId: 1cd20ac359351a2f) [00:13:55] #5 perfetto::internal::TracingMuxerImpl::StopDataSource_AsyncEnd(unsigned long, unsigned int, unsigned long, perfetto::internal::TracingMuxerImpl::FindDataSourceRes const&) tracing_muxer_impl.cc:? (perfetto_integrationtests+0x941e3d) (BuildId: 1cd20ac359351a2f) [00:13:55] #6 void std::__Cr::__function::__policy_invoker::__call_impl >(std::__Cr::__function::__policy_storage const*) tracing_muxer_impl.cc:? (perfetto_integrationtests+0x94b988) (BuildId: 1cd20ac359351a2f) [00:13:55] #7 perfetto::base::UnixTaskRunner::RunImmediateAndDelayedTask() unix_task_runner.cc:? (perfetto_integrationtests+0x8da687) (BuildId: 1cd20ac359351a2f) [00:13:55] #8 perfetto::base::UnixTaskRunner::Run() unix_task_runner.cc:? (perfetto_integrationtests+0x8d9a0c) (BuildId: 1cd20ac359351a2f) [00:13:55] #9 perfetto::base::ThreadTaskRunner::RunTaskThread(std::__Cr::function) thread_task_runner.cc:? (perfetto_integrationtests+0x8d86f1) (BuildId: 1cd20ac359351a2f) [00:13:55] #10 void* std::__Cr::__thread_proxy >, void (perfetto::base::ThreadTaskRunner::*)(std::__Cr::function), perfetto::base::ThreadTaskRunner*, std::__Cr::function > >(void*) thread_task_runner.cc:? (perfetto_integrationtests+0x8d90ab) (BuildId: 1cd20ac359351a2f) [00:13:55] [00:13:55] Previous read of size 8 at 0x72440000a4d0 by thread T437: [00:13:55] #0 perfetto::SharedMemoryArbiterImpl::ReleaseWriterID(unsigned short) shared_memory_arbiter_impl.cc:? (perfetto_integrationtests+0x9da0f8) (BuildId: 1cd20ac359351a2f) [00:13:55] #1 perfetto::TraceWriterImpl::~TraceWriterImpl() trace_writer_impl.cc:? (perfetto_integrationtests+0x9dc07a) (BuildId: 1cd20ac359351a2f) [00:13:55] #2 perfetto::TraceWriterImpl::~TraceWriterImpl() trace_writer_impl.cc:? (perfetto_integrationtests+0x9dc1c9) (BuildId: 1cd20ac359351a2f) [00:13:55] #3 std::__Cr::array::~array() virtual_destructors.cc:? (perfetto_integrationtests+0x961dad) (BuildId: 1cd20ac359351a2f) [00:13:55] #4 std::__Cr::array::~array() virtual_destructors.cc:? (perfetto_integrationtests+0x961861) (BuildId: 1cd20ac359351a2f) [00:13:55] #5 perfetto::internal::TracingTLS::~TracingTLS() virtual_destructors.cc:? (perfetto_integrationtests+0x9618b9) (BuildId: 1cd20ac359351a2f) [00:13:55] #6 perfetto::(anonymous namespace)::PlatformPosix::PlatformPosix()::$_0::__invoke(void*) platform_posix.cc:? (perfetto_integrationtests+0xc414d0) (BuildId: 1cd20ac359351a2f) [00:13:55] #7 __nptl_deallocate_tsd.part.8 ??:? (libpthread.so.0+0x71a0) (BuildId: 48041452aef93ddb2366ca0fa49da8f32684a9c8) ``` T3 is the task_runner thread, while T437 is a regular thread that's being destroyed after tracing something. This is a data race on SharedMemoryArbiterImpl::weak_ptr_factory_. Basically, the following can happen: * T437 calls `ReleaseWriterID()`: it acquires the lock, removes the last id from `active_writer_ids_` and releases the lock. * T3 calls `TracingMuxerImpl::ProducerImpl::SweepDeadServices()` then `TryShutdown()`, which returns true. * T3 destroys ShareMemoryArbiterImpl, including its weak_ptr_factory_. * T437 keeps executing `ReleaseWriterID()` and executes `weak_ptr_factory_.GetWeakPtr()`. * weak_ptr_factory_ is being destroyed by T3. Fix the problem by executing `weak_ptr_factory_.GetWeakPtr()` while still holding the lock. `TryShutdown()` cannot return true while we still hold the lock. Change-Id: I463811685fd9e05f4f00422e8e7aff6ba0cbdbac --- src/tracing/core/shared_memory_arbiter_impl.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tracing/core/shared_memory_arbiter_impl.cc b/src/tracing/core/shared_memory_arbiter_impl.cc index d9d774e05b..d611dfdd36 100644 --- a/src/tracing/core/shared_memory_arbiter_impl.cc +++ b/src/tracing/core/shared_memory_arbiter_impl.cc @@ -861,6 +861,7 @@ std::unique_ptr SharedMemoryArbiterImpl::CreateTraceWriterInternal( void SharedMemoryArbiterImpl::ReleaseWriterID(WriterID id) { base::TaskRunner* task_runner = nullptr; + base::WeakPtr weak_this; { std::lock_guard scoped_lock(lock_); active_writer_ids_.Free(id); @@ -879,12 +880,15 @@ void SharedMemoryArbiterImpl::ReleaseWriterID(WriterID id) { if (!task_runner_) return; + // If `active_writer_ids_` is empty, `TryShutdown()` can return true + // and `*this` can be deleted. Let's grab everything we need from `*this` + // before releasing the lock. + weak_this = weak_ptr_factory_.GetWeakPtr(); task_runner = task_runner_; } // scoped_lock // We shouldn't post tasks while locked. |task_runner| remains valid after // unlocking, because |task_runner_| is never reset. - auto weak_this = weak_ptr_factory_.GetWeakPtr(); task_runner->PostTask([weak_this, id] { if (weak_this) weak_this->producer_endpoint_->UnregisterTraceWriter(id);