From ee604aef33b4e40c609d464f889928108b04f54c Mon Sep 17 00:00:00 2001 From: Anis Ladram Date: Fri, 11 Mar 2022 19:28:11 -0500 Subject: [PATCH 1/2] Fix free-before-alloc in multithreaded test Uses a CUDA event to make sure streamA and streamB are correctly synchronized. --- tests/mr/device/mr_multithreaded_tests.cpp | 31 +++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index 38c34d93f..b56468f15 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -179,6 +179,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, std::size_t num_allocations, std::list& allocations, std::mutex& mtx, + cudaEvent_t& event, rmm::cuda_stream_view stream) { constexpr std::size_t max_size{1_MiB}; @@ -191,6 +192,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(size, stream); { std::lock_guard lock(mtx); + EXPECT_EQ(cudaSuccess, cudaEventRecord(event, stream.value())); allocations.emplace_back(ptr, size); } } @@ -200,12 +202,14 @@ void deallocate_loop(rmm::mr::device_memory_resource* mr, std::size_t num_allocations, std::list& allocations, std::mutex& mtx, + cudaEvent_t& event, rmm::cuda_stream_view stream) { for (std::size_t i = 0; i < num_allocations;) { std::lock_guard lock(mtx); if (allocations.empty()) { continue; } i++; + EXPECT_EQ(cudaSuccess, cudaStreamWaitEvent(stream.value(), event)); allocation alloc = allocations.front(); allocations.pop_front(); mr->deallocate(alloc.ptr, alloc.size, stream); @@ -220,15 +224,30 @@ void test_allocate_free_different_threads(rmm::mr::device_memory_resource* mr, std::mutex mtx; std::list allocations; - - std::thread producer( - allocate_loop, mr, num_allocations, std::ref(allocations), std::ref(mtx), streamA); - - std::thread consumer( - deallocate_loop, mr, num_allocations, std::ref(allocations), std::ref(mtx), streamB); + cudaEvent_t event; + + EXPECT_EQ(cudaSuccess, cudaEventCreate(&event)); + + std::thread producer(allocate_loop, + mr, + num_allocations, + std::ref(allocations), + std::ref(mtx), + std::ref(event), + streamA); + + std::thread consumer(deallocate_loop, + mr, + num_allocations, + std::ref(allocations), + std::ref(mtx), + std::ref(event), + streamB); producer.join(); consumer.join(); + + EXPECT_EQ(cudaSuccess, cudaEventDestroy(event)); } TEST_P(mr_test_mt, AllocFreeDifferentThreadsDefaultStream) From 23f8e5b5f2fab8b80552211376998ce64acace5b Mon Sep 17 00:00:00 2001 From: Anis Ladram Date: Tue, 15 Mar 2022 10:08:08 -0400 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Mark Harris --- tests/mr/device/mr_multithreaded_tests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/mr/device/mr_multithreaded_tests.cpp b/tests/mr/device/mr_multithreaded_tests.cpp index b56468f15..6d6d8edc2 100644 --- a/tests/mr/device/mr_multithreaded_tests.cpp +++ b/tests/mr/device/mr_multithreaded_tests.cpp @@ -192,7 +192,7 @@ void allocate_loop(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(size, stream); { std::lock_guard lock(mtx); - EXPECT_EQ(cudaSuccess, cudaEventRecord(event, stream.value())); + RMM_CUDA_TRY(cudaEventRecord(event, stream.value())); allocations.emplace_back(ptr, size); } } @@ -209,7 +209,7 @@ void deallocate_loop(rmm::mr::device_memory_resource* mr, std::lock_guard lock(mtx); if (allocations.empty()) { continue; } i++; - EXPECT_EQ(cudaSuccess, cudaStreamWaitEvent(stream.value(), event)); + RMM_CUDA_TRY(cudaStreamWaitEvent(stream.value(), event)); allocation alloc = allocations.front(); allocations.pop_front(); mr->deallocate(alloc.ptr, alloc.size, stream); @@ -226,7 +226,7 @@ void test_allocate_free_different_threads(rmm::mr::device_memory_resource* mr, std::list allocations; cudaEvent_t event; - EXPECT_EQ(cudaSuccess, cudaEventCreate(&event)); + RMM_CUDA_TRY(cudaEventCreate(&event)); std::thread producer(allocate_loop, mr, @@ -247,7 +247,7 @@ void test_allocate_free_different_threads(rmm::mr::device_memory_resource* mr, producer.join(); consumer.join(); - EXPECT_EQ(cudaSuccess, cudaEventDestroy(event)); + RMM_CUDA_TRY(cudaEventDestroy(event)); } TEST_P(mr_test_mt, AllocFreeDifferentThreadsDefaultStream)