From 75c0e2fd583424ea7f59eabeb05808cfdae18107 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 15 Dec 2021 18:11:48 +0100 Subject: [PATCH 1/8] Fix for DeviceBuffer --- python/rmm/_lib/device_buffer.pxd | 10 ++++++++-- python/rmm/_lib/device_buffer.pyx | 2 +- python/rmm/_lib/memory_resource.pxd | 1 + python/rmm/_lib/memory_resource.pyx | 3 +++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 635b1ed8a..8979fc636 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -13,13 +13,19 @@ # limitations under the License. from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr +from libcpp.memory cimport unique_ptr, shared_ptr from rmm._cuda.stream cimport Stream from rmm._lib.cuda_stream_view cimport cuda_stream_view from rmm._lib.memory_resource cimport DeviceMemoryResource +cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ + namespace "rmm::mr" nogil: + cdef cppclass device_memory_resource: + pass + + cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef cppclass device_buffer: device_buffer() @@ -39,7 +45,7 @@ cdef class DeviceBuffer: # Holds a reference to the DeviceMemoryResource used for allocation. # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is # needed for deallocation - cdef DeviceMemoryResource mr + cdef shared_ptr[device_memory_resource] mr # Holds a reference to the stream used by the underlying `device_buffer`. # Ensures the stream does not get destroyed before this DeviceBuffer diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index cbe0bdb33..05d5067d6 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -89,7 +89,7 @@ cdef class DeviceBuffer: stream.c_synchronize() # Save a reference to the MR and stream used for allocation - self.mr = get_current_device_resource() + self.mr = get_current_device_resource().get() self.stream = stream def __len__(self): diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index 3a71fd500..62745cf98 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -26,6 +26,7 @@ cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ cdef class DeviceMemoryResource: cdef shared_ptr[device_memory_resource] c_obj + cdef shared_ptr[device_memory_resource] get(self) cdef device_memory_resource* get_mr(self) cdef class UpstreamResourceAdaptor(DeviceMemoryResource): diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index d7711ed8a..4be47017d 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -162,6 +162,9 @@ cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: cdef class DeviceMemoryResource: + cdef shared_ptr[device_memory_resource] get(self): + return self.c_obj + cdef device_memory_resource* get_mr(self): return self.c_obj.get() From 59836ec3c986c474560c30f36b1495488864b6b7 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 15 Dec 2021 18:22:19 +0100 Subject: [PATCH 2/8] Update doc --- python/rmm/_lib/device_buffer.pxd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 8979fc636..a1fce69b0 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -42,9 +42,9 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj - # Holds a reference to the DeviceMemoryResource used for allocation. - # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is - # needed for deallocation + # Holds a reference to the device_memory_resource used for allocation. + # Ensures the memory resource does not get destroyed before + # this DeviceBuffer as it is needed for deallocation. cdef shared_ptr[device_memory_resource] mr # Holds a reference to the stream used by the underlying `device_buffer`. From 69c2a7b9a34f498be3baf38e90ba7edbc8f387b7 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 16 Dec 2021 15:35:37 +0100 Subject: [PATCH 3/8] Review requests --- python/rmm/_lib/device_buffer.pxd | 10 ++++++---- python/rmm/_lib/device_buffer.pyx | 2 +- python/rmm/_lib/memory_resource.pxd | 1 - python/rmm/_lib/memory_resource.pyx | 3 --- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index a1fce69b0..8044e751d 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -13,7 +13,7 @@ # limitations under the License. from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr, shared_ptr +from libcpp.memory cimport shared_ptr, unique_ptr from rmm._cuda.stream cimport Stream from rmm._lib.cuda_stream_view cimport cuda_stream_view @@ -42,9 +42,11 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj - # Holds a reference to the device_memory_resource used for allocation. - # Ensures the memory resource does not get destroyed before - # this DeviceBuffer as it is needed for deallocation. + # Holds a reference to the memory resource used for allocation + # and ensures that it is not destroyed before the device buffer + # is deallocated. A reference to a DeviceMemoryResource cannot + # be used as it could be released prematurely because of the circle + # reference breaking mechanism. cdef shared_ptr[device_memory_resource] mr # Holds a reference to the stream used by the underlying `device_buffer`. diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 05d5067d6..87596cb75 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -89,7 +89,7 @@ cdef class DeviceBuffer: stream.c_synchronize() # Save a reference to the MR and stream used for allocation - self.mr = get_current_device_resource().get() + self.mr = get_current_device_resource().c_obj self.stream = stream def __len__(self): diff --git a/python/rmm/_lib/memory_resource.pxd b/python/rmm/_lib/memory_resource.pxd index 62745cf98..3a71fd500 100644 --- a/python/rmm/_lib/memory_resource.pxd +++ b/python/rmm/_lib/memory_resource.pxd @@ -26,7 +26,6 @@ cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ cdef class DeviceMemoryResource: cdef shared_ptr[device_memory_resource] c_obj - cdef shared_ptr[device_memory_resource] get(self) cdef device_memory_resource* get_mr(self) cdef class UpstreamResourceAdaptor(DeviceMemoryResource): diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index 4be47017d..d7711ed8a 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -162,9 +162,6 @@ cdef extern from "rmm/mr/device/per_device_resource.hpp" namespace "rmm" nogil: cdef class DeviceMemoryResource: - cdef shared_ptr[device_memory_resource] get(self): - return self.c_obj - cdef device_memory_resource* get_mr(self): return self.c_obj.get() From 41fcdabeda421e177e98ace57ad68b7cf30e317d Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 16 Dec 2021 15:53:36 +0100 Subject: [PATCH 4/8] Adding test --- python/rmm/tests/test_rmm.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 96b36017b..446c9e7be 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -691,3 +691,27 @@ def callback(nbytes: int) -> bool: with pytest.raises(MemoryError): rmm.DeviceBuffer(size=int(1e11)) assert retried[0] + + +def test_dev_buf_circle_ref_dealloc(): + rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource()) + + dbuf1 = rmm.DeviceBuffer(size=1_000_000) + + # Make dbuf1 part of a reference cycle: + l1 = [dbuf1] + l1.append(l1) + + # due to the reference cycle, the device buffer doesn't actually get + # cleaned up until later, when we invoke `gc.collect()`: + del dbuf1, l1 + + rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource()) + + # by now, the only remaining reference to the *original* memory + # resource should be in `dbuf1`. However, the cyclic garbage collector + # will eliminate that reference when it clears the object via its + # `tp_clear` method. Later, when `tp_dealloc` attemps to actually + # deallocate `dbuf1` (which needs the MR alive), a segfault occurs. + + gc.collect() From 0515d602e96d5c255a549e3db380d9d46836666f Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 16 Dec 2021 16:37:17 +0100 Subject: [PATCH 5/8] Update doc --- python/rmm/_lib/device_buffer.pxd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 8044e751d..786c87543 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -45,8 +45,9 @@ cdef class DeviceBuffer: # Holds a reference to the memory resource used for allocation # and ensures that it is not destroyed before the device buffer # is deallocated. A reference to a DeviceMemoryResource cannot - # be used as it could be released prematurely because of the circle - # reference breaking mechanism. + # be used as it could be released prematurely by the gc if the DeviceBuffer + # is in a reference cycle. + # See https://github.com/rapidsai/rmm/pull/931 for details. cdef shared_ptr[device_memory_resource] mr # Holds a reference to the stream used by the underlying `device_buffer`. From 905044723f1751789f403f6ecabd58db8665ddc2 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 16 Dec 2021 16:41:22 +0100 Subject: [PATCH 6/8] Modify imports --- python/rmm/_lib/device_buffer.pxd | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 786c87543..f95176102 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -17,13 +17,7 @@ from libcpp.memory cimport shared_ptr, unique_ptr from rmm._cuda.stream cimport Stream from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.memory_resource cimport DeviceMemoryResource - - -cdef extern from "rmm/mr/device/device_memory_resource.hpp" \ - namespace "rmm::mr" nogil: - cdef cppclass device_memory_resource: - pass +from rmm._lib.memory_resource cimport device_memory_resource cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: From 507fa0606d7536446674e63e59ef6e9fa45e3e34 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Fri, 17 Dec 2021 15:57:17 +0100 Subject: [PATCH 7/8] Use no_gc_clear decorator --- python/rmm/_lib/device_buffer.pxd | 15 ++++++--------- python/rmm/_lib/device_buffer.pyx | 7 ++++++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index f95176102..635b1ed8a 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -13,11 +13,11 @@ # limitations under the License. from libc.stdint cimport uintptr_t -from libcpp.memory cimport shared_ptr, unique_ptr +from libcpp.memory cimport unique_ptr from rmm._cuda.stream cimport Stream from rmm._lib.cuda_stream_view cimport cuda_stream_view -from rmm._lib.memory_resource cimport device_memory_resource +from rmm._lib.memory_resource cimport DeviceMemoryResource cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: @@ -36,13 +36,10 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef class DeviceBuffer: cdef unique_ptr[device_buffer] c_obj - # Holds a reference to the memory resource used for allocation - # and ensures that it is not destroyed before the device buffer - # is deallocated. A reference to a DeviceMemoryResource cannot - # be used as it could be released prematurely by the gc if the DeviceBuffer - # is in a reference cycle. - # See https://github.com/rapidsai/rmm/pull/931 for details. - cdef shared_ptr[device_memory_resource] mr + # Holds a reference to the DeviceMemoryResource used for allocation. + # Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is + # needed for deallocation + cdef DeviceMemoryResource mr # Holds a reference to the stream used by the underlying `device_buffer`. # Ensures the stream does not get destroyed before this DeviceBuffer diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 87596cb75..ece2f911f 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -37,6 +37,11 @@ from rmm._lib.lib cimport ( from rmm._lib.memory_resource cimport get_current_device_resource +# The DeviceMemoryResource attribute could be released prematurely +# by the gc if the DeviceBuffer is in a reference cycle. Removing +# the tp_clear function with the no_gc_clear decoration prevents that. +# See https://github.com/rapidsai/rmm/pull/931 for details. +@cython.no_gc_clear cdef class DeviceBuffer: def __cinit__(self, *, @@ -89,7 +94,7 @@ cdef class DeviceBuffer: stream.c_synchronize() # Save a reference to the MR and stream used for allocation - self.mr = get_current_device_resource().c_obj + self.mr = get_current_device_resource() self.stream = stream def __len__(self): From 2cf2dbb2e15f9eff68defa6d12f205eef9970a68 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Mon, 20 Dec 2021 14:40:47 +0100 Subject: [PATCH 8/8] Addition to RMM test doc --- python/rmm/tests/test_rmm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/rmm/tests/test_rmm.py b/python/rmm/tests/test_rmm.py index 446c9e7be..c82c306c7 100644 --- a/python/rmm/tests/test_rmm.py +++ b/python/rmm/tests/test_rmm.py @@ -492,7 +492,8 @@ def test_rmm_enable_disable_logging(dtype, nelem, alloc, tmpdir): def test_mr_devicebuffer_lifetime(): # Test ensures MR/Stream lifetime is longer than DeviceBuffer. Even if all # references go out of scope - # Create new Pool MR + # It is necessary to verify that it also works when using an upstream : + # here a Pool MR with the current MR as upstream rmm.mr.set_current_device_resource( rmm.mr.PoolMemoryResource(rmm.mr.get_current_device_resource()) )