Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent DeviceBuffer DeviceMemoryResource premature release #931

Merged
merged 8 commits into from
Jan 5, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions python/rmm/_lib/device_buffer.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,19 @@
# limitations under the License.

from libc.stdint cimport uintptr_t
from libcpp.memory cimport unique_ptr
from libcpp.memory cimport shared_ptr, unique_ptr

from rmm._cuda.stream cimport Stream
from rmm._lib.cuda_stream_view cimport cuda_stream_view
from rmm._lib.memory_resource cimport DeviceMemoryResource
viclafargue marked this conversation as resolved.
Show resolved Hide resolved


cdef extern from "rmm/mr/device/device_memory_resource.hpp" \
namespace "rmm::mr" nogil:
cdef cppclass device_memory_resource:
pass

viclafargue marked this conversation as resolved.
Show resolved Hide resolved

cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
cdef cppclass device_buffer:
device_buffer()
Expand All @@ -36,10 +42,12 @@ cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil:
cdef class DeviceBuffer:
cdef unique_ptr[device_buffer] c_obj

# Holds a reference to the DeviceMemoryResource used for allocation.
# Ensures the MR does not get destroyed before this DeviceBuffer. `mr` is
# needed for deallocation
cdef DeviceMemoryResource mr
# Holds a reference to the memory resource used for allocation
# and ensures that it is not destroyed before the device buffer
# is deallocated. A reference to a DeviceMemoryResource cannot
# be used as it could be released prematurely because of the circle
# reference breaking mechanism.
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
cdef shared_ptr[device_memory_resource] mr

# Holds a reference to the stream used by the underlying `device_buffer`.
# Ensures the stream does not get destroyed before this DeviceBuffer
Expand Down
2 changes: 1 addition & 1 deletion python/rmm/_lib/device_buffer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ cdef class DeviceBuffer:
stream.c_synchronize()

# Save a reference to the MR and stream used for allocation
self.mr = get_current_device_resource()
self.mr = get_current_device_resource().c_obj
self.stream = stream

def __len__(self):
Expand Down
24 changes: 24 additions & 0 deletions python/rmm/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,3 +691,27 @@ def callback(nbytes: int) -> bool:
with pytest.raises(MemoryError):
rmm.DeviceBuffer(size=int(1e11))
assert retried[0]


def test_dev_buf_circle_ref_dealloc():
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource())

dbuf1 = rmm.DeviceBuffer(size=1_000_000)

# Make dbuf1 part of a reference cycle:
l1 = [dbuf1]
l1.append(l1)

# due to the reference cycle, the device buffer doesn't actually get
# cleaned up until later, when we invoke `gc.collect()`:
del dbuf1, l1

rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource())

# by now, the only remaining reference to the *original* memory
# resource should be in `dbuf1`. However, the cyclic garbage collector
# will eliminate that reference when it clears the object via its
# `tp_clear` method. Later, when `tp_dealloc` attemps to actually
# deallocate `dbuf1` (which needs the MR alive), a segfault occurs.

gc.collect()