From ba3f50716c7720259ac0c04e4beedd03967231b4 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 3 Aug 2023 10:33:04 +0200 Subject: [PATCH] unify as_spillable_buffer, as_exposure_tracked_buffer, and as_buffer --- .../source/developer_guide/library_design.md | 14 ++-- python/cudf/cudf/core/buffer/buffer.py | 37 ++++++++- .../core/buffer/exposure_tracked_buffer.py | 4 +- .../cudf/cudf/core/buffer/spillable_buffer.py | 73 ++---------------- python/cudf/cudf/core/buffer/utils.py | 75 ++++++++++++++++--- 5 files changed, 112 insertions(+), 91 deletions(-) diff --git a/docs/cudf/source/developer_guide/library_design.md b/docs/cudf/source/developer_guide/library_design.md index 016c2c1d281..1c85e71128c 100644 --- a/docs/cudf/source/developer_guide/library_design.md +++ b/docs/cudf/source/developer_guide/library_design.md @@ -325,26 +325,26 @@ This section describes the internal implementation details of the copy-on-write It is recommended that developers familiarize themselves with [the user-facing documentation](copy-on-write-user-doc) of this functionality before reading through the internals below. -The core copy-on-write implementation relies on the factory function `as_exposure_tracked_buffer` and the two classes `ExposureTrackedBuffer` and `BufferSlice`. +The core copy-on-write implementation relies on the two classes `ExposureTrackedBufferOwner` and `ExposureTrackedBuffer`. -An `ExposureTrackedBuffer` is a subclass of the regular `Buffer` that tracks internal and external references to its underlying memory. Internal references are tracked by maintaining [weak references](https://docs.python.org/3/library/weakref.html) to every `BufferSlice` of the underlying memory. External references are tracked through "exposure" status of the underlying memory. A buffer is considered exposed if the device pointer (integer or void*) has been handed out to a library outside of cudf. In this case, we have no way of knowing if the data are being modified by a third party. +An `ExposureTrackedBufferOwner` is a subclass of the `BufferOwner` that tracks internal and external references to its underlying memory. Internal references are tracked by maintaining [weak references](https://docs.python.org/3/library/weakref.html) to every `ExposureTrackedBuffer` of the underlying memory. External references are tracked through "exposure" status of the underlying memory. A buffer is considered exposed if the device pointer (integer or void*) has been handed out to a library outside of cudf. In this case, we have no way of knowing if the data are being modified by a third party. -`BufferSlice` is a subclass of `ExposureTrackedBuffer` that represents a _slice_ of the memory underlying a exposure tracked buffer. +`ExposureTrackedBuffer` is a subclass of `Buffer` that represents a _slice_ of the memory underlying an exposure tracked buffer. -When the cudf option `"copy_on_write"` is `True`, `as_buffer` calls `as_exposure_tracked_buffer`, which always returns a `BufferSlice`. It is then the slices that determine whether or not to make a copy when a write operation is performed on a `Column` (see below). If multiple slices point to the same underlying memory, then a copy must be made whenever a modification is attempted. +When the cudf option `"copy_on_write"` is `True`, `as_buffer` returns a `ExposureTrackedBuffer`. It is this class that determine whether or not to make a copy when a write operation is performed on a `Column` (see below). If multiple slices point to the same underlying memory, then a copy must be made whenever a modification is attempted. ### Eager copies when exposing to third-party libraries -If a `Column`/`BufferSlice` is exposed to a third-party library via `__cuda_array_interface__`, we are no longer able to track whether or not modification of the buffer has occurred. Hence whenever +If a `Column`/`ExposureTrackedBuffer` is exposed to a third-party library via `__cuda_array_interface__`, we are no longer able to track whether or not modification of the buffer has occurred. Hence whenever someone accesses data through the `__cuda_array_interface__`, we eagerly trigger the copy by calling -`.make_single_owner_inplace` which ensures a true copy of underlying data is made and that the slice is the sole owner. Any future copy requests must also trigger a true physical copy (since we cannot track the lifetime of the third-party object). To handle this we also mark the `Column`/`BufferSlice` as exposed thus indicating that any future shallow-copy requests will trigger a true physical copy rather than a copy-on-write shallow copy. +`.make_single_owner_inplace` which ensures a true copy of underlying data is made and that the slice is the sole owner. Any future copy requests must also trigger a true physical copy (since we cannot track the lifetime of the third-party object). To handle this we also mark the `Column`/`ExposureTrackedBuffer` as exposed thus indicating that any future shallow-copy requests will trigger a true physical copy rather than a copy-on-write shallow copy. ### Obtaining a read-only object A read-only object can be quite useful for operations that will not mutate the data. This can be achieved by calling `.get_ptr(mode="read")`, and using `cuda_array_interface_wrapper` to wrap a `__cuda_array_interface__` object around it. -This will not trigger a deep copy even if multiple `BufferSlice` points to the same `ExposureTrackedBuffer`. This API should only be used when the lifetime of the proxy object is restricted to cudf's internal code execution. Handing this out to external libraries or user-facing APIs will lead to untracked references and undefined copy-on-write behavior. We currently use this API for device to host +This will not trigger a deep copy even if multiple `ExposureTrackedBuffer` points to the same `ExposureTrackedBufferOwner`. This API should only be used when the lifetime of the proxy object is restricted to cudf's internal code execution. Handing this out to external libraries or user-facing APIs will lead to untracked references and undefined copy-on-write behavior. We currently use this API for device to host copies like in `ColumnBase.data_array_view(mode="read")` which is used for `Column.values_host`. diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index a2b29a00c95..7749cb45ee4 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -53,6 +53,30 @@ def get_owner(data, klass: Type[T]) -> Optional[T]: return None +def get_buffer_owner(data) -> Optional[BufferOwner]: + """Get the owner of `data`, if any exist + + Search through the stack of data owners in order to find an + owner BufferOwner (incl. subclasses). + + Parameters + ---------- + data + The data object + + Return + ------ + BufferOwner or None + The owner of `data` if found otherwise None. + """ + + if isinstance(data, BufferOwner): + return data + if hasattr(data, "owner"): + return get_buffer_owner(data.owner) + return None + + def host_memory_allocation(nbytes: int) -> memoryview: """Allocate host memory using NumPy @@ -144,7 +168,7 @@ class BufferOwner(Serializable): _owner: object @classmethod - def _from_device_memory(cls, data: Any) -> Self: + def _from_device_memory(cls, data: Any, exposed: bool) -> Self: """Create from an object exposing `__cuda_array_interface__`. No data is being copied. @@ -153,6 +177,10 @@ def _from_device_memory(cls, data: Any) -> Self: ---------- data : device-buffer-like An object implementing the CUDA Array Interface. + exposed : bool + Mark the buffer as permanently exposed. This is used by + ExposureTrackedBuffer to determine when a deep copy is required + and by SpillableBuffer to mark the buffer unspillable. Returns ------- @@ -203,7 +231,7 @@ def _from_host_memory(cls, data: Any) -> Self: # Copy to device memory buf = rmm.DeviceBuffer(ptr=ptr, size=size) # Create from device memory - return cls._from_device_memory(buf) + return cls._from_device_memory(buf, exposed=False) @property def size(self) -> int: @@ -376,7 +404,8 @@ def copy(self, deep: bool = True) -> Self: rmm.DeviceBuffer( ptr=self._owner.get_ptr(mode="read") + self._offset, size=self.size, - ) + ), + exposed=False, ) return self.__class__(owner=owner, offset=0, size=owner.size) @@ -435,7 +464,7 @@ def deserialize(cls, header: dict, frames: list) -> Self: owner_type: BufferOwner = pickle.loads(header["owner-type-serialized"]) if hasattr(frame, "__cuda_array_interface__"): - owner = owner_type._from_device_memory(frame) + owner = owner_type._from_device_memory(frame, exposed=False) else: owner = owner_type._from_host_memory(frame) return cls( diff --git a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py index c8a93607d7d..050e5aa14a1 100644 --- a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py +++ b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py @@ -105,8 +105,8 @@ def mark_exposed(self) -> None: self._exposed = True @classmethod - def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: - ret = super()._from_device_memory(data) + def _from_device_memory(cls, data: Any, exposed: bool) -> Self: + ret = super()._from_device_memory(data, exposed=exposed) ret._exposed = exposed ret._slices = weakref.WeakSet() return ret diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 08f8fefbf89..ddee018d415 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -18,8 +18,6 @@ Buffer, BufferOwner, cuda_array_interface_wrapper, - get_owner, - get_ptr_and_size, host_memory_allocation, ) from cudf.utils.string import format_bytes @@ -28,68 +26,6 @@ from cudf.core.buffer.spill_manager import SpillManager -def as_spillable_buffer(data, exposed: bool) -> SpillableBuffer: - """Factory function to wrap `data` in a SpillableBuffer object. - - If `data` isn't a buffer already, a new buffer that points to the memory of - `data` is created. If `data` represents host memory, it is copied to a new - `rmm.DeviceBuffer` device allocation. Otherwise, the memory of `data` is - **not** copied, instead the new buffer keeps a reference to `data` in order - to retain its lifetime. - - If `data` is owned by a spillable buffer, a "slice" of the buffer is - returned. In this case, the spillable buffer must either be "exposed" or - spilled locked (called within an acquire_spill_lock context). This is to - guarantee that the memory of `data` isn't spilled before this function gets - to calculate the offset of the new slice. - - It is illegal for a spillable buffer to own another spillable buffer. - - Parameters - ---------- - data : buffer-like or array-like - A buffer-like or array-like object that represent C-contiguous memory. - exposed : bool, optional - Mark the buffer as permanently exposed (unspillable). - - Return - ------ - SpillableBuffer - A spillabe buffer instance that represents the device memory of `data`. - """ - - from cudf.core.buffer.utils import get_spill_lock - - if not hasattr(data, "__cuda_array_interface__"): - if exposed: - raise ValueError("cannot created exposed host memory") - return SpillableBuffer( - owner=SpillableBufferOwner._from_host_memory(data) - ) - - owner = get_owner(data, SpillableBufferOwner) - if owner is not None: - if not owner.exposed and get_spill_lock() is None: - raise ValueError( - "A owning spillable buffer must " - "either be exposed or spilled locked." - ) - # `data` is owned by an spillable buffer, which is exposed or - # spilled locked. - ptr, size = get_ptr_and_size(data.__cuda_array_interface__) - base_ptr = owner.get_ptr(mode="read") - if size > 0 and owner._ptr == 0: - raise ValueError( - "Cannot create a non-empty slice of a null buffer" - ) - return SpillableBuffer(owner=owner, offset=ptr - base_ptr, size=size) - - # `data` is new device memory - return SpillableBuffer( - owner=SpillableBufferOwner._from_device_memory(data, exposed=exposed) - ) - - class SpillLock: pass @@ -186,7 +122,7 @@ def _finalize_init(self, ptr_desc: Dict[str, Any], exposed: bool) -> None: self._manager.add(self) @classmethod - def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: + def _from_device_memory(cls, data: Any, exposed: bool) -> Self: """Create a spillabe buffer from device memory. No data is being copied. @@ -195,7 +131,7 @@ def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: ---------- data : device-buffer-like An object implementing the CUDA Array Interface. - exposed : bool, optional + exposed : bool Mark the buffer as permanently exposed (unspillable). Returns @@ -203,7 +139,7 @@ def _from_device_memory(cls, data: Any, *, exposed: bool = False) -> Self: SpillableBufferOwner Buffer representing the same device memory as `data` """ - ret = super()._from_device_memory(data) + ret = super()._from_device_memory(data, exposed=exposed) ret._finalize_init(ptr_desc={"type": "gpu"}, exposed=exposed) return ret @@ -525,7 +461,8 @@ def serialize(self) -> Tuple[dict, list]: ptr=ptr, size=size, owner=(self._owner, spill_lock), - ) + ), + exposed=False, ) ] return header, frames diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py index e3ab1b7d3b0..3e2dc11e993 100644 --- a/python/cudf/cudf/core/buffer/utils.py +++ b/python/cudf/cudf/core/buffer/utils.py @@ -4,16 +4,25 @@ import threading from contextlib import ContextDecorator -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Type, Union from cudf.core.buffer.buffer import ( Buffer, BufferOwner, cuda_array_interface_wrapper, + get_buffer_owner, + get_ptr_and_size, +) +from cudf.core.buffer.exposure_tracked_buffer import ( + ExposureTrackedBuffer, + ExposureTrackedBufferOwner, ) -from cudf.core.buffer.exposure_tracked_buffer import as_exposure_tracked_buffer from cudf.core.buffer.spill_manager import get_global_manager -from cudf.core.buffer.spillable_buffer import SpillLock, as_spillable_buffer +from cudf.core.buffer.spillable_buffer import ( + SpillableBuffer, + SpillableBufferOwner, + SpillLock, +) from cudf.options import get_option @@ -34,7 +43,18 @@ def as_buffer( If `data` is an integer, it is assumed to point to device memory. - Raises ValueError if data isn't C-contiguous. + Raises ValueError if `data` isn't C-contiguous. + + If copy-on-write is enabled, a ExposureTrackedBuffer that refers to a + ExposureTrackedBufferOwner is returned. + + If spilling is enabled, a SpillableBuffer that refers ot a + SpillableBufferOwner is returned. If `data` is owned by a spillable buffer, + it must either be "exposed" or spilled locked (called within an + acquire_spill_lock context). This is to guarantee that the memory of `data` + isn't spilled before this function gets to calculate the offset of the new + SpillableBuffer. + Parameters ---------- @@ -77,14 +97,49 @@ def as_buffer( "`data` is a buffer-like or array-like object" ) - if get_option("copy_on_write"): - return as_exposure_tracked_buffer(data, exposed=exposed) + # Find the buffer types to return based on the current config + owner_class: Type[BufferOwner] + buffer_class: Type[Buffer] if get_global_manager() is not None: - return as_spillable_buffer(data, exposed=exposed) - if hasattr(data, "__cuda_array_interface__"): - return Buffer(owner=BufferOwner._from_device_memory(data)) + owner_class = SpillableBufferOwner + buffer_class = SpillableBuffer + elif get_option("copy_on_write"): + owner_class = ExposureTrackedBufferOwner + buffer_class = ExposureTrackedBuffer else: - return Buffer(owner=BufferOwner._from_host_memory(data)) + owner_class = BufferOwner + buffer_class = Buffer + + # Handle host memory, + if not hasattr(data, "__cuda_array_interface__"): + if exposed: + raise ValueError("cannot created exposed host memory") + return buffer_class(owner=owner_class._from_host_memory(data)) + + # Check if `data` is owned by a known class + owner = get_buffer_owner(data) + if owner is None: # `data` is new device memory + return buffer_class( + owner=owner_class._from_device_memory(data, exposed=exposed) + ) + + # At this point, we know that `data` is owned by a known class, which + # should be the same class as specified by the current config (see above) + assert owner.__class__ is owner_class + if ( + isinstance(owner, SpillableBufferOwner) + and not owner.exposed + and get_spill_lock() is None + ): + raise ValueError( + "A owning spillable buffer must " + "either be exposed or spilled locked." + ) + ptr, size = get_ptr_and_size(data.__cuda_array_interface__) + base_ptr = owner.get_ptr(mode="read") + if size > 0 and base_ptr == 0: + raise ValueError("Cannot create a non-empty slice of a null buffer") + return buffer_class(owner=owner, offset=ptr - base_ptr, size=size) _thread_spill_locks: Dict[int, Tuple[Optional[SpillLock], int]] = {}