Skip to content

Commit

Permalink
Rollback of DeviceBufferLike (#12009)
Browse files Browse the repository at this point in the history
This PR replaces `DeviceBufferLike` with `Buffer` and clear the way for a spillable sub-class of `Buffer`.

#### Context 
The introduction of the [`DeviceBufferLike`](#11447) protocol was motivated by [the spilling work](#11553), which we initially thought would have to be implemented in Cython. However, it can be done in pure Python, which makes `DeviceBufferLike` an unneeded complexity.  

#### Review notes 

- In order to introduce a spillable-buffer in the future, we still use a factory function, `as_buffer()`, to create Buffers. 
- `buffer.py` is moved into the submodule `core.buffer` to ease organization when adding the spillable-buffer and spilling manager. 

#### Breaking
This PR breaks external use of `Buffer` e.g. `Buffer.__init__` raise an exception now and the `"constructor-kwargs"` header from #4164 has been removed. 
Submitted a PR to fix this in cuml: rapidsai/cuml#4965

##

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Ashwin Srinath (https://github.com/shwina)

URL: #12009
  • Loading branch information
madsbk authored Nov 2, 2022
1 parent 5ace809 commit d6a9e4a
Show file tree
Hide file tree
Showing 31 changed files with 559 additions and 539 deletions.
28 changes: 14 additions & 14 deletions python/cudf/cudf/_lib/column.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ from __future__ import annotations
from typing import Dict, Optional, Tuple, TypeVar

from cudf._typing import Dtype, DtypeObj, ScalarLike
from cudf.core.buffer import DeviceBufferLike
from cudf.core.buffer import Buffer
from cudf.core.column import ColumnBase

T = TypeVar("T")

class Column:
_data: Optional[DeviceBufferLike]
_mask: Optional[DeviceBufferLike]
_base_data: Optional[DeviceBufferLike]
_base_mask: Optional[DeviceBufferLike]
_data: Optional[Buffer]
_mask: Optional[Buffer]
_base_data: Optional[Buffer]
_base_mask: Optional[Buffer]
_dtype: DtypeObj
_size: int
_offset: int
Expand All @@ -25,10 +25,10 @@ class Column:

def __init__(
self,
data: Optional[DeviceBufferLike],
data: Optional[Buffer],
size: int,
dtype: Dtype,
mask: Optional[DeviceBufferLike] = None,
mask: Optional[Buffer] = None,
offset: int = None,
null_count: int = None,
children: Tuple[ColumnBase, ...] = (),
Expand All @@ -40,27 +40,27 @@ class Column:
@property
def size(self) -> int: ...
@property
def base_data(self) -> Optional[DeviceBufferLike]: ...
def base_data(self) -> Optional[Buffer]: ...
@property
def base_data_ptr(self) -> int: ...
@property
def data(self) -> Optional[DeviceBufferLike]: ...
def data(self) -> Optional[Buffer]: ...
@property
def data_ptr(self) -> int: ...
def set_base_data(self, value: DeviceBufferLike) -> None: ...
def set_base_data(self, value: Buffer) -> None: ...
@property
def nullable(self) -> bool: ...
def has_nulls(self, include_nan: bool = False) -> bool: ...
@property
def base_mask(self) -> Optional[DeviceBufferLike]: ...
def base_mask(self) -> Optional[Buffer]: ...
@property
def base_mask_ptr(self) -> int: ...
@property
def mask(self) -> Optional[DeviceBufferLike]: ...
def mask(self) -> Optional[Buffer]: ...
@property
def mask_ptr(self) -> int: ...
def set_base_mask(self, value: Optional[DeviceBufferLike]) -> None: ...
def set_mask(self: T, value: Optional[DeviceBufferLike]) -> T: ...
def set_base_mask(self, value: Optional[Buffer]) -> None: ...
def set_mask(self: T, value: Optional[Buffer]) -> T: ...
@property
def null_count(self) -> int: ...
@property
Expand Down
44 changes: 22 additions & 22 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import rmm
import cudf
import cudf._lib as libcudf
from cudf.api.types import is_categorical_dtype
from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like
from cudf.core.buffer import Buffer, as_buffer

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
Expand Down Expand Up @@ -39,9 +39,9 @@ cdef class Column:
A Column stores columnar data in device memory.
A Column may be composed of:
* A *data* DeviceBufferLike
* A *data* Buffer
* One or more (optional) *children* Columns
* An (optional) *mask* DeviceBufferLike representing the nullmask
* An (optional) *mask* Buffer representing the nullmask
The *dtype* indicates the Column's element type.
"""
Expand Down Expand Up @@ -106,9 +106,9 @@ cdef class Column:
return self.data.ptr

def set_base_data(self, value):
if value is not None and not isinstance(value, DeviceBufferLike):
if value is not None and not isinstance(value, Buffer):
raise TypeError(
"Expected a DeviceBufferLike or None for data, "
"Expected a Buffer or None for data, "
f"got {type(value).__name__}"
)

Expand Down Expand Up @@ -155,17 +155,17 @@ cdef class Column:
modify size or offset in any way, so the passed mask is expected to be
compatible with the current offset.
"""
if value is not None and not isinstance(value, DeviceBufferLike):
if value is not None and not isinstance(value, Buffer):
raise TypeError(
"Expected a DeviceBufferLike or None for mask, "
"Expected a Buffer or None for mask, "
f"got {type(value).__name__}"
)

if value is not None:
required_size = bitmask_allocation_size_bytes(self.base_size)
if value.size < required_size:
error_msg = (
"The DeviceBufferLike for mask is smaller than expected, "
"The Buffer for mask is smaller than expected, "
f"got {value.size} bytes, expected {required_size} bytes."
)
if self.offset > 0 or self.size < self.base_size:
Expand Down Expand Up @@ -210,30 +210,30 @@ cdef class Column:
if isinstance(value, Column):
value = value.data_array_view
value = cp.asarray(value).view('|u1')
mask = as_device_buffer_like(value)
mask = as_buffer(value)
if mask.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
if mask.size < mask_size:
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_device(value)
mask = as_device_buffer_like(dbuf)
mask = as_buffer(dbuf)
elif hasattr(value, "__array_interface__"):
value = np.asarray(value).view("u1")[:mask_size]
if value.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(value)
mask = as_device_buffer_like(dbuf)
mask = as_buffer(dbuf)
elif PyObject_CheckBuffer(value):
value = np.asarray(value).view("u1")[:mask_size]
if value.size < required_num_bytes:
raise ValueError(error_msg.format(str(value.size)))
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(value)
mask = as_device_buffer_like(dbuf)
mask = as_buffer(dbuf)
else:
raise TypeError(
"Expected a DeviceBufferLike object or None for mask, "
"Expected a Buffer object or None for mask, "
f"got {type(value).__name__}"
)

Expand Down Expand Up @@ -432,11 +432,11 @@ cdef class Column:
cdef column_contents contents = move(c_col.get()[0].release())

data = DeviceBuffer.c_from_unique_ptr(move(contents.data))
data = as_device_buffer_like(data)
data = as_buffer(data)

if null_count > 0:
mask = DeviceBuffer.c_from_unique_ptr(move(contents.null_mask))
mask = as_device_buffer_like(mask)
mask = as_buffer(mask)
else:
mask = None

Expand All @@ -461,8 +461,8 @@ cdef class Column:
Given a ``cudf::column_view``, constructs a ``cudf.Column`` from it,
along with referencing an ``owner`` Python object that owns the memory
lifetime. If ``owner`` is a ``cudf.Column``, we reach inside of it and
make the owner of each newly created ``DeviceBufferLike`` the
respective ``DeviceBufferLike`` from the ``owner`` ``cudf.Column``.
make the owner of each newly created ``Buffer`` the respective
``Buffer`` from the ``owner`` ``cudf.Column``.
If ``owner`` is ``None``, we allocate new memory for the resulting
``cudf.Column``.
"""
Expand All @@ -487,18 +487,18 @@ cdef class Column:

if data_ptr:
if data_owner is None:
data = as_device_buffer_like(
data = as_buffer(
rmm.DeviceBuffer(ptr=data_ptr,
size=(size+offset) * dtype.itemsize)
)
else:
data = Buffer(
data = as_buffer(
data=data_ptr,
size=(base_size) * dtype.itemsize,
owner=data_owner
)
else:
data = as_device_buffer_like(
data = as_buffer(
rmm.DeviceBuffer(ptr=data_ptr, size=0)
)

Expand Down Expand Up @@ -528,14 +528,14 @@ cdef class Column:
# result:
mask = None
else:
mask = as_device_buffer_like(
mask = as_buffer(
rmm.DeviceBuffer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(size+offset)
)
)
else:
mask = Buffer(
mask = as_buffer(
data=mask_ptr,
size=bitmask_allocation_size_bytes(base_size),
owner=mask_owner
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/concat.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from cudf._lib.utils cimport (
table_view_from_table,
)

from cudf.core.buffer import as_device_buffer_like
from cudf.core.buffer import as_buffer

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

Expand All @@ -31,7 +31,7 @@ cpdef concat_masks(object columns):
with nogil:
c_result = move(libcudf_concatenate_masks(c_views))
c_unique_result = make_unique[device_buffer](move(c_result))
return as_device_buffer_like(
return as_buffer(
DeviceBuffer.c_from_unique_ptr(move(c_unique_result))
)

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ from libcpp.vector cimport vector
from rmm._lib.device_buffer cimport DeviceBuffer

import cudf
from cudf.core.buffer import Buffer
from cudf.core.buffer import Buffer, as_buffer

from cudf._lib.column cimport Column

Expand Down Expand Up @@ -721,7 +721,7 @@ cdef class _CPackedColumns:
header = {}
frames = []

gpu_data = Buffer(
gpu_data = as_buffer(
data=self.gpu_data_ptr,
size=self.gpu_data_size,
owner=self
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/null_mask.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport mask_state, size_type
from cudf._lib.utils cimport table_view_from_columns

from cudf.core.buffer import as_device_buffer_like
from cudf.core.buffer import as_buffer


class MaskState(Enum):
Expand Down Expand Up @@ -52,7 +52,7 @@ def copy_bitmask(Column col):
up_db = make_unique[device_buffer](move(db))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_device_buffer_like(rmm_db)
buf = as_buffer(rmm_db)
return buf


Expand Down Expand Up @@ -98,7 +98,7 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
up_db = make_unique[device_buffer](move(db))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_device_buffer_like(rmm_db)
buf = as_buffer(rmm_db)
return buf


Expand All @@ -110,7 +110,7 @@ def bitmask_and(columns: list):
c_result = move(cpp_bitmask_and(c_view))
up_db = make_unique[device_buffer](move(c_result.first))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_device_buffer_like(dbuf)
buf = as_buffer(dbuf)
return buf, c_result.second


Expand All @@ -122,5 +122,5 @@ def bitmask_or(columns: list):
c_result = move(cpp_bitmask_or(c_view))
up_db = make_unique[device_buffer](move(c_result.first))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_device_buffer_like(dbuf)
buf = as_buffer(dbuf)
return buf, c_result.second
12 changes: 6 additions & 6 deletions python/cudf/cudf/_lib/transform.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ from numba.np import numpy_support
import cudf
from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
from cudf.core._internals.expressions import parse_expression
from cudf.core.buffer import as_device_buffer_like
from cudf.core.buffer import as_buffer
from cudf.utils import cudautils

from cython.operator cimport dereference
Expand Down Expand Up @@ -37,7 +37,7 @@ from cudf._lib.utils cimport (
def bools_to_mask(Column col):
"""
Given an int8 (boolean) column, compress the data from booleans to bits and
return a DeviceBufferLike
return a Buffer
"""
cdef column_view col_view = col.view()
cdef pair[unique_ptr[device_buffer], size_type] cpp_out
Expand All @@ -48,7 +48,7 @@ def bools_to_mask(Column col):
up_db = move(cpp_out.first)

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_device_buffer_like(rmm_db)
buf = as_buffer(rmm_db)
return buf


Expand All @@ -57,9 +57,9 @@ def mask_to_bools(object mask_buffer, size_type begin_bit, size_type end_bit):
Given a mask buffer, returns a boolean column representng bit 0 -> False
and 1 -> True within range of [begin_bit, end_bit),
"""
if not isinstance(mask_buffer, cudf.core.buffer.DeviceBufferLike):
if not isinstance(mask_buffer, cudf.core.buffer.Buffer):
raise TypeError("mask_buffer is not an instance of "
"cudf.core.buffer.DeviceBufferLike")
"cudf.core.buffer.Buffer")
cdef bitmask_type* bit_mask = <bitmask_type*><uintptr_t>(mask_buffer.ptr)

cdef unique_ptr[column] result
Expand All @@ -84,7 +84,7 @@ def nans_to_nulls(Column input):
return None

buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
buffer = as_device_buffer_like(buffer)
buffer = as_buffer(buffer)
return buffer


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ cdef data_from_table_view(
along with referencing an ``owner`` Python object that owns the memory
lifetime. If ``owner`` is a Frame we reach inside of it and
reach inside of each ``cudf.Column`` to make the owner of each newly
created ``DeviceBufferLike`` underneath the ``cudf.Column`` objects of the
created Frame the respective ``DeviceBufferLike`` from the relevant
created ``Buffer`` underneath the ``cudf.Column`` objects of the
created Frame the respective ``Buffer`` from the relevant
``cudf.Column`` of the ``owner`` Frame
"""
cdef size_type column_idx = 0
Expand Down
Loading

0 comments on commit d6a9e4a

Please sign in to comment.