From 37a3bc3e6d2ebec3f5e1f40cafccc95be9c33b81 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:59:58 -0800 Subject: [PATCH 1/4] Remove cudf._lib.null_mask in favor of inlining pylibcudf --- python/cudf/cudf/_lib/CMakeLists.txt | 1 - python/cudf/cudf/_lib/__init__.py | 1 - python/cudf/cudf/_lib/null_mask.pyx | 65 ------------------------ python/cudf/cudf/core/column/column.py | 23 +++++---- python/cudf/cudf/core/column/lists.py | 9 ++-- python/cudf/cudf/core/column/string.py | 6 +-- python/cudf/cudf/core/column/struct.py | 5 +- python/cudf/cudf/core/dataframe.py | 9 ++-- python/cudf/cudf/core/groupby/groupby.py | 9 +++- python/cudf/cudf/testing/_utils.py | 5 +- python/cudf/cudf/utils/utils.py | 3 +- 11 files changed, 37 insertions(+), 99 deletions(-) delete mode 100644 python/cudf/cudf/_lib/null_mask.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 1c2b24d2391..22d6abddeff 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -26,7 +26,6 @@ set(cython_sources json.pyx lists.pyx merge.pyx - null_mask.pyx orc.pyx parquet.pyx partitioning.pyx diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index 13d05033c11..cad521073ad 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -12,7 +12,6 @@ join, json, merge, - null_mask, nvtext, orc, parquet, diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx deleted file mode 100644 index d54e8e66281..00000000000 --- a/python/cudf/cudf/_lib/null_mask.pyx +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -import pylibcudf -from pylibcudf.null_mask import MaskState - -from cudf.core.buffer import acquire_spill_lock, as_buffer - -from cudf._lib.column cimport Column - - -@acquire_spill_lock() -def copy_bitmask(Column col): - """ - Copies column's validity mask buffer into a new buffer, shifting by the - offset if nonzero - """ - if col.base_mask is None: - return None - - rmm_db = pylibcudf.null_mask.copy_bitmask(col.to_pylibcudf(mode="read")) - buf = as_buffer(rmm_db) - return buf - - -def bitmask_allocation_size_bytes(num_bits): - """ - Given a size, calculates the number of bytes that should be allocated for a - column validity mask - """ - return pylibcudf.null_mask.bitmask_allocation_size_bytes(num_bits) - - -def create_null_mask(size, state=MaskState.UNINITIALIZED): - """ - Given a size and a mask state, allocate a mask that can properly represent - the given size with the given mask state - - Parameters - ---------- - size : int - Number of elements the mask needs to be able to represent - state : ``MaskState``, default ``MaskState.UNINITIALIZED`` - State the null mask should be created in - """ - rmm_db = pylibcudf.null_mask.create_null_mask(size, state) - buf = as_buffer(rmm_db) - return buf - - -@acquire_spill_lock() -def bitmask_and(list columns): - rmm_db, other = pylibcudf.null_mask.bitmask_and( - [col.to_pylibcudf(mode="read") for col in columns] - ) - buf = as_buffer(rmm_db) - return buf, other - - -@acquire_spill_lock() -def bitmask_or(list columns): - rmm_db, other = pylibcudf.null_mask.bitmask_or( - [col.to_pylibcudf(mode="read") for col in columns] - ) - buf = as_buffer(rmm_db) - return buf, other diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 53946be1c49..1cb73464854 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -25,11 +25,6 @@ import cudf from cudf import _lib as libcudf from cudf._lib.column import Column -from cudf._lib.null_mask import ( - MaskState, - bitmask_allocation_size_bytes, - create_null_mask, -) from cudf._lib.scalar import as_device_scalar from cudf._lib.stream_compaction import ( apply_boolean_mask, @@ -383,7 +378,7 @@ def memory_usage(self) -> int: if self.data is not None: n += self.data.size if self.nullable: - n += bitmask_allocation_size_bytes(self.size) + n += plc.null_mask.bitmask_allocation_size_bytes(self.size) return n def _fill( @@ -410,7 +405,11 @@ def _fill( ) if not slr.is_valid() and not self.nullable: - mask = create_null_mask(self.size, state=MaskState.ALL_VALID) + mask = as_buffer( + plc.null_mask.create_null_mask( + self.size, plc.null_mask.MaskState.ALL_VALID + ) + ) self.set_base_mask(mask) libcudf.filling.fill_in_place(self, begin, end, slr.device_value) @@ -1549,7 +1548,11 @@ def column_empty( data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize)) if masked: - mask = create_null_mask(row_count, state=MaskState.ALL_NULL) + mask = as_buffer( + plc.null_mask.create_null_mask( + row_count, plc.null_mask.MaskState.ALL_NULL + ) + ) else: mask = None @@ -2206,7 +2209,9 @@ def _mask_from_cuda_array_interface_desc(obj, cai_mask) -> Buffer: typestr = desc["typestr"] typecode = typestr[1] if typecode == "t": - mask_size = bitmask_allocation_size_bytes(desc["shape"][0]) + mask_size = plc.null_mask.bitmask_allocation_size_bytes( + desc["shape"][0] + ) return as_buffer(data=desc["data"][0], size=mask_size, owner=obj) elif typecode == "b": col = as_column(cai_mask) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 6b25e568f00..55b31e43818 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -10,6 +10,8 @@ import pyarrow as pa from typing_extensions import Self +import pylibcudf as plc + import cudf from cudf._lib.lists import ( concatenate_list_elements, @@ -79,10 +81,7 @@ def __init__( @cached_property def memory_usage(self): - n = 0 - if self.nullable: - n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size) - + n = super().memory_usage child0_size = (self.size + 1) * self.base_children[0].dtype.itemsize current_base_child = self.base_children[1] current_offset = self.offset @@ -107,7 +106,7 @@ def memory_usage(self): ) * current_base_child.dtype.itemsize if current_base_child.nullable: - n += cudf._lib.null_mask.bitmask_allocation_size_bytes( + n += plc.null_mask.bitmask_allocation_size_bytes( current_base_child.size ) return n diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a9ab2d373fd..47763063c4c 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5750,17 +5750,13 @@ def end_offset(self) -> int: @cached_property def memory_usage(self) -> int: - n = 0 - if self.data is not None: - n += self.data.size + n = super().memory_usage if len(self.base_children) == 1: child0_size = (self.size + 1) * self.base_children[ 0 ].dtype.itemsize n += child0_size - if self.nullable: - n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size) return n @property diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 8f16ba4e15b..2adc6b54bab 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -101,10 +101,7 @@ def to_pandas( @cached_property def memory_usage(self) -> int: - n = 0 - if self.nullable: - n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size) - + n = super().memory_usage for child in self.children: n += child.memory_usage diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 728cc47a7c9..abc04df182b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -45,7 +45,7 @@ from cudf.core import column, df_protocol, indexing_utils, reshape from cudf.core._compat import PANDAS_LT_300 from cudf.core.abc import Serializable -from cudf.core.buffer import acquire_spill_lock +from cudf.core.buffer import acquire_spill_lock, as_buffer from cudf.core.column import ( CategoricalColumn, ColumnBase, @@ -3156,9 +3156,10 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None): out.append(result._with_type_metadata(col.dtype)) else: - out_mask = cudf._lib.null_mask.create_null_mask( - len(source_col), - state=cudf._lib.null_mask.MaskState.ALL_NULL, + out_mask = as_buffer( + plc.null_mask.create_null_mask( + len(source_col), plc.null_mask.MaskState.ALL_NULL + ) ) out.append(source_col.set_mask(out_mask)) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index e59b948aba9..77a0f26dc70 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -14,10 +14,11 @@ import numpy as np import pandas as pd +import pylibcudf as plc + import cudf from cudf import _lib as libcudf from cudf._lib import groupby as libgroupby -from cudf._lib.null_mask import bitmask_or from cudf._lib.reshape import interleave_columns from cudf._lib.sort import segmented_sort_by_key from cudf._lib.types import size_type_dtype @@ -25,6 +26,7 @@ from cudf.api.types import is_list_like, is_numeric_dtype from cudf.core._compat import PANDAS_LT_300 from cudf.core.abc import Serializable +from cudf.core.buffer import acquire_spill_lock from cudf.core.column.column import ColumnBase, StructDtype, as_column from cudf.core.column_accessor import ColumnAccessor from cudf.core.copy_types import GatherMap @@ -1103,7 +1105,10 @@ def ngroup(self, ascending=True): """ index = self.grouping.keys.unique().sort_values() num_groups = len(index) - _, has_null_group = bitmask_or([*index._columns]) + with acquire_spill_lock(): + _, has_null_group = plc.null_mask.bitmask_or( + [col.to_pylibcudf(mode="read") for col in index._columns] + ) if ascending: # Count ascending from 0 to num_groups - 1 diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index a5dc8a5498c..6624a1a150e 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -15,8 +15,9 @@ from numba.cuda.cudadecl import registry as cuda_decl_registry from numba.cuda.cudaimpl import lower as cuda_lower +import pylibcudf as plc + import cudf -from cudf._lib.null_mask import bitmask_allocation_size_bytes from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion from cudf.core.udf.strings_lowering import cast_string_view_to_udf_string from cudf.core.udf.strings_typing import StringView, string_view, udf_string @@ -91,7 +92,7 @@ def random_bitmask(size): size : int number of bits """ - sz = bitmask_allocation_size_bytes(size) + sz = plc.null_mask.bitmask_allocation_size_bytes(size) rng = np.random.default_rng(seed=0) data = rng.integers(0, 255, dtype="u1", size=sz) return data.view("i1") diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 294253cd119..e6d252b8807 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -11,6 +11,7 @@ import numpy as np import pandas as pd +import pylibcudf as plc import rmm import cudf @@ -252,7 +253,7 @@ def pa_mask_buffer_to_mask(mask_buf, size): """ Convert PyArrow mask buffer to cuDF mask buffer """ - mask_size = cudf._lib.null_mask.bitmask_allocation_size_bytes(size) + mask_size = plc.null_mask.bitmask_allocation_size_bytes(size) if mask_buf.size < mask_size: dbuf = rmm.DeviceBuffer(size=mask_size) dbuf.copy_from_host(np.asarray(mask_buf).view("u1")) From 78de18e895a56ddc402e3c847f5d5b88312a2b24 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:22:31 -0800 Subject: [PATCH 2/4] Another reference --- python/cudf/cudf/_lib/column.pyx | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 94dbdf5534d..b0c41b6a179 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -36,7 +36,6 @@ from cudf._lib.types cimport ( dtype_to_pylibcudf_type, ) -from cudf._lib.null_mask import bitmask_allocation_size_bytes from cudf._lib.types import dtype_from_pylibcudf_column cimport pylibcudf.libcudf.copying as cpp_copying @@ -183,7 +182,9 @@ cdef class Column: if value is not None: # bitmask size must be relative to offset = 0 data. - required_size = bitmask_allocation_size_bytes(self.base_size) + required_size = pylibcudf.null_mask.bitmask_allocation_size_bytes( + self.base_size + ) if value.size < required_size: error_msg = ( "The Buffer for mask is smaller than expected, " @@ -220,7 +221,7 @@ cdef class Column: and compute new data Buffers zero-copy that use pointer arithmetic to properly adjust the pointer. """ - mask_size = bitmask_allocation_size_bytes(self.size) + mask_size = pylibcudf.null_mask.bitmask_allocation_size_bytes(self.size) required_num_bytes = -(-self.size // 8) # ceiling divide error_msg = ( "The value for mask is smaller than expected, got {} bytes, " @@ -790,13 +791,17 @@ cdef class Column: mask = as_buffer( rmm.DeviceBuffer( ptr=mask_ptr, - size=bitmask_allocation_size_bytes(base_size) + size=pylibcudf.null_mask.bitmask_allocation_size_bytes( + base_size + ) ) ) else: mask = as_buffer( data=mask_ptr, - size=bitmask_allocation_size_bytes(base_size), + size=pylibcudf.null_mask.bitmask_allocation_size_bytes( + base_size + ), owner=mask_owner, exposed=True ) From cc3eee5ec96fbd0ee288a01a48902d4963982158 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:45:28 -0800 Subject: [PATCH 3/4] address copy_bitmask usage --- python/cudf/cudf/_lib/column.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index b0c41b6a179..9cbe11d61ac 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -11,7 +11,6 @@ import pylibcudf import rmm import cudf -import cudf._lib as libcudf from cudf.core.buffer import ( Buffer, ExposureTrackedBuffer, @@ -158,7 +157,10 @@ cdef class Column: if self.base_mask is None or self.offset == 0: self._mask = self.base_mask else: - self._mask = libcudf.null_mask.copy_bitmask(self) + with acquire_spill_lock(): + self._mask = as_buffer( + pylibcudf.null_mask.copy_bitmask(self.to_pylibcudf(mode="read")) + ) return self._mask @property From 125b844c3fc7ca0183cca4c62c340e4b22a97fdb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:59:18 -0800 Subject: [PATCH 4/4] Avoid bitmask_or --- python/cudf/cudf/core/groupby/groupby.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index ac0958aa7f0..18dd741a663 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1104,11 +1104,7 @@ def ngroup(self, ascending=True): """ index = self.grouping.keys.unique().sort_values() num_groups = len(index) - with acquire_spill_lock(): - _, has_null_group = plc.null_mask.bitmask_or( - [col.to_pylibcudf(mode="read") for col in index._columns] - ) - + has_null_group = any(col.has_nulls() for col in index._columns) if ascending: # Count ascending from 0 to num_groups - 1 groups = range(num_groups)