Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove cudf._lib.null_mask in favor of inlining pylibcudf #17440

Merged
merged 6 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ set(cython_sources
join.pyx
json.pyx
merge.pyx
null_mask.pyx
orc.pyx
parquet.pyx
reduce.pyx
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
join,
json,
merge,
null_mask,
nvtext,
orc,
parquet,
Expand Down
21 changes: 14 additions & 7 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import pylibcudf
import rmm

import cudf
import cudf._lib as libcudf
from cudf.core.buffer import (
Buffer,
ExposureTrackedBuffer,
Expand All @@ -36,7 +35,6 @@ from cudf._lib.types cimport (
dtype_to_pylibcudf_type,
)

from cudf._lib.null_mask import bitmask_allocation_size_bytes
from cudf._lib.types import dtype_from_pylibcudf_column

cimport pylibcudf.libcudf.copying as cpp_copying
Expand Down Expand Up @@ -159,7 +157,10 @@ cdef class Column:
if self.base_mask is None or self.offset == 0:
self._mask = self.base_mask
else:
self._mask = libcudf.null_mask.copy_bitmask(self)
with acquire_spill_lock():
self._mask = as_buffer(
pylibcudf.null_mask.copy_bitmask(self.to_pylibcudf(mode="read"))
)
return self._mask

@property
Expand All @@ -183,7 +184,9 @@ cdef class Column:

if value is not None:
# bitmask size must be relative to offset = 0 data.
required_size = bitmask_allocation_size_bytes(self.base_size)
required_size = pylibcudf.null_mask.bitmask_allocation_size_bytes(
self.base_size
)
if value.size < required_size:
error_msg = (
"The Buffer for mask is smaller than expected, "
Expand Down Expand Up @@ -220,7 +223,7 @@ cdef class Column:
and compute new data Buffers zero-copy that use pointer arithmetic to
properly adjust the pointer.
"""
mask_size = bitmask_allocation_size_bytes(self.size)
mask_size = pylibcudf.null_mask.bitmask_allocation_size_bytes(self.size)
required_num_bytes = -(-self.size // 8) # ceiling divide
error_msg = (
"The value for mask is smaller than expected, got {} bytes, "
Expand Down Expand Up @@ -790,13 +793,17 @@ cdef class Column:
mask = as_buffer(
rmm.DeviceBuffer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(base_size)
size=pylibcudf.null_mask.bitmask_allocation_size_bytes(
base_size
)
)
)
else:
mask = as_buffer(
data=mask_ptr,
size=bitmask_allocation_size_bytes(base_size),
size=pylibcudf.null_mask.bitmask_allocation_size_bytes(
base_size
),
owner=mask_owner,
exposed=True
)
Expand Down
65 changes: 0 additions & 65 deletions python/cudf/cudf/_lib/null_mask.pyx

This file was deleted.

23 changes: 14 additions & 9 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib.column import Column
from cudf._lib.null_mask import (
MaskState,
bitmask_allocation_size_bytes,
create_null_mask,
)
from cudf._lib.scalar import as_device_scalar
from cudf._lib.stream_compaction import (
apply_boolean_mask,
Expand Down Expand Up @@ -383,7 +378,7 @@ def memory_usage(self) -> int:
if self.data is not None:
n += self.data.size
if self.nullable:
n += bitmask_allocation_size_bytes(self.size)
n += plc.null_mask.bitmask_allocation_size_bytes(self.size)
return n

def _fill(
Expand All @@ -410,7 +405,11 @@ def _fill(
)

if not slr.is_valid() and not self.nullable:
mask = create_null_mask(self.size, state=MaskState.ALL_VALID)
mask = as_buffer(
plc.null_mask.create_null_mask(
self.size, plc.null_mask.MaskState.ALL_VALID
)
)
self.set_base_mask(mask)

libcudf.filling.fill_in_place(self, begin, end, slr.device_value)
Expand Down Expand Up @@ -1549,7 +1548,11 @@ def column_empty(
data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize))

if masked:
mask = create_null_mask(row_count, state=MaskState.ALL_NULL)
mask = as_buffer(
plc.null_mask.create_null_mask(
row_count, plc.null_mask.MaskState.ALL_NULL
)
)
else:
mask = None

Expand Down Expand Up @@ -2206,7 +2209,9 @@ def _mask_from_cuda_array_interface_desc(obj, cai_mask) -> Buffer:
typestr = desc["typestr"]
typecode = typestr[1]
if typecode == "t":
mask_size = bitmask_allocation_size_bytes(desc["shape"][0])
mask_size = plc.null_mask.bitmask_allocation_size_bytes(
desc["shape"][0]
)
return as_buffer(data=desc["data"][0], size=mask_size, owner=obj)
elif typecode == "b":
col = as_column(cai_mask)
Expand Down
7 changes: 2 additions & 5 deletions python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@ def __init__(

@cached_property
def memory_usage(self):
n = 0
if self.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)

n = super().memory_usage
child0_size = (self.size + 1) * self.base_children[0].dtype.itemsize
current_base_child = self.base_children[1]
current_offset = self.offset
Expand All @@ -97,7 +94,7 @@ def memory_usage(self):
) * current_base_child.dtype.itemsize

if current_base_child.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(
n += plc.null_mask.bitmask_allocation_size_bytes(
current_base_child.size
)
return n
Expand Down
6 changes: 1 addition & 5 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5750,17 +5750,13 @@ def end_offset(self) -> int:

@cached_property
def memory_usage(self) -> int:
n = 0
if self.data is not None:
n += self.data.size
n = super().memory_usage
if len(self.base_children) == 1:
child0_size = (self.size + 1) * self.base_children[
0
].dtype.itemsize

n += child0_size
if self.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
return n

@property
Expand Down
5 changes: 1 addition & 4 deletions python/cudf/cudf/core/column/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,7 @@ def to_pandas(

@cached_property
def memory_usage(self) -> int:
n = 0
if self.nullable:
n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)

n = super().memory_usage
for child in self.children:
n += child.memory_usage

Expand Down
9 changes: 5 additions & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from cudf.core import column, df_protocol, indexing_utils, reshape
from cudf.core._compat import PANDAS_LT_300
from cudf.core.abc import Serializable
from cudf.core.buffer import acquire_spill_lock
from cudf.core.buffer import acquire_spill_lock, as_buffer
from cudf.core.column import (
CategoricalColumn,
ColumnBase,
Expand Down Expand Up @@ -3191,9 +3191,10 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None):

out.append(result._with_type_metadata(col.dtype))
else:
out_mask = cudf._lib.null_mask.create_null_mask(
len(source_col),
state=cudf._lib.null_mask.MaskState.ALL_NULL,
out_mask = as_buffer(
plc.null_mask.create_null_mask(
len(source_col), plc.null_mask.MaskState.ALL_NULL
)
)
out.append(source_col.set_mask(out_mask))

Expand Down
4 changes: 1 addition & 3 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import cudf
from cudf import _lib as libcudf
from cudf._lib import groupby as libgroupby
from cudf._lib.null_mask import bitmask_or
from cudf._lib.sort import segmented_sort_by_key
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
Expand Down Expand Up @@ -1105,8 +1104,7 @@ def ngroup(self, ascending=True):
"""
index = self.grouping.keys.unique().sort_values()
num_groups = len(index)
_, has_null_group = bitmask_or([*index._columns])

has_null_group = any(col.has_nulls() for col in index._columns)
if ascending:
# Count ascending from 0 to num_groups - 1
groups = range(num_groups)
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/cudf/testing/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
from numba.cuda.cudadecl import registry as cuda_decl_registry
from numba.cuda.cudaimpl import lower as cuda_lower

import pylibcudf as plc

import cudf
from cudf._lib.null_mask import bitmask_allocation_size_bytes
from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
from cudf.core.udf.strings_lowering import cast_string_view_to_udf_string
from cudf.core.udf.strings_typing import StringView, string_view, udf_string
Expand Down Expand Up @@ -91,7 +92,7 @@ def random_bitmask(size):
size : int
number of bits
"""
sz = bitmask_allocation_size_bytes(size)
sz = plc.null_mask.bitmask_allocation_size_bytes(size)
rng = np.random.default_rng(seed=0)
data = rng.integers(0, 255, dtype="u1", size=sz)
return data.view("i1")
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np
import pandas as pd

import pylibcudf as plc
import rmm

import cudf
Expand Down Expand Up @@ -252,7 +253,7 @@ def pa_mask_buffer_to_mask(mask_buf, size):
"""
Convert PyArrow mask buffer to cuDF mask buffer
"""
mask_size = cudf._lib.null_mask.bitmask_allocation_size_bytes(size)
mask_size = plc.null_mask.bitmask_allocation_size_bytes(size)
if mask_buf.size < mask_size:
dbuf = rmm.DeviceBuffer(size=mask_size)
dbuf.copy_from_host(np.asarray(mask_buf).view("u1"))
Expand Down
Loading