Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove all references to UNKNOWN_NULL_COUNT in Python #13345

Merged
merged 8 commits into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,20 +141,9 @@ cudf::size_type valid_count(bitmask_type const* bitmask,
rmm::cuda_stream_view stream);

/**
* @brief Given a validity bitmask, counts the number of null elements (unset bits)
* in the range `[start, stop)`.
*
* If `bitmask == nullptr`, all elements are assumed to be valid and the
* function returns `0`.
*
* @throws cudf::logic_error if `start > stop`
* @throws cudf::logic_error if `start < 0`
* @copydoc null_count(bitmask_type const* bitmask, size_type start, size_type stop)
*
* @param[in] bitmask Validity bitmask residing in device memory.
* @param[in] start Index of the first bit to count (inclusive).
* @param[in] stop Index of the last bit to count (exclusive).
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
* @return The number of null elements in the specified range.
* @param stream Stream view on which to allocate resources and queue execution.
*/
cudf::size_type null_count(bitmask_type const* bitmask,
size_type start,
Expand Down
16 changes: 16 additions & 0 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,5 +170,21 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Given a validity bitmask, counts the number of null elements (unset bits)
* in the range `[start, stop)`.
*
* If `bitmask == nullptr`, all elements are assumed to be valid and the
* function returns `0`.
*
* @throws cudf::logic_error if `start > stop`
* @throws cudf::logic_error if `start < 0`
*
* @param bitmask Validity bitmask residing in device memory.
* @param start Index of the first bit to count (inclusive).
* @param stop Index of the last bit to count (exclusive).
* @return The number of null elements in the specified range.
*/
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop);
/** @} */ // end of group
} // namespace cudf
6 changes: 6 additions & 0 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -530,4 +530,10 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
return detail::bitmask_or(view, cudf::get_default_stream(), mr);
}

// Count null elements (unset bits) in the specified range
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop)
{
return detail::null_count(bitmask, start, stop, cudf::get_default_stream());
vyasr marked this conversation as resolved.
Show resolved Hide resolved
}

} // namespace cudf
42 changes: 38 additions & 4 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ from cudf.core.buffer import (
as_buffer,
)
from cudf.utils.dtypes import _get_base_dtype

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
from libcpp.memory cimport make_unique, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from rmm._lib.device_buffer cimport DeviceBuffer
from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

from cudf._lib.types cimport dtype_from_column_view, dtype_to_data_type

Expand All @@ -37,6 +38,10 @@ from cudf._lib.cpp.column.column_factories cimport (
make_numeric_column,
)
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.null_mask cimport (
copy_bitmask as c_copy_bitmask,
null_count as c_null_count,
)
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.scalar cimport DeviceScalar

Expand Down Expand Up @@ -307,8 +312,37 @@ cdef class Column:
return other_col

cdef libcudf_types.size_type compute_null_count(self) except? 0:
cdef device_buffer db
cdef unique_ptr[device_buffer] up_db
cdef DeviceBuffer rmm_db
with acquire_spill_lock():
return self._view(libcudf_types.UNKNOWN_NULL_COUNT).null_count()
if self.nullable:
vyasr marked this conversation as resolved.
Show resolved Hide resolved
if self.offset == 0:
mask = self.base_mask
else:
# Can't use the normal copy_bitmask function because that
# requires creating a view, which leads to infinite
# recursion
db = move(c_copy_bitmask(
<libcudf_types.bitmask_type*><uintptr_t>(
self.base_mask.get_ptr(mode="read")
),
self.offset,
self.offset + self.size,
))
up_db = make_unique[device_buffer](move(db))
rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
mask = as_buffer(rmm_db)

return c_null_count(
<libcudf_types.bitmask_type*><uintptr_t>(
mask.get_ptr(mode="read")
),
0,
self.size
)
else:
return 0

cdef mutable_column_view mutable_view(self) except *:
if is_categorical_dtype(self.dtype):
Expand Down Expand Up @@ -349,7 +383,7 @@ cdef class Column:
null_count = self._null_count

if null_count is None:
null_count = libcudf_types.UNKNOWN_NULL_COUNT
null_count = 0
cdef libcudf_types.size_type c_null_count = null_count

self._mask = None
Expand All @@ -369,7 +403,7 @@ cdef class Column:
cdef column_view view(self) except *:
null_count = self.null_count
if null_count is None:
null_count = libcudf_types.UNKNOWN_NULL_COUNT
null_count = 0
cdef libcudf_types.size_type c_null_count = null_count
return self._view(c_null_count)

Expand Down
16 changes: 14 additions & 2 deletions python/cudf/cudf/_lib/cpp/null_mask.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from libc.stdint cimport int32_t
from libcpp.pair cimport pair
Expand All @@ -7,7 +7,7 @@ from rmm._lib.device_buffer cimport device_buffer

from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport mask_state, size_type
from cudf._lib.cpp.types cimport bitmask_type, mask_state, size_type

ctypedef int32_t underlying_type_t_mask_state

Expand All @@ -17,6 +17,12 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
column_view view
) except +

cdef device_buffer copy_bitmask "cudf::copy_bitmask" (
const bitmask_type * mask,
size_type begin_bit,
size_type end_bit
) except +

cdef size_t bitmask_allocation_size_bytes (
size_type number_of_bits,
size_t padding_boundary
Expand All @@ -38,3 +44,9 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
cdef pair[device_buffer, size_type] bitmask_or(
table_view view
)

# Counts the null elements (unset bits) of `bitmask` in the range [start, stop).
# The C++ function is documented to throw cudf::logic_error when
# `start > stop` or `start < 0`, so `except +` is required for Cython to
# translate that C++ exception into a Python exception instead of letting it
# escape through the extension module.
cdef size_type null_count(
const bitmask_type * bitmask,
size_type start,
size_type stop,
) except +
3 changes: 0 additions & 3 deletions python/cudf/cudf/_lib/cpp/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
ctypedef uint32_t bitmask_type
ctypedef uint32_t char_utf8

cdef enum:
UNKNOWN_NULL_COUNT = -1

ctypedef enum mask_state:
UNALLOCATED "cudf::mask_state::UNALLOCATED"
UNINITIALIZED "cudf::mask_state::UNINITIALIZED"
Expand Down