Skip to content

Commit

Permalink
Remove all references to UNKNOWN_NULL_COUNT in Python (#13345)
Browse files Browse the repository at this point in the history
Part of #11968

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Ashwin Srinath (https://github.com/shwina)
  - Bradley Dice (https://github.com/bdice)

URL: #13345
  • Loading branch information
vyasr authored May 16, 2023
1 parent 3e5f00e commit fc43b7e
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 21 deletions.
15 changes: 2 additions & 13 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,20 +141,9 @@ cudf::size_type valid_count(bitmask_type const* bitmask,
rmm::cuda_stream_view stream);

/**
* @brief Given a validity bitmask, counts the number of null elements (unset bits)
* in the range `[start, stop)`.
*
* If `bitmask == nullptr`, all elements are assumed to be valid and the
* function returns `0`.
*
* @throws cudf::logic_error if `start > stop`
* @throws cudf::logic_error if `start < 0`
* @copydoc null_count(bitmask_type const* bitmask, size_type start, size_type stop)
*
* @param[in] bitmask Validity bitmask residing in device memory.
* @param[in] start Index of the first bit to count (inclusive).
* @param[in] stop Index of the last bit to count (exclusive).
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
* @return The number of null elements in the specified range.
* @param stream Stream view on which to allocate resources and queue execution.
*/
cudf::size_type null_count(bitmask_type const* bitmask,
size_type start,
Expand Down
16 changes: 16 additions & 0 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,5 +170,21 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Given a validity bitmask, counts the number of null elements (unset bits)
* in the range `[start, stop)`.
*
* If `bitmask == nullptr`, all elements are assumed to be valid and the
* function returns `0`.
*
* @throws cudf::logic_error if `start > stop`
* @throws cudf::logic_error if `start < 0`
*
* @param bitmask Validity bitmask residing in device memory.
* @param start Index of the first bit to count (inclusive).
* @param stop Index of the last bit to count (exclusive).
* @return The number of null elements in the specified range.
*/
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop);
/** @} */ // end of group
} // namespace cudf
6 changes: 6 additions & 0 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -530,4 +530,10 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
return detail::bitmask_or(view, cudf::get_default_stream(), mr);
}

// Count the null elements (unset bits) of `bitmask` in the range `[start, stop)`.
// Public entry point: forwards to the detail implementation on the default stream.
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop)
{
  auto const stream = cudf::get_default_stream();
  return detail::null_count(bitmask, start, stop, stream);
}

} // namespace cudf
16 changes: 13 additions & 3 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ from cudf.core.buffer import (
as_buffer,
)
from cudf.utils.dtypes import _get_base_dtype

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
from libcpp.memory cimport make_unique, unique_ptr
Expand All @@ -37,6 +38,7 @@ from cudf._lib.cpp.column.column_factories cimport (
make_numeric_column,
)
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.null_mask cimport null_count as c_null_count
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.scalar cimport DeviceScalar

Expand Down Expand Up @@ -308,7 +310,15 @@ cdef class Column:

cdef libcudf_types.size_type compute_null_count(self) except? 0:
with acquire_spill_lock():
return self._view(libcudf_types.UNKNOWN_NULL_COUNT).null_count()
if not self.nullable:
return 0
return c_null_count(
<libcudf_types.bitmask_type*><uintptr_t>(
self.base_mask.get_ptr(mode="read")
),
self.offset,
self.offset + self.size
)

cdef mutable_column_view mutable_view(self) except *:
if is_categorical_dtype(self.dtype):
Expand Down Expand Up @@ -349,7 +359,7 @@ cdef class Column:
null_count = self._null_count

if null_count is None:
null_count = libcudf_types.UNKNOWN_NULL_COUNT
null_count = 0
cdef libcudf_types.size_type c_null_count = null_count

self._mask = None
Expand All @@ -369,7 +379,7 @@ cdef class Column:
cdef column_view view(self) except *:
null_count = self.null_count
if null_count is None:
null_count = libcudf_types.UNKNOWN_NULL_COUNT
null_count = 0
cdef libcudf_types.size_type c_null_count = null_count
return self._view(c_null_count)

Expand Down
10 changes: 8 additions & 2 deletions python/cudf/cudf/_lib/cpp/null_mask.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from libc.stdint cimport int32_t
from libcpp.pair cimport pair
Expand All @@ -7,7 +7,7 @@ from rmm._lib.device_buffer cimport device_buffer

from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport mask_state, size_type
from cudf._lib.cpp.types cimport bitmask_type, mask_state, size_type

ctypedef int32_t underlying_type_t_mask_state

Expand Down Expand Up @@ -38,3 +38,9 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
cdef pair[device_buffer, size_type] bitmask_or(
table_view view
)

cdef size_type null_count(
const bitmask_type * bitmask,
size_type start,
size_type stop,
)
3 changes: 0 additions & 3 deletions python/cudf/cudf/_lib/cpp/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
ctypedef uint32_t bitmask_type
ctypedef uint32_t char_utf8

cdef enum:
UNKNOWN_NULL_COUNT = -1

ctypedef enum mask_state:
UNALLOCATED "cudf::mask_state::UNALLOCATED"
UNINITIALIZED "cudf::mask_state::UNINITIALIZED"
Expand Down

0 comments on commit fc43b7e

Please sign in to comment.