From fc43b7e7b2ea19c448e27b583aa82992c15050b5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani <vyasr@nvidia.com> Date: Tue, 16 May 2023 14:31:27 -0700 Subject: [PATCH] Remove all references to UNKNOWN_NULL_COUNT in Python (#13345) Part of #11968 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Ashwin Srinath (https://github.com/shwina) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/13345 --- cpp/include/cudf/detail/null_mask.hpp | 15 ++------------- cpp/include/cudf/null_mask.hpp | 16 ++++++++++++++++ cpp/src/bitmask/null_mask.cu | 6 ++++++ python/cudf/cudf/_lib/column.pyx | 16 +++++++++++++--- python/cudf/cudf/_lib/cpp/null_mask.pxd | 10 ++++++++-- python/cudf/cudf/_lib/cpp/types.pxd | 3 --- 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 7f1b15893c5..8c10bbe416f 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -141,20 +141,9 @@ cudf::size_type valid_count(bitmask_type const* bitmask, rmm::cuda_stream_view stream); /** - * @brief Given a validity bitmask, counts the number of null elements (unset bits) - * in the range `[start, stop)`. - * - * If `bitmask == nullptr`, all elements are assumed to be valid and the - * function returns ``. - * - * @throws cudf::logic_error if `start > stop` - * @throws cudf::logic_error if `start < 0` + * @copydoc null_count(bitmask_type const* bitmask, size_type start, size_type stop) * - * @param[in] bitmask Validity bitmask residing in device memory. - * @param[in] start Index of the first bit to count (inclusive). - * @param[in] stop Index of the last bit to count (exclusive). - * @param[in] stream CUDA stream used for device memory operations and kernel launches. - * @return The number of null elements in the specified range. 
+ * @param stream Stream view on which to allocate resources and queue execution. */ cudf::size_type null_count(bitmask_type const* bitmask, size_type start, diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index e8bc97e95b3..672f479ad53 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -170,5 +170,21 @@ std::pair<rmm::device_buffer, size_type> bitmask_or( table_view const& view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Given a validity bitmask, counts the number of null elements (unset bits) + * in the range `[start, stop)`. + * + * If `bitmask == nullptr`, all elements are assumed to be valid and the + * function returns `0`. + * + * @throws cudf::logic_error if `start > stop` + * @throws cudf::logic_error if `start < 0` + * + * @param bitmask Validity bitmask residing in device memory. + * @param start Index of the first bit to count (inclusive). + * @param stop Index of the last bit to count (exclusive). + * @return The number of null elements in the specified range. 
+ */ +cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop); /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 4c22988900b..8aec05abcfe 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -530,4 +530,10 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view, return detail::bitmask_or(view, cudf::get_default_stream(), mr); } +// Count null elements (unset bits) in the specified range +cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop) +{ + return detail::null_count(bitmask, start, stop, cudf::get_default_stream()); +} + } // namespace cudf diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 428db210532..3a36ca65866 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -17,6 +17,7 @@ from cudf.core.buffer import ( as_buffer, ) from cudf.utils.dtypes import _get_base_dtype + from cpython.buffer cimport PyObject_CheckBuffer from libc.stdint cimport uintptr_t from libcpp.memory cimport make_unique, unique_ptr @@ -37,6 +38,7 @@ from cudf._lib.cpp.column.column_factories cimport ( make_numeric_column, ) from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.null_mask cimport null_count as c_null_count from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.scalar cimport DeviceScalar @@ -308,7 +310,15 @@ cdef class Column: cdef libcudf_types.size_type compute_null_count(self) except? 
0: with acquire_spill_lock(): - return self._view(libcudf_types.UNKNOWN_NULL_COUNT).null_count() + if not self.nullable: + return 0 + return c_null_count( + <libcudf_types.bitmask_type*><uintptr_t>( + self.base_mask.get_ptr(mode="read") + ), + self.offset, + self.offset + self.size + ) cdef mutable_column_view mutable_view(self) except *: if is_categorical_dtype(self.dtype): @@ -349,7 +359,7 @@ cdef class Column: null_count = self._null_count if null_count is None: - null_count = libcudf_types.UNKNOWN_NULL_COUNT + null_count = 0 cdef libcudf_types.size_type c_null_count = null_count self._mask = None @@ -369,7 +379,7 @@ cdef class Column: cdef column_view view(self) except *: null_count = self.null_count if null_count is None: - null_count = libcudf_types.UNKNOWN_NULL_COUNT + null_count = 0 cdef libcudf_types.size_type c_null_count = null_count return self._view(c_null_count) diff --git a/python/cudf/cudf/_lib/cpp/null_mask.pxd b/python/cudf/cudf/_lib/cpp/null_mask.pxd index 3050a9f3459..bd0eb684690 100644 --- a/python/cudf/cudf/_lib/cpp/null_mask.pxd +++ b/python/cudf/cudf/_lib/cpp/null_mask.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
from libc.stdint cimport int32_t from libcpp.pair cimport pair @@ -7,7 +7,7 @@ from rmm._lib.device_buffer cimport device_buffer from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.cpp.types cimport mask_state, size_type +from cudf._lib.cpp.types cimport bitmask_type, mask_state, size_type ctypedef int32_t underlying_type_t_mask_state @@ -38,3 +38,9 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: cdef pair[device_buffer, size_type] bitmask_or( table_view view ) + + cdef size_type null_count( + const bitmask_type * bitmask, + size_type start, + size_type stop, + ) diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd index e4106ffb99d..11480d774ef 100644 --- a/python/cudf/cudf/_lib/cpp/types.pxd +++ b/python/cudf/cudf/_lib/cpp/types.pxd @@ -8,9 +8,6 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: ctypedef uint32_t bitmask_type ctypedef uint32_t char_utf8 - cdef enum: - UNKNOWN_NULL_COUNT = -1 - ctypedef enum mask_state: UNALLOCATED "cudf::mask_state::UNALLOCATED" UNINITIALIZED "cudf::mask_state::UNINITIALIZED"