From fc43b7e7b2ea19c448e27b583aa82992c15050b5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 16 May 2023 14:31:27 -0700
Subject: [PATCH] Remove all references to UNKNOWN_NULL_COUNT in Python
 (#13345)

Part of #11968

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Ashwin Srinath (https://github.com/shwina)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/13345
---
 cpp/include/cudf/detail/null_mask.hpp   | 15 ++-------------
 cpp/include/cudf/null_mask.hpp          | 16 ++++++++++++++++
 cpp/src/bitmask/null_mask.cu            |  6 ++++++
 python/cudf/cudf/_lib/column.pyx        | 16 +++++++++++++---
 python/cudf/cudf/_lib/cpp/null_mask.pxd | 10 ++++++++--
 python/cudf/cudf/_lib/cpp/types.pxd     |  3 ---
 6 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp
index 7f1b15893c5..8c10bbe416f 100644
--- a/cpp/include/cudf/detail/null_mask.hpp
+++ b/cpp/include/cudf/detail/null_mask.hpp
@@ -141,20 +141,9 @@ cudf::size_type valid_count(bitmask_type const* bitmask,
                             rmm::cuda_stream_view stream);
 
 /**
- * @brief Given a validity bitmask, counts the number of null elements (unset bits)
- * in the range `[start, stop)`.
- *
- * If `bitmask == nullptr`, all elements are assumed to be valid and the
- * function returns ``.
- *
- * @throws cudf::logic_error if `start > stop`
- * @throws cudf::logic_error if `start < 0`
+ * @copydoc null_count(bitmask_type const* bitmask, size_type start, size_type stop)
  *
- * @param[in] bitmask Validity bitmask residing in device memory.
- * @param[in] start Index of the first bit to count (inclusive).
- * @param[in] stop Index of the last bit to count (exclusive).
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- * @return The number of null elements in the specified range.
+ * @param stream Stream view on which to allocate resources and queue execution.
  */
 cudf::size_type null_count(bitmask_type const* bitmask,
                            size_type start,
diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp
index e8bc97e95b3..672f479ad53 100644
--- a/cpp/include/cudf/null_mask.hpp
+++ b/cpp/include/cudf/null_mask.hpp
@@ -170,5 +170,21 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(
   table_view const& view,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Given a validity bitmask, counts the number of null elements (unset bits)
+ * in the range `[start, stop)`.
+ *
+ * If `bitmask == nullptr`, all elements are assumed to be valid and the
+ * function returns `0`.
+ *
+ * @throws cudf::logic_error if `start > stop`
+ * @throws cudf::logic_error if `start < 0`
+ *
+ * @param bitmask Validity bitmask residing in device memory.
+ * @param start Index of the first bit to count (inclusive).
+ * @param stop Index of the last bit to count (exclusive).
+ * @return The number of null elements in the specified range.
+ */
+cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop);
 /** @} */  // end of group
 }  // namespace cudf
diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu
index 4c22988900b..8aec05abcfe 100644
--- a/cpp/src/bitmask/null_mask.cu
+++ b/cpp/src/bitmask/null_mask.cu
@@ -530,4 +530,10 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
   return detail::bitmask_or(view, cudf::get_default_stream(), mr);
 }
 
+// Count null elements (unset bits) in the specified range
+cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop)
+{
+  return detail::null_count(bitmask, start, stop, cudf::get_default_stream());
+}
+
 }  // namespace cudf
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 428db210532..3a36ca65866 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -17,6 +17,7 @@ from cudf.core.buffer import (
     as_buffer,
 )
 from cudf.utils.dtypes import _get_base_dtype
+
 from cpython.buffer cimport PyObject_CheckBuffer
 from libc.stdint cimport uintptr_t
 from libcpp.memory cimport make_unique, unique_ptr
@@ -37,6 +38,7 @@ from cudf._lib.cpp.column.column_factories cimport (
     make_numeric_column,
 )
 from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.null_mask cimport null_count as c_null_count
 from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.scalar cimport DeviceScalar
 
@@ -308,7 +310,15 @@ cdef class Column:
 
     cdef libcudf_types.size_type compute_null_count(self) except? 0:
         with acquire_spill_lock():
-            return self._view(libcudf_types.UNKNOWN_NULL_COUNT).null_count()
+            if not self.nullable:
+                return 0
+            return c_null_count(
+                <libcudf_types.bitmask_type*><uintptr_t>(
+                    self.base_mask.get_ptr(mode="read")
+                ),
+                self.offset,
+                self.offset + self.size
+            )
 
     cdef mutable_column_view mutable_view(self) except *:
         if is_categorical_dtype(self.dtype):
@@ -349,7 +359,7 @@ cdef class Column:
         null_count = self._null_count
 
         if null_count is None:
-            null_count = libcudf_types.UNKNOWN_NULL_COUNT
+            null_count = 0
         cdef libcudf_types.size_type c_null_count = null_count
 
         self._mask = None
@@ -369,7 +379,7 @@ cdef class Column:
     cdef column_view view(self) except *:
         null_count = self.null_count
         if null_count is None:
-            null_count = libcudf_types.UNKNOWN_NULL_COUNT
+            null_count = 0
         cdef libcudf_types.size_type c_null_count = null_count
         return self._view(c_null_count)
 
diff --git a/python/cudf/cudf/_lib/cpp/null_mask.pxd b/python/cudf/cudf/_lib/cpp/null_mask.pxd
index 3050a9f3459..bd0eb684690 100644
--- a/python/cudf/cudf/_lib/cpp/null_mask.pxd
+++ b/python/cudf/cudf/_lib/cpp/null_mask.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t
 from libcpp.pair cimport pair
@@ -7,7 +7,7 @@ from rmm._lib.device_buffer cimport device_buffer
 
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.cpp.types cimport mask_state, size_type
+from cudf._lib.cpp.types cimport bitmask_type, mask_state, size_type
 
 ctypedef int32_t underlying_type_t_mask_state
 
@@ -38,3 +38,9 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
     cdef pair[device_buffer, size_type] bitmask_or(
         table_view view
     )
+
+    cdef size_type null_count(
+        const bitmask_type * bitmask,
+        size_type start,
+        size_type stop,
+    )
diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd
index e4106ffb99d..11480d774ef 100644
--- a/python/cudf/cudf/_lib/cpp/types.pxd
+++ b/python/cudf/cudf/_lib/cpp/types.pxd
@@ -8,9 +8,6 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
     ctypedef uint32_t bitmask_type
     ctypedef uint32_t char_utf8
 
-    cdef enum:
-        UNKNOWN_NULL_COUNT = -1
-
     ctypedef enum mask_state:
         UNALLOCATED "cudf::mask_state::UNALLOCATED"
         UNINITIALIZED "cudf::mask_state::UNINITIALIZED"