diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index 45e0fc345b5..f7c9f6df8ad 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -24,7 +24,6 @@ set(cython_sources
     interop.pyx
     json.pyx
     merge.pyx
-    null_mask.pyx
     orc.pyx
     parquet.pyx
     reduce.pyx
diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py
index c51db601985..7474c4e8cd1 100644
--- a/python/cudf/cudf/_lib/__init__.py
+++ b/python/cudf/cudf/_lib/__init__.py
@@ -11,7 +11,6 @@
     interop,
     json,
     merge,
-    null_mask,
     nvtext,
     orc,
     parquet,
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 94dbdf5534d..9cbe11d61ac 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -11,7 +11,6 @@ import pylibcudf
 import rmm
 
 import cudf
-import cudf._lib as libcudf
 from cudf.core.buffer import (
     Buffer,
     ExposureTrackedBuffer,
@@ -36,7 +35,6 @@ from cudf._lib.types cimport (
     dtype_to_pylibcudf_type,
 )
 
-from cudf._lib.null_mask import bitmask_allocation_size_bytes
 from cudf._lib.types import dtype_from_pylibcudf_column
 
 cimport pylibcudf.libcudf.copying as cpp_copying
@@ -159,7 +157,10 @@ cdef class Column:
             if self.base_mask is None or self.offset == 0:
                 self._mask = self.base_mask
             else:
-                self._mask = libcudf.null_mask.copy_bitmask(self)
+                with acquire_spill_lock():
+                    self._mask = as_buffer(
+                        pylibcudf.null_mask.copy_bitmask(self.to_pylibcudf(mode="read"))
+                    )
         return self._mask
 
     @property
@@ -183,7 +184,9 @@ cdef class Column:
 
         if value is not None:
             # bitmask size must be relative to offset = 0 data.
-            required_size = bitmask_allocation_size_bytes(self.base_size)
+            required_size = pylibcudf.null_mask.bitmask_allocation_size_bytes(
+                self.base_size
+            )
             if value.size < required_size:
                 error_msg = (
                     "The Buffer for mask is smaller than expected, "
@@ -220,7 +223,7 @@ cdef class Column:
         and compute new data Buffers zero-copy that use pointer arithmetic to
         properly adjust the pointer.
         """
-        mask_size = bitmask_allocation_size_bytes(self.size)
+        mask_size = pylibcudf.null_mask.bitmask_allocation_size_bytes(self.size)
         required_num_bytes = -(-self.size // 8)  # ceiling divide
         error_msg = (
             "The value for mask is smaller than expected, got {}  bytes, "
@@ -790,13 +793,17 @@ cdef class Column:
                     mask = as_buffer(
                         rmm.DeviceBuffer(
                             ptr=mask_ptr,
-                            size=bitmask_allocation_size_bytes(base_size)
+                            size=pylibcudf.null_mask.bitmask_allocation_size_bytes(
+                                base_size
+                            )
                         )
                     )
             else:
                 mask = as_buffer(
                     data=mask_ptr,
-                    size=bitmask_allocation_size_bytes(base_size),
+                    size=pylibcudf.null_mask.bitmask_allocation_size_bytes(
+                        base_size
+                    ),
                     owner=mask_owner,
                     exposed=True
                 )
diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx
deleted file mode 100644
index d54e8e66281..00000000000
--- a/python/cudf/cudf/_lib/null_mask.pyx
+++ /dev/null
@@ -1,65 +0,0 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-import pylibcudf
-from pylibcudf.null_mask import MaskState
-
-from cudf.core.buffer import acquire_spill_lock, as_buffer
-
-from cudf._lib.column cimport Column
-
-
-@acquire_spill_lock()
-def copy_bitmask(Column col):
-    """
-    Copies column's validity mask buffer into a new buffer, shifting by the
-    offset if nonzero
-    """
-    if col.base_mask is None:
-        return None
-
-    rmm_db = pylibcudf.null_mask.copy_bitmask(col.to_pylibcudf(mode="read"))
-    buf = as_buffer(rmm_db)
-    return buf
-
-
-def bitmask_allocation_size_bytes(num_bits):
-    """
-    Given a size, calculates the number of bytes that should be allocated for a
-    column validity mask
-    """
-    return pylibcudf.null_mask.bitmask_allocation_size_bytes(num_bits)
-
-
-def create_null_mask(size, state=MaskState.UNINITIALIZED):
-    """
-    Given a size and a mask state, allocate a mask that can properly represent
-    the given size with the given mask state
-
-    Parameters
-    ----------
-    size : int
-        Number of elements the mask needs to be able to represent
-    state : ``MaskState``, default ``MaskState.UNINITIALIZED``
-        State the null mask should be created in
-    """
-    rmm_db = pylibcudf.null_mask.create_null_mask(size, state)
-    buf = as_buffer(rmm_db)
-    return buf
-
-
-@acquire_spill_lock()
-def bitmask_and(list columns):
-    rmm_db, other = pylibcudf.null_mask.bitmask_and(
-        [col.to_pylibcudf(mode="read") for col in columns]
-    )
-    buf = as_buffer(rmm_db)
-    return buf, other
-
-
-@acquire_spill_lock()
-def bitmask_or(list columns):
-    rmm_db, other = pylibcudf.null_mask.bitmask_or(
-        [col.to_pylibcudf(mode="read") for col in columns]
-    )
-    buf = as_buffer(rmm_db)
-    return buf, other
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index f0df4a3c1b3..8ddfd4a54ae 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -25,11 +25,6 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._lib.column import Column
-from cudf._lib.null_mask import (
-    MaskState,
-    bitmask_allocation_size_bytes,
-    create_null_mask,
-)
 from cudf._lib.scalar import as_device_scalar
 from cudf._lib.stream_compaction import (
     apply_boolean_mask,
@@ -383,7 +378,7 @@ def memory_usage(self) -> int:
         if self.data is not None:
             n += self.data.size
         if self.nullable:
-            n += bitmask_allocation_size_bytes(self.size)
+            n += plc.null_mask.bitmask_allocation_size_bytes(self.size)
         return n
 
     def _fill(
@@ -410,7 +405,11 @@ def _fill(
             )
 
         if not slr.is_valid() and not self.nullable:
-            mask = create_null_mask(self.size, state=MaskState.ALL_VALID)
+            mask = as_buffer(
+                plc.null_mask.create_null_mask(
+                    self.size, plc.null_mask.MaskState.ALL_VALID
+                )
+            )
             self.set_base_mask(mask)
 
         libcudf.filling.fill_in_place(self, begin, end, slr.device_value)
@@ -1553,7 +1552,11 @@ def column_empty(
         data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize))
 
     if masked:
-        mask = create_null_mask(row_count, state=MaskState.ALL_NULL)
+        mask = as_buffer(
+            plc.null_mask.create_null_mask(
+                row_count, plc.null_mask.MaskState.ALL_NULL
+            )
+        )
     else:
         mask = None
 
@@ -2210,7 +2213,9 @@ def _mask_from_cuda_array_interface_desc(obj, cai_mask) -> Buffer:
     typestr = desc["typestr"]
     typecode = typestr[1]
     if typecode == "t":
-        mask_size = bitmask_allocation_size_bytes(desc["shape"][0])
+        mask_size = plc.null_mask.bitmask_allocation_size_bytes(
+            desc["shape"][0]
+        )
         return as_buffer(data=desc["data"][0], size=mask_size, owner=obj)
     elif typecode == "b":
         col = as_column(cai_mask)
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 9962663e811..42df5123014 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -69,10 +69,7 @@ def __init__(
 
     @cached_property
     def memory_usage(self):
-        n = 0
-        if self.nullable:
-            n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
-
+        n = super().memory_usage
         child0_size = (self.size + 1) * self.base_children[0].dtype.itemsize
         current_base_child = self.base_children[1]
         current_offset = self.offset
@@ -97,7 +94,7 @@ def memory_usage(self):
         ) * current_base_child.dtype.itemsize
 
         if current_base_child.nullable:
-            n += cudf._lib.null_mask.bitmask_allocation_size_bytes(
+            n += plc.null_mask.bitmask_allocation_size_bytes(
                 current_base_child.size
             )
         return n
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index a9ab2d373fd..47763063c4c 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5750,17 +5750,13 @@ def end_offset(self) -> int:
 
     @cached_property
     def memory_usage(self) -> int:
-        n = 0
-        if self.data is not None:
-            n += self.data.size
+        n = super().memory_usage
         if len(self.base_children) == 1:
             child0_size = (self.size + 1) * self.base_children[
                 0
             ].dtype.itemsize
 
             n += child0_size
-        if self.nullable:
-            n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
         return n
 
     @property
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 8f16ba4e15b..2adc6b54bab 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -101,10 +101,7 @@ def to_pandas(
 
     @cached_property
     def memory_usage(self) -> int:
-        n = 0
-        if self.nullable:
-            n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
-
+        n = super().memory_usage
         for child in self.children:
             n += child.memory_usage
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 73c0af45293..b58ab13be93 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -45,7 +45,7 @@
 from cudf.core import column, df_protocol, indexing_utils, reshape
 from cudf.core._compat import PANDAS_LT_300
 from cudf.core.abc import Serializable
-from cudf.core.buffer import acquire_spill_lock
+from cudf.core.buffer import acquire_spill_lock, as_buffer
 from cudf.core.column import (
     CategoricalColumn,
     ColumnBase,
@@ -3191,9 +3191,10 @@ def where(self, cond, other=None, inplace=False, axis=None, level=None):
 
                 out.append(result._with_type_metadata(col.dtype))
             else:
-                out_mask = cudf._lib.null_mask.create_null_mask(
-                    len(source_col),
-                    state=cudf._lib.null_mask.MaskState.ALL_NULL,
+                out_mask = as_buffer(
+                    plc.null_mask.create_null_mask(
+                        len(source_col), plc.null_mask.MaskState.ALL_NULL
+                    )
                 )
                 out.append(source_col.set_mask(out_mask))
 
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 315324c130c..e977f037b79 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -19,7 +19,6 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._lib import groupby as libgroupby
-from cudf._lib.null_mask import bitmask_or
 from cudf._lib.sort import segmented_sort_by_key
 from cudf._lib.types import size_type_dtype
 from cudf.api.extensions import no_default
@@ -1118,8 +1117,7 @@ def ngroup(self, ascending=True):
         """
         index = self.grouping.keys.unique().sort_values()
         num_groups = len(index)
-        _, has_null_group = bitmask_or([*index._columns])
-
+        has_null_group = any(col.has_nulls() for col in index._columns)
         if ascending:
             # Count ascending from 0 to num_groups - 1
             groups = range(num_groups)
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index a5dc8a5498c..6624a1a150e 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -15,8 +15,9 @@
 from numba.cuda.cudadecl import registry as cuda_decl_registry
 from numba.cuda.cudaimpl import lower as cuda_lower
 
+import pylibcudf as plc
+
 import cudf
-from cudf._lib.null_mask import bitmask_allocation_size_bytes
 from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
 from cudf.core.udf.strings_lowering import cast_string_view_to_udf_string
 from cudf.core.udf.strings_typing import StringView, string_view, udf_string
@@ -91,7 +92,7 @@ def random_bitmask(size):
     size : int
         number of bits
     """
-    sz = bitmask_allocation_size_bytes(size)
+    sz = plc.null_mask.bitmask_allocation_size_bytes(size)
     rng = np.random.default_rng(seed=0)
     data = rng.integers(0, 255, dtype="u1", size=sz)
     return data.view("i1")
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 294253cd119..e6d252b8807 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -11,6 +11,7 @@
 import numpy as np
 import pandas as pd
 
+import pylibcudf as plc
 import rmm
 
 import cudf
@@ -252,7 +253,7 @@ def pa_mask_buffer_to_mask(mask_buf, size):
     """
     Convert PyArrow mask buffer to cuDF mask buffer
     """
-    mask_size = cudf._lib.null_mask.bitmask_allocation_size_bytes(size)
+    mask_size = plc.null_mask.bitmask_allocation_size_bytes(size)
     if mask_buf.size < mask_size:
         dbuf = rmm.DeviceBuffer(size=mask_size)
         dbuf.copy_from_host(np.asarray(mask_buf).view("u1"))