From fbc68afbf12f4b471c36bf6c33c03f09a3a315cb Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 8 Jul 2024 14:51:37 +0000
Subject: [PATCH 1/9] cuda array interface is a property

---
 python/cudf/cudf/_lib/pylibcudf/column.pyx        | 2 +-
 python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
index e0cf8b7ee32..cec9b11bb7e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -250,7 +250,7 @@ cdef class Column:
         column is in use.
         """
         data = gpumemoryview(obj)
-        iface = data.__cuda_array_interface__()
+        iface = data.__cuda_array_interface__
         if iface.get('mask') is not None:
             raise ValueError("mask not yet supported.")
 
diff --git a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
index a2f5b2ac387..0904022a944 100644
--- a/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/gpumemoryview.pyx
@@ -22,5 +22,6 @@ cdef class gpumemoryview:
         # TODO: Need to respect readonly
         self.ptr = cai["data"][0]
 
+    @property
     def __cuda_array_interface__(self):
         return self.obj.__cuda_array_interface__

From 2937dbc54453c7cd4867d1e546ac30c13dea59da Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 8 Jul 2024 14:51:57 +0000
Subject: [PATCH 2/9] Allow updating a Column with a new null mask

---
 python/cudf/cudf/_lib/pylibcudf/column.pxd |  1 +
 python/cudf/cudf/_lib/pylibcudf/column.pyx | 26 ++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pxd b/python/cudf/cudf/_lib/pylibcudf/column.pxd
index d13791d95cf..13ee0a70681 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pxd
@@ -50,6 +50,7 @@ cdef class Column:
     cpdef gpumemoryview null_mask(self)
     cpdef list children(self)
     cpdef Column copy(self)
+    cpdef Column with_mask(self, gpumemoryview, size_type)
 
     cpdef ListColumnView list_view(self)
 
diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
index cec9b11bb7e..4ad3637cc19 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -175,6 +175,32 @@ cdef class Column:
             children,
         )
 
+    cpdef Column with_mask(self, gpumemoryview mask, size_type null_count):
+        """Augment this column with a new null mask.
+
+        Parameters
+        ----------
+        mask
+            New mask (or None to unset the mask)
+        null_count
+            New null count. If this is incorrect, bad things happen.
+
+        Returns
+        -------
+        New Column object sharing data with self (except for the mask which is new).
+        """
+        if mask is None and null_count > 0:
+            raise ValueError("Empty mask must have null count of zero")
+        return Column(
+            self._data_type,
+            self._size,
+            self._data,
+            mask,
+            null_count,
+            self._offset,
+            self._children,
+        )
+
     @staticmethod
     cdef Column from_column_view(const column_view& cv, Column owner):
         """Create a Column from a libcudf column_view.

From 1f7735df79d1799a42b0ae2c07aaa8653f709fbf Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 8 Jul 2024 14:53:18 +0000
Subject: [PATCH 3/9] Start porting transform.hpp to pylibcudf

For now just nans_to_nulls.
---
 .../user_guide/api_docs/pylibcudf/index.rst   |  1 +
 .../api_docs/pylibcudf/transform.rst          |  6 ++++
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   |  1 +
 python/cudf/cudf/_lib/pylibcudf/__init__.pxd  |  2 ++
 python/cudf/cudf/_lib/pylibcudf/__init__.py   |  2 ++
 python/cudf/cudf/_lib/pylibcudf/transform.pxd |  7 ++++
 python/cudf/cudf/_lib/pylibcudf/transform.pyx | 35 +++++++++++++++++++
 7 files changed, 54 insertions(+)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/transform.pxd
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/transform.pyx

diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
index bd6f0f77357..5899d272160 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -34,6 +34,7 @@ This page provides API documentation for pylibcudf.
     stream_compaction
     table
     traits
+    transform
     types
     unary
 
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst
new file mode 100644
index 00000000000..ef04bbad7e6
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst
@@ -0,0 +1,6 @@
+=========
+transform
+=========
+
+.. automodule:: cudf._lib.pylibcudf.transform
+   :members:
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index d22096081af..a2d11bbea6e 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -39,6 +39,7 @@ set(cython_sources
     sorting.pyx
     table.pyx
     traits.pyx
+    transform.pyx
     types.pyx
     unary.pyx
     utils.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
index d4d615cde34..da2b7806203 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -24,6 +24,7 @@ from . cimport (
     stream_compaction,
     strings,
     traits,
+    transform,
     types,
     unary,
 )
@@ -63,6 +64,7 @@ __all__ = [
     "strings",
     "sorting",
     "traits",
+    "transform",
     "types",
     "unary",
 ]
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
index 91f8acaf682..acbc84d7177 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -24,6 +24,7 @@
     stream_compaction,
     strings,
     traits,
+    transform,
     types,
     unary,
 )
@@ -64,6 +65,7 @@
     "strings",
     "sorting",
     "traits",
+    "transform",
     "types",
     "unary",
 ]
diff --git a/python/cudf/cudf/_lib/pylibcudf/transform.pxd b/python/cudf/cudf/_lib/pylibcudf/transform.pxd
new file mode 100644
index 00000000000..4b21feffe25
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/transform.pxd
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from .column cimport Column
+from .gpumemoryview cimport gpumemoryview
+
+
+cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input)
diff --git a/python/cudf/cudf/_lib/pylibcudf/transform.pyx b/python/cudf/cudf/_lib/pylibcudf/transform.pyx
new file mode 100644
index 00000000000..bf18c6a91a5
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/transform.pyx
@@ -0,0 +1,35 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move, pair
+
+from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
+
+from cudf._lib.pylibcudf.libcudf cimport transform as cpp_transform
+from cudf._lib.pylibcudf.libcudf.types cimport size_type
+
+from .column cimport Column
+from .gpumemoryview cimport gpumemoryview
+
+
+cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input):
+    """Create a null mask preserving existing nulls and converting nans to null.
+
+    Parameters
+    ----------
+    input
+        Column to produce new mask from.
+
+    Returns
+    -------
+    Two-tuple of a gpumemoryview wrapping the null mask and the new null count.
+    """
+    cdef pair[unique_ptr[device_buffer], size_type] c_result
+
+    with nogil:
+        c_result = move(cpp_transform.nans_to_nulls(input.view()))
+
+    return (
+        gpumemoryview(DeviceBuffer.c_from_unique_ptr(move(c_result.first))),
+        c_result.second
+    )

From c0de6a1901773388a2dc182e6f60e442fba2d030 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 8 Jul 2024 15:00:22 +0000
Subject: [PATCH 4/9] Use newly wrapped nans_to_nulls

---
 python/cudf/cudf/_lib/transform.pyx | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index b325173f20d..86a4a60eef1 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -20,6 +20,7 @@ from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 cimport cudf._lib.pylibcudf.libcudf.transform as libcudf_transform
 from cudf._lib.column cimport Column
 from cudf._lib.expressions cimport Expression
+from cudf._lib.pylibcudf cimport transform as plc_transform
 from cudf._lib.pylibcudf.libcudf.column.column cimport column
 from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
 from cudf._lib.pylibcudf.libcudf.expressions cimport expression
@@ -82,18 +83,10 @@ def mask_to_bools(object mask_buffer, size_type begin_bit, size_type end_bit):
 
 @acquire_spill_lock()
 def nans_to_nulls(Column input):
-    cdef column_view c_input = input.view()
-    cdef pair[unique_ptr[device_buffer], size_type] c_output
-    cdef unique_ptr[device_buffer] c_buffer
-
-    with nogil:
-        c_output = move(libcudf_transform.nans_to_nulls(c_input))
-        c_buffer = move(c_output.first)
-
-    if c_output.second == 0:
-        return None
-
-    return as_buffer(DeviceBuffer.c_from_unique_ptr(move(c_buffer)))
+    (mask, _) = plc_transform.nans_to_nulls(
+        input.to_pylibcudf(mode="read")
+    )
+    return as_buffer(mask)
 
 
 @acquire_spill_lock()

From f0a254eff2180935983a6c4fb2d96b8d70cecd49 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 8 Jul 2024 17:22:29 +0000
Subject: [PATCH 5/9] Implement mask_nans via nans_to_nulls

---
 .../cudf_polars/containers/column.py          | 45 +++++++++++++------
 .../tests/containers/test_column.py           |  9 ++--
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py
index 28685f0c4ed..af67059844e 100644
--- a/python/cudf_polars/cudf_polars/containers/column.py
+++ b/python/cudf_polars/cudf_polars/containers/column.py
@@ -128,24 +128,29 @@ def copy(self) -> Self:
         )
 
     def mask_nans(self) -> Self:
-        """Return a copy of self with nans masked out."""
-        if self.nan_count > 0:
-            raise NotImplementedError("Need to port transform.hpp to pylibcudf")
+        """Return a shallow copy of self with nans masked out."""
+        if plc.traits.is_floating_point(self.obj.type()):
+            old_count = self.obj.null_count()
+            mask, new_count = plc.transform.nans_to_nulls(self.obj)
+            result = type(self)(self.obj.with_mask(mask, new_count))
+            if old_count == new_count:
+                return result.sorted_like(self)
+            return result
         return self.copy()
 
     @functools.cached_property
     def nan_count(self) -> int:
         """Return the number of NaN values in the column."""
-        if self.obj.type().id() not in (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64):
-            return 0
-        return plc.interop.to_arrow(
-            plc.reduce.reduce(
-                plc.unary.is_nan(self.obj),
-                plc.aggregation.sum(),
-                # TODO: pylibcudf needs to have a SizeType DataType singleton
-                plc.DataType(plc.TypeId.INT32),
-            )
-        ).as_py()
+        if plc.traits.is_floating_point(self.obj.type()):
+            return plc.interop.to_arrow(
+                plc.reduce.reduce(
+                    plc.unary.is_nan(self.obj),
+                    plc.aggregation.sum(),
+                    # TODO: pylibcudf needs to have a SizeType DataType singleton
+                    plc.DataType(plc.TypeId.INT32),
+                )
+            ).as_py()
+        return 0
 
 
 class NamedColumn(Column):
@@ -187,3 +192,17 @@ def copy(self, *, new_name: str | None = None) -> Self:
             order=self.order,
             null_order=self.null_order,
         )
+
+    def mask_nans(self) -> Self:
+        """Return a shallow copy of self with nans masked out."""
+        # Annoying, the inheritance is not right (can't call the
+        # super-type mask_nans), but will sort that by refactoring
+        # later.
+        if plc.traits.is_floating_point(self.obj.type()):
+            old_count = self.obj.null_count()
+            mask, new_count = plc.transform.nans_to_nulls(self.obj)
+            result = type(self)(self.obj.with_mask(mask, new_count), self.name)
+            if old_count == new_count:
+                return result.sorted_like(self)
+            return result
+        return self.copy()
diff --git a/python/cudf_polars/tests/containers/test_column.py b/python/cudf_polars/tests/containers/test_column.py
index 3291d8db161..3f05674dc5a 100644
--- a/python/cudf_polars/tests/containers/test_column.py
+++ b/python/cudf_polars/tests/containers/test_column.py
@@ -3,12 +3,14 @@
 
 from __future__ import annotations
 
+from functools import partial
+
 import pyarrow
 import pytest
 
 import cudf._lib.pylibcudf as plc
 
-from cudf_polars.containers import Column
+from cudf_polars.containers import Column, NamedColumn
 
 
 def test_non_scalar_access_raises():
@@ -54,10 +56,11 @@ def test_shallow_copy():
 
 
 @pytest.mark.parametrize("typeid", [plc.TypeId.INT8, plc.TypeId.FLOAT32])
-def test_mask_nans(typeid):
+@pytest.mark.parametrize("constructor", [Column, partial(NamedColumn, name="name")])
+def test_mask_nans(typeid, constructor):
     dtype = plc.DataType(typeid)
     values = pyarrow.array([0, 0, 0], type=plc.interop.to_arrow(dtype))
-    column = Column(plc.interop.from_arrow(values))
+    column = constructor(plc.interop.from_arrow(values))
     masked = column.mask_nans()
     assert column.obj is masked.obj
 

From cff4fdc12b25ffb5441705a090dde91daf2e3fda Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 8 Jul 2024 17:23:20 +0000
Subject: [PATCH 6/9] Handle nans for nan-ignoring aggs in groupby-agg

---
 python/cudf_polars/cudf_polars/dsl/expr.py    | 12 ++++++++-
 .../tests/containers/test_column.py           | 11 +++++---
 .../cudf_polars/tests/expressions/test_agg.py | 25 +++++++++++++------
 python/cudf_polars/tests/test_groupby.py      | 24 ++++++++++++++++++
 4 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index f83d9e82d30..adf266bab81 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -867,7 +867,7 @@ def __init__(
         self.name = name
         self.options = options
         self.children = children
-        if self.name not in ("round", "unique"):
+        if self.name not in ("round", "unique", "mask_nans"):
             raise NotImplementedError(f"Unary function {name=}")
 
     def do_evaluate(
@@ -878,6 +878,9 @@ def do_evaluate(
         mapping: Mapping[Expr, Column] | None = None,
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
+        if self.name == "mask_nans":
+            (child,) = self.children
+            return child.evaluate(df, context=context, mapping=mapping).mask_nans()
         if self.name == "round":
             (decimal_places,) = self.options
             (values,) = (
@@ -1215,12 +1218,19 @@ def collect_agg(self, *, depth: int) -> AggInfo:
             raise NotImplementedError(
                 "Nested aggregations in groupby"
             )  # pragma: no cover; check_agg trips first
+        if (isminmax := self.name in {"min", "max"}) and self.options:
+            raise NotImplementedError("Nan propagation in groupby for min/max")
         (child,) = self.children
         ((expr, _, _),) = child.collect_agg(depth=depth + 1).requests
         if self.request is None:
             raise NotImplementedError(
                 f"Aggregation {self.name} in groupby"
             )  # pragma: no cover; __init__ trips first
+        if isminmax and plc.traits.is_floating_point(self.dtype):
+            assert expr is not None
+            # Ignore nans in these groupby aggs, do this by masking
+            # nans in the input
+            expr = UnaryFunction(self.dtype, "mask_nans", (), expr)
         return AggInfo([(expr, self.request, self)])
 
     def _reduce(
diff --git a/python/cudf_polars/tests/containers/test_column.py b/python/cudf_polars/tests/containers/test_column.py
index 3f05674dc5a..4f3c0de5975 100644
--- a/python/cudf_polars/tests/containers/test_column.py
+++ b/python/cudf_polars/tests/containers/test_column.py
@@ -62,12 +62,15 @@ def test_mask_nans(typeid, constructor):
     values = pyarrow.array([0, 0, 0], type=plc.interop.to_arrow(dtype))
     column = constructor(plc.interop.from_arrow(values))
     masked = column.mask_nans()
-    assert column.obj is masked.obj
+    assert column.obj.null_count() == masked.obj.null_count()
 
 
-def test_mask_nans_float_with_nan_notimplemented():
+def test_mask_nans_float():
     dtype = plc.DataType(plc.TypeId.FLOAT32)
     values = pyarrow.array([0, 0, float("nan")], type=plc.interop.to_arrow(dtype))
     column = Column(plc.interop.from_arrow(values))
-    with pytest.raises(NotImplementedError):
-        _ = column.mask_nans()
+    masked = column.mask_nans()
+    expect = pyarrow.array([0, 0, None], type=plc.interop.to_arrow(dtype))
+    got = pyarrow.array(plc.interop.to_arrow(masked.obj))
+
+    assert expect == got
diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py
index 267d0a99692..e53fd7f8615 100644
--- a/python/cudf_polars/tests/expressions/test_agg.py
+++ b/python/cudf_polars/tests/expressions/test_agg.py
@@ -59,14 +59,25 @@ def test_agg(df, agg):
 
 
 @pytest.mark.parametrize(
-    "propagate_nans",
-    [pytest.param(False, marks=pytest.mark.xfail(reason="Need to mask nans")), True],
-    ids=["mask_nans", "propagate_nans"],
+    "op", [pl.Expr.min, pl.Expr.nan_min, pl.Expr.max, pl.Expr.nan_max]
 )
-@pytest.mark.parametrize("op", ["min", "max"])
-def test_agg_float_with_nans(propagate_nans, op):
-    df = pl.LazyFrame({"a": pl.Series([1, 2, float("nan")], dtype=pl.Float64())})
-    op = getattr(pl.Expr, f"nan_{op}" if propagate_nans else op)
+def test_agg_float_with_nans(op):
+    df = pl.LazyFrame(
+        {
+            "a": pl.Series([1, 2, float("nan")], dtype=pl.Float64()),
+            "b": pl.Series([1, 2, None], dtype=pl.Int8()),
+        }
+    )
+    q = df.select(op(pl.col("a")), op(pl.col("b")))
+
+    assert_gpu_result_equal(q)
+
+
+@pytest.mark.xfail(reason="https://github.com/pola-rs/polars/issues/17513")
+@pytest.mark.parametrize("op", [pl.Expr.max, pl.Expr.min])
+def test_agg_singleton(op):
+    df = pl.LazyFrame({"a": pl.Series([float("nan")])})
+
     q = df.select(op(pl.col("a")))
 
     assert_gpu_result_equal(q)
diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py
index b84e2c16b43..81306397b9f 100644
--- a/python/cudf_polars/tests/test_groupby.py
+++ b/python/cudf_polars/tests/test_groupby.py
@@ -99,3 +99,27 @@ def test_groupby_unsupported(df, expr):
     q = df.group_by("key1").agg(expr)
 
     assert_ir_translation_raises(q, NotImplementedError)
+
+
+@pytest.mark.xfail(reason="https://github.com/pola-rs/polars/issues/17513")
+def test_groupby_minmax_with_nan():
+    df = pl.LazyFrame(
+        {"key": [1, 2, 2, 2], "value": [float("nan"), 1, -1, float("nan")]}
+    )
+
+    q = df.group_by("key").agg(
+        pl.col("value").max().alias("max"), pl.col("value").min().alias("min")
+    )
+
+    assert_gpu_result_equal(q)
+
+
+@pytest.mark.parametrize("op", [pl.Expr.nan_max, pl.Expr.nan_min])
+def test_groupby_nan_minmax_raises(op):
+    df = pl.LazyFrame(
+        {"key": [1, 2, 2, 2], "value": [float("nan"), 1, -1, float("nan")]}
+    )
+
+    q = df.group_by("key").agg(op(pl.col("value")))
+
+    assert_ir_translation_raises(q, NotImplementedError)

From ceff6af561cc3d5969306515c585c1259471b143 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 10 Jul 2024 08:26:21 +0000
Subject: [PATCH 7/9] Fix docstring section

---
 python/cudf/cudf/_lib/pylibcudf/column.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
index 4ad3637cc19..c7454560c01 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -426,8 +426,8 @@ def is_c_contiguous(
     itemsize : int
         Size of an element in bytes.
 
-    Return
-    ------
+    Returns
+    -------
     bool
         The boolean answer.
     """

From 15802e671e38e2dab49bd930ac59ee20e1a85178 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 12 Jul 2024 14:12:24 +0000
Subject: [PATCH 8/9] Docstring type annotations

---
 python/cudf/cudf/_lib/pylibcudf/column.pyx    | 4 ++--
 python/cudf/cudf/_lib/pylibcudf/transform.pyx | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx
index c7454560c01..cb96c1d9fce 100644
--- a/python/cudf/cudf/_lib/pylibcudf/column.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx
@@ -180,9 +180,9 @@ cdef class Column:
 
         Parameters
         ----------
-        mask
+        mask : gpumemoryview
             New mask (or None to unset the mask)
-        null_count
+        null_count : int
             New null count. If this is incorrect, bad things happen.
 
         Returns
diff --git a/python/cudf/cudf/_lib/pylibcudf/transform.pyx b/python/cudf/cudf/_lib/pylibcudf/transform.pyx
index bf18c6a91a5..a734e71b820 100644
--- a/python/cudf/cudf/_lib/pylibcudf/transform.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/transform.pyx
@@ -17,7 +17,7 @@ cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input):
 
     Parameters
     ----------
-    input
+    input : Column
         Column to produce new mask from.
 
     Returns

From cb2a5a4c54bd33a142832b3f4641e49cea57be68 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 12 Jul 2024 14:24:57 +0000
Subject: [PATCH 9/9] Test nans_to_nulls in pylibcudf

---
 python/cudf/cudf/pylibcudf_tests/conftest.py  | 11 ++++++-
 .../cudf/pylibcudf_tests/test_transform.py    | 32 +++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 python/cudf/cudf/pylibcudf_tests/test_transform.py

diff --git a/python/cudf/cudf/pylibcudf_tests/conftest.py b/python/cudf/cudf/pylibcudf_tests/conftest.py
index 39832eb4bba..b5bbf470eaf 100644
--- a/python/cudf/cudf/pylibcudf_tests/conftest.py
+++ b/python/cudf/cudf/pylibcudf_tests/conftest.py
@@ -141,6 +141,15 @@ def sorted_opt(request):
     return request.param
 
 
-@pytest.fixture(scope="session", params=[False, True])
+@pytest.fixture(
+    scope="session", params=[False, True], ids=["without_nulls", "with_nulls"]
+)
 def has_nulls(request):
     return request.param
+
+
+@pytest.fixture(
+    scope="session", params=[False, True], ids=["without_nans", "with_nans"]
+)
+def has_nans(request):
+    return request.param
diff --git a/python/cudf/cudf/pylibcudf_tests/test_transform.py b/python/cudf/cudf/pylibcudf_tests/test_transform.py
new file mode 100644
index 00000000000..312939888dd
--- /dev/null
+++ b/python/cudf/cudf/pylibcudf_tests/test_transform.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import math
+
+import pyarrow as pa
+from utils import assert_column_eq
+
+from cudf._lib import pylibcudf as plc
+
+
+def test_nans_to_nulls(has_nans):
+    """Check nans_to_nulls nulls out NaNs and keeps existing nulls."""
+    if has_nans:
+        values = [1, float("nan"), float("nan"), None, 3, None]
+    else:
+        values = [1, 4, 5, None, 3, None]
+
+    # Expected result: every NaN becomes null; existing nulls are preserved.
+    # `or` short-circuits, so isnan is never called on None.
+    replaced = [None if v is None or math.isnan(v) else v for v in values]
+
+    h_input = pa.array(values, type=pa.float32())
+    input = plc.interop.from_arrow(h_input)
+    assert input.null_count() == h_input.null_count
+    expect = pa.array(replaced, type=pa.float32())
+
+    mask, null_count = plc.transform.nans_to_nulls(input)
+
+    assert null_count == expect.null_count
+    got = input.with_mask(mask, null_count)
+
+    assert_column_eq(expect, got)