From 352d686ff1eafd5f06382c04e56558a27eb457c8 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 6 Mar 2024 21:30:58 -0600
Subject: [PATCH] Migrate filling operations to pylibcudf (#15225)

This PR migrates the filling operations in cuDF Python to pylibcudf.

Authors:
  - https://github.com/brandon-b-miller
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/15225
---
 docs/cudf/source/conf.py                      |   1 +
 .../user_guide/api_docs/pylibcudf/filling.rst |   6 +
 .../user_guide/api_docs/pylibcudf/index.rst   |   1 +
 python/cudf/cudf/_lib/filling.pyx             | 110 ++++--------
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   |   1 +
 python/cudf/cudf/_lib/pylibcudf/__init__.pxd  |   2 +
 python/cudf/cudf/_lib/pylibcudf/__init__.py   |   2 +
 python/cudf/cudf/_lib/pylibcudf/filling.pxd   |  35 ++++
 python/cudf/cudf/_lib/pylibcudf/filling.pyx   | 170 ++++++++++++++++++
 9 files changed, 250 insertions(+), 78 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/filling.pxd
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/filling.pyx

diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index 1b9e3c179cc..3bba50b482c 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -379,6 +379,7 @@ def _generate_namespaces(namespaces):
     "type_id",
     # Unknown base types
     "int32_t",
+    "void"
 }
 
 
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst
new file mode 100644
index 00000000000..542a5e12bc4
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst
@@ -0,0 +1,6 @@
+========
+filling
+========
+
+.. automodule:: cudf._lib.pylibcudf.filling
+   :members:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
index 2e5b3916c65..8cad95f61ae 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -13,6 +13,7 @@ This page provides API documentation for pylibcudf.
     column
     concatenate
     copying
+    filling
     gpumemoryview
     groupby
     join
diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx
index 63549f08cbd..b7302f3d07a 100644
--- a/python/cudf/cudf/_lib/filling.pyx
+++ b/python/cudf/cudf/_lib/filling.pyx
@@ -1,103 +1,57 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from cudf.core.buffer import acquire_spill_lock
 
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
 
-cimport cudf._lib.cpp.filling as cpp_filling
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view
-from cudf._lib.cpp.scalar.scalar cimport scalar
-from cudf._lib.cpp.table.table cimport table
-from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
-from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
+from cudf._lib.utils cimport columns_from_pylibcudf_table
+
+from cudf._lib import pylibcudf
+from cudf._lib.scalar import as_device_scalar
 
 
 @acquire_spill_lock()
 def fill_in_place(Column destination, int begin, int end, DeviceScalar value):
-    cdef mutable_column_view c_destination = destination.mutable_view()
-    cdef size_type c_begin = <size_type> begin
-    cdef size_type c_end = <size_type> end
-    cdef const scalar* c_value = value.get_raw_ptr()
-
-    cpp_filling.fill_in_place(
-        c_destination,
-        c_begin,
-        c_end,
-        c_value[0]
+    pylibcudf.filling.fill_in_place(
+        destination.to_pylibcudf(mode='write'),
+        begin,
+        end,
+        (<DeviceScalar> as_device_scalar(value, dtype=destination.dtype)).c_value
     )
 
 
 @acquire_spill_lock()
 def fill(Column destination, int begin, int end, DeviceScalar value):
-    cdef column_view c_destination = destination.view()
-    cdef size_type c_begin = <size_type> begin
-    cdef size_type c_end = <size_type> end
-    cdef const scalar* c_value = value.get_raw_ptr()
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(cpp_filling.fill(
-            c_destination,
-            c_begin,
-            c_end,
-            c_value[0]
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    return Column.from_pylibcudf(
+        pylibcudf.filling.fill(
+            destination.to_pylibcudf(mode='read'),
+            begin,
+            end,
+            (<DeviceScalar> as_device_scalar(value)).c_value
+        )
+    )
 
 
 @acquire_spill_lock()
 def repeat(list inp, object count):
+    ctbl = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in inp])
     if isinstance(count, Column):
-        return _repeat_via_column(inp, count)
-    else:
-        return _repeat_via_size_type(inp, count)
-
-
-def _repeat_via_column(list inp, Column count):
-    cdef table_view c_inp = table_view_from_columns(inp)
-    cdef column_view c_count = count.view()
-    cdef unique_ptr[table] c_result
-
-    with nogil:
-        c_result = move(cpp_filling.repeat(
-            c_inp,
-            c_count,
-        ))
-
-    return columns_from_unique_ptr(move(c_result))
-
-
-def _repeat_via_size_type(list inp, size_type count):
-    cdef table_view c_inp = table_view_from_columns(inp)
-    cdef unique_ptr[table] c_result
-
-    with nogil:
-        c_result = move(cpp_filling.repeat(
-            c_inp,
+        count = count.to_pylibcudf(mode="read")
+    return columns_from_pylibcudf_table(
+        pylibcudf.filling.repeat(
+            ctbl,
             count
-        ))
-
-    return columns_from_unique_ptr(move(c_result))
+        )
+    )
 
 
 @acquire_spill_lock()
 def sequence(int size, DeviceScalar init, DeviceScalar step):
-    cdef size_type c_size = size
-    cdef const scalar* c_init = init.get_raw_ptr()
-    cdef const scalar* c_step = step.get_raw_ptr()
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(cpp_filling.sequence(
-            c_size,
-            c_init[0],
-            c_step[0]
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    return Column.from_pylibcudf(
+        pylibcudf.filling.sequence(
+            size,
+            (<DeviceScalar> as_device_scalar(init)).c_value,
+            (<DeviceScalar> as_device_scalar(step)).c_value
+        )
+    )
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index fd749a5edc1..ada47de5cae 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -18,6 +18,7 @@ set(cython_sources
     column.pyx
     concatenate.pyx
     copying.pyx
+    filling.pyx
     gpumemoryview.pyx
     groupby.pyx
     interop.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
index 96aa42cc257..39b29eace10 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -6,6 +6,7 @@ from . cimport (
     binaryop,
     concatenate,
     copying,
+    filling,
     groupby,
     interop,
     join,
@@ -37,6 +38,7 @@ __all__ = [
     "binaryop",
     "concatenate",
     "copying",
+    "filling",
     "gpumemoryview",
     "groupby",
     "interop",
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
index 19cc782dd92..8ccb0ecc341 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -5,6 +5,7 @@
     binaryop,
     concatenate,
     copying,
+    filling,
     groupby,
     interop,
     join,
@@ -35,6 +36,7 @@
     "binaryop",
     "concatenate",
     "copying",
+    "filling",
     "gpumemoryview",
     "groupby",
     "interop",
diff --git a/python/cudf/cudf/_lib/pylibcudf/filling.pxd b/python/cudf/cudf/_lib/pylibcudf/filling.pxd
new file mode 100644
index 00000000000..55dbd7b075f
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/filling.pxd
@@ -0,0 +1,35 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from cudf._lib.cpp.types cimport size_type
+
+from .column cimport Column
+from .scalar cimport Scalar
+from .table cimport Table
+
+ctypedef fused ColumnOrSize:
+    Column
+    size_type
+
+cpdef Column fill(
+    Column destination,
+    size_type begin,
+    size_type end,
+    Scalar value,
+)
+
+cpdef void fill_in_place(
+    Column destination,
+    size_type begin,
+    size_type end,
+    Scalar value,
+)
+
+cpdef Column sequence(
+    size_type size,
+    Scalar init,
+    Scalar step,
+)
+
+cpdef Table repeat(
+    Table input_table,
+    ColumnOrSize count
+)
diff --git a/python/cudf/cudf/_lib/pylibcudf/filling.pyx b/python/cudf/cudf/_lib/pylibcudf/filling.pyx
new file mode 100644
index 00000000000..588ab58a146
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/filling.pyx
@@ -0,0 +1,170 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from cython.operator cimport dereference
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+
+from cudf._lib.cpp.column.column cimport column
+from cudf._lib.cpp.filling cimport (
+    fill as cpp_fill,
+    fill_in_place as cpp_fill_in_place,
+    repeat as cpp_repeat,
+    sequence as cpp_sequence,
+)
+from cudf._lib.cpp.table.table cimport table
+from cudf._lib.cpp.types cimport size_type
+
+from .column cimport Column
+from .scalar cimport Scalar
+from .table cimport Table
+
+
+cpdef Column fill(
+    Column destination,
+    size_type begin,
+    size_type end,
+    Scalar value,
+):
+
+    """Fill destination column from begin to end with value.
+
+    For details, see :cpp:func:`fill`.
+
+    Parameters
+    ----------
+    destination : Column
+        The column to be filled; it is read through its non-mutable view.
+    begin : size_type
+        The index to begin filling from.
+    end : size_type
+        The index at which to stop filling.
+    value : Scalar
+        The value to fill with.
+
+    Returns
+    -------
+    pylibcudf.Column
+        A new column wrapping the result of the filling operation.
+    """
+
+    cdef unique_ptr[column] result
+    with nogil:
+        result = move(
+            cpp_fill(
+                destination.view(),
+                begin,
+                end,
+                dereference(value.c_obj)
+            )
+        )
+    return Column.from_libcudf(move(result))
+
+cpdef void fill_in_place(
+    Column destination,
+    size_type begin,
+    size_type end,
+    Scalar value,
+):
+
+    """Fill destination column in place from begin to end with value.
+
+    For details, see :cpp:func:`fill_in_place`.
+
+    Parameters
+    ----------
+    destination : Column
+        The column to be filled; it is written to through its mutable view.
+    begin : size_type
+        The index to begin filling from.
+    end : size_type
+        The index at which to stop filling.
+    value : Scalar
+        The value to fill with.
+    """
+
+    with nogil:
+        cpp_fill_in_place(
+            destination.mutable_view(),
+            begin,
+            end,
+            dereference(value.c_obj)
+        )
+
+cpdef Column sequence(size_type size, Scalar init, Scalar step):
+    """Create a sequence column of size ``size`` with initial value ``init`` and step
+    ``step``.
+
+    For details, see :cpp:func:`sequence`.
+
+    Parameters
+    ----------
+    size : size_type
+        The size of the sequence
+    init : Scalar
+        The initial value of the sequence
+    step : Scalar
+        The step of the sequence
+
+    Returns
+    -------
+    pylibcudf.Column
+        The result of the sequence operation
+    """
+
+    cdef unique_ptr[column] result
+    with nogil:
+        result = move(
+            cpp_sequence(
+                size,
+                dereference(init.c_obj),
+                dereference(step.c_obj),
+            )
+        )
+    return Column.from_libcudf(move(result))
+
+
+cpdef Table repeat(
+    Table input_table,
+    ColumnOrSize count
+):
+    """Repeat rows of a Table.
+
+    If an integral value is specified for ``count``, every row is repeated ``count``
+    times. If ``count`` is a column, the number of repetitions of each row is defined
+    by the value at the corresponding index of ``count``.
+
+    For details, see :cpp:func:`repeat`.
+
+    Parameters
+    ----------
+    input_table : Table
+        The table to be repeated
+    count : Union[Column, size_type]
+        Integer value to repeat each row by or
+        non-nullable column of an integral type
+
+    Returns
+    -------
+    pylibcudf.Table
+        The result of the repeat operation
+    """
+
+    cdef unique_ptr[table] result
+    # ColumnOrSize is fused, so each specialization keeps only its matching branch.
+    if ColumnOrSize is Column:
+        with nogil:
+            result = move(
+                cpp_repeat(
+                    input_table.view(),
+                    count.view()
+                )
+            )
+    if ColumnOrSize is size_type:
+        with nogil:
+            result = move(
+                cpp_repeat(
+                    input_table.view(),
+                    count
+                )
+            )
+    return Table.from_libcudf(move(result))