From 352d686ff1eafd5f06382c04e56558a27eb457c8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:30:58 -0600 Subject: [PATCH] Migrate filling operations to pylibcudf (#15225) This PR migrates the filling operations in cuDF Python to pylibcudf. Authors: - https://github.com/brandon-b-miller - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/15225 --- docs/cudf/source/conf.py | 1 + .../user_guide/api_docs/pylibcudf/filling.rst | 6 + .../user_guide/api_docs/pylibcudf/index.rst | 1 + python/cudf/cudf/_lib/filling.pyx | 110 ++++-------- .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt | 1 + python/cudf/cudf/_lib/pylibcudf/__init__.pxd | 2 + python/cudf/cudf/_lib/pylibcudf/__init__.py | 2 + python/cudf/cudf/_lib/pylibcudf/filling.pxd | 35 ++++ python/cudf/cudf/_lib/pylibcudf/filling.pyx | 170 ++++++++++++++++++ 9 files changed, 250 insertions(+), 78 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst create mode 100644 python/cudf/cudf/_lib/pylibcudf/filling.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/filling.pyx diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 1b9e3c179cc..3bba50b482c 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -379,6 +379,7 @@ def _generate_namespaces(namespaces): "type_id", # Unknown base types "int32_t", + "void" } diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst new file mode 100644 index 00000000000..542a5e12bc4 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst @@ -0,0 +1,6 @@ +======== +filling +======== + +.. 
automodule:: cudf._lib.pylibcudf.filling + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index 2e5b3916c65..8cad95f61ae 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -13,6 +13,7 @@ This page provides API documentation for pylibcudf. column concatenate copying + filling gpumemoryview groupby join diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx index 63549f08cbd..b7302f3d07a 100644 --- a/python/cudf/cudf/_lib/filling.pyx +++ b/python/cudf/cudf/_lib/filling.pyx @@ -1,103 +1,57 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move -cimport cudf._lib.cpp.filling as cpp_filling from cudf._lib.column cimport Column -from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view -from cudf._lib.cpp.scalar.scalar cimport scalar -from cudf._lib.cpp.table.table cimport table -from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.cpp.types cimport size_type from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns +from cudf._lib.utils cimport columns_from_pylibcudf_table + +from cudf._lib import pylibcudf +from cudf._lib.scalar import as_device_scalar @acquire_spill_lock() def fill_in_place(Column destination, int begin, int end, DeviceScalar value): - cdef mutable_column_view c_destination = destination.mutable_view() - cdef size_type c_begin = begin - cdef size_type c_end = end - cdef const scalar* c_value = value.get_raw_ptr() - - cpp_filling.fill_in_place( - c_destination, - c_begin, - c_end, - c_value[0] + pylibcudf.filling.fill_in_place( + 
+ destination.to_pylibcudf(mode='write'), + begin, + end, + (<DeviceScalar> as_device_scalar(value, dtype=destination.dtype)).c_value ) @acquire_spill_lock() def fill(Column destination, int begin, int end, DeviceScalar value): - cdef column_view c_destination = destination.view() - cdef size_type c_begin = begin - cdef size_type c_end = end - cdef const scalar* c_value = value.get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_filling.fill( - c_destination, - c_begin, - c_end, - c_value[0] - )) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.filling.fill( + destination.to_pylibcudf(mode='read'), + begin, + end, + (<DeviceScalar> as_device_scalar(value)).c_value + ) + ) @acquire_spill_lock() def repeat(list inp, object count): + ctbl = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in inp]) if isinstance(count, Column): - return _repeat_via_column(inp, count) - else: - return _repeat_via_size_type(inp, count) - - -def _repeat_via_column(list inp, Column count): - cdef table_view c_inp = table_view_from_columns(inp) - cdef column_view c_count = count.view() - cdef unique_ptr[table] c_result - - with nogil: - c_result = move(cpp_filling.repeat( - c_inp, - c_count, - )) - - return columns_from_unique_ptr(move(c_result)) - - -def _repeat_via_size_type(list inp, size_type count): - cdef table_view c_inp = table_view_from_columns(inp) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move(cpp_filling.repeat( - c_inp, + count = count.to_pylibcudf(mode="read") + return columns_from_pylibcudf_table( + pylibcudf.filling.repeat( + ctbl, count - )) - - return columns_from_unique_ptr(move(c_result)) + ) + ) @acquire_spill_lock() def sequence(int size, DeviceScalar init, DeviceScalar step): - cdef size_type c_size = size - cdef const scalar* c_init = init.get_raw_ptr() - cdef const scalar* c_step = step.get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_filling.sequence( 
- c_size, - c_init[0], - c_step[0] - )) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.filling.sequence( + size, + (<DeviceScalar> as_device_scalar(init)).c_value, + (<DeviceScalar> as_device_scalar(step)).c_value + ) + ) diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index fd749a5edc1..ada47de5cae 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -18,6 +18,7 @@ set(cython_sources column.pyx concatenate.pyx copying.pyx + filling.pyx gpumemoryview.pyx groupby.pyx interop.pyx diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd index 96aa42cc257..39b29eace10 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd @@ -6,6 +6,7 @@ from . cimport ( binaryop, concatenate, copying, + filling, groupby, interop, join, @@ -37,6 +38,7 @@ __all__ = [ "binaryop", "concatenate", "copying", + "filling", "gpumemoryview", "groupby", "interop", diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py index 19cc782dd92..8ccb0ecc341 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py @@ -5,6 +5,7 @@ binaryop, concatenate, copying, + filling, groupby, interop, join, @@ -35,6 +36,7 @@ "binaryop", "concatenate", "copying", + "filling", "gpumemoryview", "groupby", "interop", diff --git a/python/cudf/cudf/_lib/pylibcudf/filling.pxd b/python/cudf/cudf/_lib/pylibcudf/filling.pxd new file mode 100644 index 00000000000..55dbd7b075f --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/filling.pxd @@ -0,0 +1,35 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+from cudf._lib.cpp.types cimport size_type + +from .column cimport Column +from .scalar cimport Scalar +from .table cimport Table + +ctypedef fused ColumnOrSize: + Column + size_type + +cpdef Column fill( + Column destination, + size_type begin, + size_type end, + Scalar value, +) + +cpdef void fill_in_place( + Column destination, + size_type c_begin, + size_type c_end, + Scalar value, +) + +cpdef Column sequence( + size_type size, + Scalar init, + Scalar step, +) + +cpdef Table repeat( + Table input_table, + ColumnOrSize count +) diff --git a/python/cudf/cudf/_lib/pylibcudf/filling.pyx b/python/cudf/cudf/_lib/pylibcudf/filling.pyx new file mode 100644 index 00000000000..588ab58a146 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/filling.pyx @@ -0,0 +1,170 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cython.operator cimport dereference +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.filling cimport ( + fill as cpp_fill, + fill_in_place as cpp_fill_in_place, + repeat as cpp_repeat, + sequence as cpp_sequence, +) +from cudf._lib.cpp.table.table cimport table +from cudf._lib.cpp.types cimport size_type + +from .column cimport Column +from .scalar cimport Scalar +from .table cimport Table + + +cpdef Column fill( + Column destination, + size_type begin, + size_type end, + Scalar value, +): + + """Fill destination column from begin to end with value. + + For details, see :cpp:func:`fill`. + + Parameters + ---------- + destination : Column + The column to be filled + begin : size_type + The index to begin filling from. + end : size_type + The index at which to stop filling. + value : Scalar + The value to fill with. 
+ + Returns + ------- + pylibcudf.Column + The result of the filling operation + """ + + cdef unique_ptr[column] result + with nogil: + result = move( + cpp_fill( + destination.view(), + begin, + end, + dereference((<Scalar> value).c_obj) + ) + ) + return Column.from_libcudf(move(result)) + +cpdef void fill_in_place( + Column destination, + size_type begin, + size_type end, + Scalar value, +): + + """Fill destination column in place from begin to end with value. + + For details, see :cpp:func:`fill_in_place`. + + Parameters + ---------- + destination : Column + The column to be filled + begin : size_type + The index to begin filling from. + end : size_type + The index at which to stop filling. + value : Scalar + The value to fill with. + """ + + with nogil: + cpp_fill_in_place( + destination.mutable_view(), + begin, + end, + dereference(value.c_obj) + ) + +cpdef Column sequence(size_type size, Scalar init, Scalar step): + """Create a sequence column of size ``size`` with initial value ``init`` and step + ``step``. + + For details, see :cpp:func:`sequence`. + + Parameters + ---------- + size : int + The size of the sequence + init : Scalar + The initial value of the sequence + step : Scalar + The step of the sequence + Returns + ------- + pylibcudf.Column + The result of the sequence operation + """ + + cdef unique_ptr[column] result + cdef size_type c_size = size + with nogil: + result = move( + cpp_sequence( + c_size, + dereference(init.c_obj), + dereference(step.c_obj), + ) + ) + return Column.from_libcudf(move(result)) + + +cpdef Table repeat( + Table input_table, + ColumnOrSize count +): + """Repeat rows of a Table. + + If an integral value is specified for ``count``, every row is repeated ``count`` + times. If ``count`` is a column, the number of repetitions of each row is defined + by the value at the corresponding index of ``count``. + + For details, see :cpp:func:`repeat`. 
+ + Parameters + ---------- + input_table : Table + The table to be repeated + count : Union[Column, size_type] + Integer value to repeat each row by or + non-nullable column of an integral type + + Returns + ------- + pylibcudf.Table + The result of the repeat operation + """ + + cdef unique_ptr[table] result + + if ColumnOrSize is Column: + with nogil: + result = move( + cpp_repeat( + input_table.view(), + count.view() + ) + ) + if ColumnOrSize is size_type: + with nogil: + result = move( + cpp_repeat( + input_table.view(), + count + ) + ) + return Table.from_libcudf(move(result))