From ab8f5e2fab3dd6cfdca3425d38f03d76a599a314 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 16 Oct 2023 10:28:07 -0700 Subject: [PATCH 01/20] Update C++ enums ensuring they have a specified base type --- cpp/include/cudf/copying.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 63680473c14..b3a8836b193 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -211,7 +211,7 @@ std::unique_ptr scatter( /** * @brief Indicates when to allocate a mask, based on an existing mask. */ -enum class mask_allocation_policy { +enum class mask_allocation_policy : int32_t { NEVER, ///< Do not allocate a null mask, regardless of input RETAIN, ///< Allocate a null mask if the input contains one ALWAYS ///< Allocate a null mask, regardless of input From 5f58796ecf6f4e2a2dce4f5893400a1f4c16204d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 16 Oct 2023 10:28:56 -0700 Subject: [PATCH 02/20] Update Cython enum wrappers --- python/cudf/cudf/_lib/cpp/copying.pxd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index 5637b55ac1c..ae0296121a5 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -47,10 +47,10 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: table_view target, ) except + - ctypedef enum mask_allocation_policy: - NEVER 'cudf::mask_allocation_policy::NEVER', - RETAIN 'cudf::mask_allocation_policy::RETAIN', - ALWAYS 'cudf::mask_allocation_policy::ALWAYS' + cpdef enum class mask_allocation_policy(int32_t): + NEVER + RETAIN + ALWAYS cdef unique_ptr[column] empty_like ( column_view input_column @@ -148,6 +148,6 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: size_type index ) except + - ctypedef enum sample_with_replacement: - FALSE 'cudf::sample_with_replacement::FALSE', - TRUE 'cudf::sample_with_replacement::TRUE', + cpdef enum class sample_with_replacement(bool): + FALSE + TRUE From c499b41a37e532d9c9a6865c9d51a099aa8c031a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 9 Nov 2023 17:22:44 -0800 Subject: [PATCH 03/20] Add const to all exported C++ declarations that were missing it --- python/cudf/cudf/_lib/cpp/copying.pxd | 68 +++++++++++++-------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index ae0296121a5..ebb73bb9cef 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -36,15 +36,15 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[table] scatter ( - table_view source_table, - column_view scatter_map, - table_view target_table, + const table_view source_table, + const column_view scatter_map, + const table_view target_table, ) except + cdef unique_ptr[table] scatter ( vector[reference_wrapper[constscalar]] source_scalars, - column_view indices, - table_view target, + const column_view indices, + const table_view target, ) except + cpdef enum class mask_allocation_policy(int32_t): @@ -53,26 +53,26 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ALWAYS cdef unique_ptr[column] empty_like ( - column_view input_column + const column_view input_column ) except + cdef unique_ptr[column] allocate_like ( - column_view input_column, + const column_view input_column, mask_allocation_policy policy ) except + cdef unique_ptr[column] allocate_like ( - column_view input_column, + const column_view input_column, size_type size, mask_allocation_policy policy ) except + cdef unique_ptr[table] empty_like ( - table_view input_table + const table_view input_table ) except + cdef void copy_range_in_place ( - column_view input_column, + const column_view input_column, mutable_column_view target_column, size_type input_begin, size_type input_end, @@ -80,71 +80,71 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[column] copy_range ( - column_view input_column, - column_view target_column, + const column_view input_column, + const column_view target_column, size_type input_begin, size_type input_end, size_type target_begin ) except + cdef vector[column_view] slice ( - column_view input_column, + const column_view input_column, vector[size_type] indices ) except + cdef vector[table_view] slice ( - table_view input_table, + const table_view input_table, vector[size_type] indices ) except + cdef vector[column_view] split ( - column_view input_column, + const column_view input_column, vector[size_type] splits ) except + cdef vector[table_view] split ( - table_view input_table, + const table_view input_table, vector[size_type] splits ) except + cdef unique_ptr[column] copy_if_else ( - column_view lhs, - column_view rhs, - column_view boolean_mask + const column_view lhs, + const column_view rhs, + const column_view boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - scalar lhs, - column_view rhs, - column_view boolean_mask + const scalar& lhs, + const column_view rhs, + const column_view boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - column_view lhs, - scalar rhs, - column_view boolean_mask + const column_view lhs, + const scalar& rhs, + const column_view boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - scalar lhs, - scalar rhs, - column_view boolean_mask + const scalar& lhs, + const scalar& rhs, + const column_view boolean_mask ) except + cdef unique_ptr[table] boolean_mask_scatter ( - table_view input, - table_view target, - column_view boolean_mask + const table_view input, + const table_view target, + const column_view boolean_mask ) except + cdef unique_ptr[table] boolean_mask_scatter ( vector[reference_wrapper[constscalar]] input, - table_view target, - column_view boolean_mask + const table_view target, + const column_view boolean_mask ) except + cdef unique_ptr[scalar] get_element ( - column_view input, + const column_view input, size_type index ) except + From b41c95f248f88d15dc009570373fa59879bf248a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 9 Nov 2023 17:25:53 -0800 Subject: [PATCH 04/20] Allow calling get in a nogil context --- python/cudf/cudf/_lib/pylibcudf/scalar.pxd | 2 +- python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd index 09d853d832f..0edc934ca22 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd @@ -21,7 +21,7 @@ cdef class Scalar: # needed for deallocation cdef DeviceMemoryResource mr - cdef const scalar* get(self) except * + cdef const scalar* get(self) noexcept nogil cpdef DataType type(self) cpdef bool is_valid(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx index 04f588bd3e6..965f10999f2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx @@ -108,7 +108,7 @@ cdef class Scalar: return pa.pyarrow_wrap_scalar(c_result) - cdef const scalar* get(self) except *: + cdef const scalar* get(self) noexcept nogil: return self.c_obj.get() cpdef DataType type(self): From 8bace0848914fc4210fb6b915adf41085f689714 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 9 Nov 2023 17:31:52 -0800 Subject: [PATCH 05/20] Add shift to pylibcudf --- python/cudf/cudf/_lib/copying.pyx | 24 ++++++--------------- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 4 ++++ python/cudf/cudf/_lib/pylibcudf/copying.pyx | 20 +++++++++++++++-- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index ea6ee76c14a..42f2499bd15 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -2,7 +2,7 @@ import pickle -from libc.stdint cimport int32_t, uint8_t, uintptr_t +from libc.stdint cimport uint8_t, uintptr_t from libcpp cimport bool from libcpp.memory cimport make_shared, shared_ptr, unique_ptr from libcpp.utility cimport move @@ -614,7 +614,6 @@ def boolean_mask_scatter(list input_, list target_columns, @acquire_spill_lock() def shift(Column input, int offset, object fill_value=None): - cdef DeviceScalar fill if isinstance(fill_value, DeviceScalar): @@ -622,21 +621,12 @@ def shift(Column input, int offset, object fill_value=None): else: fill = as_device_scalar(fill_value, input.dtype) - cdef column_view c_input = input.view() - cdef int32_t c_offset = offset - cdef const scalar* c_fill_value = fill.get_raw_ptr() - cdef unique_ptr[column] c_output - - with nogil: - c_output = move( - cpp_copying.shift( - c_input, - c_offset, - c_fill_value[0] - ) - ) - - return Column.from_unique_ptr(move(c_output)) + col = pylibcudf.copying.shift( + input.to_pylibcudf(mode="read"), + offset, + fill.c_value, + ) + return Column.from_pylibcudf(col) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index d57be650710..62d27724dcb 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -3,8 +3,10 @@ from libcpp cimport bool as cbool from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.types cimport size_type from .column cimport Column +from .scalar cimport Scalar from .table cimport Table @@ -13,3 +15,5 @@ cpdef Table gather( Column gather_map, out_of_bounds_policy bounds_policy ) + +cpdef Column shift(Column input, size_type offset, Scalar fill_values) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index a27b44b3107..44524d7429d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -1,5 +1,7 @@ # Copyright (c) 2023, NVIDIA CORPORATION. +from cython.operator import dereference + from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -8,13 +10,14 @@ from libcpp.utility cimport move # we really want here would be # cimport libcudf... libcudf.copying.algo(...) from cudf._lib.cpp cimport copying as cpp_copying +from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.table.table cimport table +from cudf._lib.cpp.types cimport size_type from cudf._lib.cpp.copying import \ out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint -from cudf._lib.cpp.table.table cimport table - from .column cimport Column from .table cimport Table @@ -55,3 +58,16 @@ cpdef Table gather( ) ) return Table.from_libcudf(move(c_result)) + + +cpdef Column shift(Column input, size_type offset, Scalar fill_values): + cdef unique_ptr[column] c_result + with nogil: + c_result = move( + cpp_copying.shift( + input.view(), + offset, + dereference(fill_values.c_obj) + ) + ) + return Column.from_libcudf(move(c_result)) From d07de85d9133173bf1987567abc20e1f805b92b5 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Nov 2023 13:56:24 -0800 Subject: [PATCH 06/20] Implement scatter --- python/cudf/cudf/_lib/copying.pyx | 61 +++------------------ python/cudf/cudf/_lib/pylibcudf/copying.pxd | 2 + python/cudf/cudf/_lib/pylibcudf/copying.pyx | 40 ++++++++++++++ 3 files changed, 51 insertions(+), 52 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 42f2499bd15..a2ebb92520f 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -184,48 +184,6 @@ def gather( return columns_from_pylibcudf_table(tbl) -cdef scatter_scalar(list source_device_slrs, - column_view scatter_map, - table_view target_table): - cdef vector[reference_wrapper[constscalar]] c_source - cdef DeviceScalar d_slr - cdef unique_ptr[table] c_result - - c_source.reserve(len(source_device_slrs)) - for d_slr in source_device_slrs: - c_source.push_back( - reference_wrapper[constscalar](d_slr.get_raw_ptr()[0]) - ) - - with nogil: - c_result = move( - cpp_copying.scatter( - c_source, - scatter_map, - target_table, - ) - ) - - return columns_from_unique_ptr(move(c_result)) - - -cdef scatter_column(list source_columns, - column_view scatter_map, - table_view target_table): - cdef table_view c_source = table_view_from_columns(source_columns) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.scatter( - c_source, - scatter_map, - target_table, - ) - ) - return columns_from_unique_ptr(move(c_result)) - - @acquire_spill_lock() def scatter(list sources, Column scatter_map, list target_columns, bool bounds_check=True): @@ -243,9 +201,6 @@ def scatter(list sources, Column scatter_map, list target_columns, if len(sources) == 0: return [] - cdef column_view scatter_map_view = scatter_map.view() - cdef table_view target_table_view = table_view_from_columns(target_columns) - if bounds_check: n_rows = len(target_columns[0]) if not ( @@ -257,14 +212,16 @@ def scatter(list sources, Column scatter_map, list target_columns, ) if isinstance(sources[0], Column): - return scatter_column( - sources, scatter_map_view, target_table_view - ) + plc_source = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]) else: - source_scalars = [as_device_scalar(slr) for slr in sources] - return scatter_scalar( - source_scalars, scatter_map_view, target_table_view - ) + plc_source = [( as_device_scalar(slr)).c_value for slr in sources] + + tbl = pylibcudf.copying.scatter( + plc_source, + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + ) + return columns_from_pylibcudf_table(tbl) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 62d27724dcb..65cbca71965 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -17,3 +17,5 @@ cpdef Table gather( ) cpdef Column shift(Column input, size_type offset, Scalar fill_values) + +cpdef Table scatter(object source, Column scatter_map, Table target_table) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 44524d7429d..cc32a428931 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -2,8 +2,10 @@ from cython.operator import dereference +from libcpp.functional cimport reference_wrapper from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from libcpp.vector cimport vector # TODO: We want to make cpp a more full-featured package so that we can access # directly from that. It will make namespacing much cleaner in pylibcudf. What @@ -12,6 +14,7 @@ from libcpp.utility cimport move from cudf._lib.cpp cimport copying as cpp_copying from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.types cimport size_type @@ -21,6 +24,9 @@ from cudf._lib.cpp.copying import \ from .column cimport Column from .table cimport Table +# workaround for https://github.com/cython/cython/issues/3885 +ctypedef const scalar constscalar + # TODO: Is it OK to reference the corresponding libcudf algorithm in the # documentation? Otherwise there's a lot of room for duplication. @@ -71,3 +77,37 @@ cpdef Column shift(Column input, size_type offset, Scalar fill_values): ) ) return Column.from_libcudf(move(c_result)) + + +cpdef Table scatter(object source, Column scatter_map, Table target_table): + cdef unique_ptr[table] c_result + cdef vector[reference_wrapper[constscalar]] source_scalars + cdef Scalar slr + + if isinstance(source, Table): + with nogil: + c_result = move( + cpp_copying.scatter( + (
source).view(), + scatter_map.view(), + target_table.view(), + ) + ) + elif isinstance(source, list): # TODO: is list too restrictive? + for slr in source: + source_scalars.push_back( + reference_wrapper[constscalar](dereference(slr.c_obj)) + ) + + with nogil: + c_result = move( + cpp_copying.scatter( + source_scalars, + scatter_map.view(), + target_table.view(), + ) + ) + else: + raise ValueError("source must be a Table or list[Scalar]") + + return Table.from_libcudf(move(c_result)) From 61c5406cd0c7596cd8ca08cb30dc8f79c6e43815 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Nov 2023 16:23:28 -0800 Subject: [PATCH 07/20] Implement empty_like --- python/cudf/cudf/_lib/copying.pyx | 25 +++++++++------------ python/cudf/cudf/_lib/pylibcudf/copying.pxd | 2 ++ python/cudf/cudf/_lib/pylibcudf/copying.pyx | 23 +++++++++++++++++++ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index a2ebb92520f..e6a200356b5 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -226,14 +226,11 @@ def scatter(list sources, Column scatter_map, list target_columns, @acquire_spill_lock() def column_empty_like(Column input_column): - - cdef column_view input_column_view = input_column.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_copying.empty_like(input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.copying.empty_like( + input_column.to_pylibcudf(mode="read") + ) + ) @acquire_spill_lock() @@ -263,13 +260,11 @@ def column_allocate_like(Column input_column, size=None): @acquire_spill_lock() def columns_empty_like(list input_columns): - cdef table_view input_table_view = table_view_from_columns(input_columns) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move(cpp_copying.empty_like(input_table_view)) - - return columns_from_unique_ptr(move(c_result)) + return columns_from_pylibcudf_table( + pylibcudf.copying.empty_like( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]) + ) + ) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 65cbca71965..4c4b9a1d9d8 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -19,3 +19,5 @@ cpdef Table gather( cpdef Column shift(Column input, size_type offset, Scalar fill_values) cpdef Table scatter(object source, Column scatter_map, Table target_table) + +cpdef object empty_like(object input) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index cc32a428931..6ac099bd156 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -111,3 +111,26 @@ cpdef Table scatter(object source, Column scatter_map, Table target_table): raise ValueError("source must be a Table or list[Scalar]") return Table.from_libcudf(move(c_result)) + + +cpdef object empty_like(object input): + cdef unique_ptr[column] c_column_result + cdef unique_ptr[table] c_table_result + if isinstance(input, Column): + with nogil: + c_column_result = move( + cpp_copying.empty_like( + ( input).view(), + ) + ) + return Column.from_libcudf(move(c_column_result)) + elif isinstance(input, Table): + with nogil: + c_table_result = move( + cpp_copying.empty_like( + (
input).view(), + ) + ) + return Table.from_libcudf(move(c_table_result)) + else: + raise ValueError("input must be a Table or a Column") From a4ce491e1b95435f8be1218169dc07cb47f68628 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 13 Nov 2023 17:47:23 -0800 Subject: [PATCH 08/20] Implement allocate_like --- python/cudf/cudf/_lib/copying.pyx | 27 +++++---------------- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 4 ++- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 22 ++++++++++++++++- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index e6a200356b5..05bb4d6f21d 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -235,27 +235,12 @@ def column_empty_like(Column input_column): @acquire_spill_lock() def column_allocate_like(Column input_column, size=None): - - cdef size_type c_size = 0 - cdef column_view input_column_view = input_column.view() - cdef unique_ptr[column] c_result - - if size is None: - with nogil: - c_result = move(cpp_copying.allocate_like( - input_column_view, - cpp_copying.mask_allocation_policy.RETAIN) - ) - else: - c_size = size - with nogil: - c_result = move(cpp_copying.allocate_like( - input_column_view, - c_size, - cpp_copying.mask_allocation_policy.RETAIN) - ) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.copying.allocate_like( + input_column.to_pylibcudf(mode="read"), + size, + ) + ) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 4c4b9a1d9d8..085c4fac80b 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -2,7 +2,7 @@ from libcpp cimport bool as cbool -from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.copying cimport mask_allocation_policy, out_of_bounds_policy from cudf._lib.cpp.types cimport size_type from .column cimport Column @@ -21,3 +21,5 @@ cpdef Column shift(Column input, size_type offset, Scalar fill_values) cpdef Table scatter(object source, Column scatter_map, Table target_table) cpdef object empty_like(object input) + +cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 6ac099bd156..b6c974f218f 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -13,11 +13,13 @@ from libcpp.vector cimport vector # cimport libcudf... libcudf.copying.algo(...) from cudf._lib.cpp cimport copying as cpp_copying from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.copying cimport mask_allocation_policy, out_of_bounds_policy from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.types cimport size_type +from cudf._lib.cpp.copying import \ + mask_allocation_policy as MaskAllocationPolicy # no-cython-lint from cudf._lib.cpp.copying import \ out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint @@ -134,3 +136,21 @@ cpdef object empty_like(object input): return Table.from_libcudf(move(c_table_result)) else: raise ValueError("input must be a Table or a Column") + + +cpdef Column allocate_like( + Column input_column, mask_allocation_policy policy, size=None +): + cdef unique_ptr[column] c_result + cdef size_type c_size = size if size is not None else input_column.size() + + with nogil: + c_result = move( + cpp_copying.allocate_like( + input_column.view(), + c_size, + policy, + ) + ) + + return Column.from_libcudf(move(c_result)) From 644f26c3971fdc8d5e6135e1b1cc2e5ab708fc98 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 20 Nov 2023 17:04:19 -0800 Subject: [PATCH 09/20] Implement copy_if_else --- python/cudf/cudf/_lib/copying.pyx | 26 +++++------- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 45 +++++++++++++++++++++ 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 05bb4d6f21d..5fc26f8aee5 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -450,23 +450,15 @@ def _copy_if_else_scalar_scalar(DeviceScalar lhs, @acquire_spill_lock() def copy_if_else(object lhs, object rhs, Column boolean_mask): - - if isinstance(lhs, Column): - if isinstance(rhs, Column): - return _copy_if_else_column_column(lhs, rhs, boolean_mask) - else: - return _copy_if_else_column_scalar( - lhs, as_device_scalar(rhs), boolean_mask) - else: - if isinstance(rhs, Column): - return _copy_if_else_scalar_column( - as_device_scalar(lhs), rhs, boolean_mask) - else: - if lhs is None and rhs is None: - return lhs - - return _copy_if_else_scalar_scalar( - as_device_scalar(lhs), as_device_scalar(rhs), boolean_mask) + return Column.from_pylibcudf( + pylibcudf.copying.copy_if_else( + lhs.to_pylibcudf(mode="read") if isinstance(lhs, Column) + else ( as_device_scalar(lhs)).c_value, + rhs.to_pylibcudf(mode="read") if isinstance(rhs, Column) + else ( as_device_scalar(rhs)).c_value, + boolean_mask.to_pylibcudf(mode="read"), + ) + ) def _boolean_mask_scatter_columns(list input_columns, list target_columns, diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index b6c974f218f..036daa38d1e 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -154,3 +154,48 @@ cpdef Column allocate_like( ) return Column.from_libcudf(move(c_result)) + + +cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): + cdef unique_ptr[column] result + + if isinstance(lhs, Column) and isinstance(rhs, Column): + with nogil: + result = move( + cpp_copying.copy_if_else( + ( lhs).view(), + ( rhs).view(), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Column) and isinstance(rhs, Scalar): + with nogil: + result = move( + cpp_copying.copy_if_else( + ( lhs).view(), + dereference(( rhs).c_obj), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Scalar) and isinstance(rhs, Column): + with nogil: + result = move( + cpp_copying.copy_if_else( + dereference(( lhs).c_obj), + ( rhs).view(), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Scalar) and isinstance(rhs, Scalar): + with nogil: + result = move( + cpp_copying.copy_if_else( + dereference(( lhs).c_obj), + dereference(( rhs).c_obj), + boolean_mask.view() + ) + ) + else: + raise ValueError(f"Invalid arguments {lhs} and {rhs}") + + return Column.from_libcudf(move(result)) From f1c9796f5ea385e60e3100b7104d5fb59ef78520 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 20 Nov 2023 17:41:30 -0800 Subject: [PATCH 10/20] Implement boolean_mask_scatter --- python/cudf/cudf/_lib/copying.pyx | 70 +++------------------ python/cudf/cudf/_lib/pylibcudf/copying.pyx | 41 ++++++++++++ 2 files changed, 50 insertions(+), 61 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 5fc26f8aee5..786e37f1b07 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -24,7 +24,6 @@ from cudf._lib.utils cimport table_view_from_columns, table_view_from_table from cudf._lib.reduce import minmax from cudf.core.abc import Serializable -from libcpp.functional cimport reference_wrapper from libcpp.memory cimport make_unique cimport cudf._lib.cpp.contiguous_split as cpp_contiguous_split @@ -36,13 +35,11 @@ from cudf._lib.cpp.lists.gather cimport ( ) from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf._lib.cpp.scalar.scalar cimport scalar -from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type from cudf._lib.utils cimport ( columns_from_pylibcudf_table, columns_from_table_view, - columns_from_unique_ptr, data_from_table_view, table_view_from_columns, ) @@ -461,53 +458,6 @@ def copy_if_else(object lhs, object rhs, Column boolean_mask): ) -def _boolean_mask_scatter_columns(list input_columns, list target_columns, - Column boolean_mask): - - cdef table_view input_table_view = table_view_from_columns(input_columns) - cdef table_view target_table_view = table_view_from_columns(target_columns) - cdef column_view boolean_mask_view = boolean_mask.view() - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.boolean_mask_scatter( - input_table_view, - target_table_view, - boolean_mask_view - ) - ) - - return columns_from_unique_ptr(move(c_result)) - - -def _boolean_mask_scatter_scalar(list input_scalars, list target_columns, - Column boolean_mask): - - cdef vector[reference_wrapper[constscalar]] input_scalar_vector - input_scalar_vector.reserve(len(input_scalars)) - cdef DeviceScalar scl - for scl in input_scalars: - input_scalar_vector.push_back(reference_wrapper[constscalar]( - scl.get_raw_ptr()[0])) - cdef table_view target_table_view = table_view_from_columns(target_columns) - cdef column_view boolean_mask_view = boolean_mask.view() - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.boolean_mask_scatter( - input_scalar_vector, - target_table_view, - boolean_mask_view - ) - ) - - return columns_from_unique_ptr(move(c_result)) - - @acquire_spill_lock() def boolean_mask_scatter(list input_, list target_columns, Column boolean_mask): @@ -527,18 +477,16 @@ def boolean_mask_scatter(list input_, list target_columns, return [] if isinstance(input_[0], Column): - return _boolean_mask_scatter_columns( - input_, - target_columns, - boolean_mask - ) + plc_input = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]) else: - scalar_list = [as_device_scalar(i) for i in input_] - return _boolean_mask_scatter_scalar( - scalar_list, - target_columns, - boolean_mask - ) + plc_input = [( as_device_scalar(i)).c_value for i in input_] + + tbl = pylibcudf.copying.boolean_mask_scatter( + plc_input, + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + boolean_mask.to_pylibcudf(mode="read"), + ) + return columns_from_pylibcudf_table(tbl) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 036daa38d1e..08c8a8a7cec 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -199,3 +199,44 @@ cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): raise ValueError(f"Invalid arguments {lhs} and {rhs}") return Column.from_libcudf(move(result)) + + +cpdef Table boolean_mask_scatter(object input, Table target, Column boolean_mask): + cdef unique_ptr[table] result + cdef vector[reference_wrapper[const scalar]] c_scalars + cdef Scalar slr + + # TODO: Could generalize to sequence + if isinstance(input, list): + if not isinstance(input[0], Scalar): + raise TypeError("input must be a list of scalars") + + c_scalars.reserve(len(input)) + for slr in input: + c_scalars.push_back( + # TODO: This requires the constscalar ctypedef + # https://github.com/cython/cython/issues/4180 + reference_wrapper[constscalar](dereference(slr.c_obj)) + ) + + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + c_scalars, + target.view(), + boolean_mask.view(), + ) + ) + elif isinstance(input, Table): + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + (
input).view(), + target.view(), + boolean_mask.view() + ) + ) + else: + raise ValueError(f"Invalid argument {input}") + + return Table.from_libcudf(move(result)) From 40c257ff5f2fe3287f7b94b2536540559c5ef35e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 27 Nov 2023 15:06:27 -0800 Subject: [PATCH 11/20] Add missing functions to pxd --- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 085c4fac80b..f5ebcf1c94b 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -23,3 +23,7 @@ cpdef Table scatter(object source, Column scatter_map, Table target_table) cpdef object empty_like(object input) cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) + +cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) + +cpdef Table boolean_mask_scatter(object input, Table target, Column boolean_mask) From 23f441f09298b8c3fb7c6f3250607be025669b89 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 27 Nov 2023 15:26:05 -0800 Subject: [PATCH 12/20] Implement copy_range --- python/cudf/cudf/_lib/copying.pyx | 26 +++++++-------------- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 8 +++++++ python/cudf/cudf/_lib/pylibcudf/copying.pyx | 21 +++++++++++++++++ 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 786e37f1b07..e79f498639a 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -113,25 +113,15 @@ def _copy_range(Column input_column, size_type input_begin, size_type input_end, size_type target_begin): - - cdef column_view input_column_view = input_column.view() - cdef column_view target_column_view = target_column.view() - cdef size_type c_input_begin = input_begin - cdef size_type c_input_end = input_end - cdef size_type c_target_begin = target_begin - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_copying.copy_range( - input_column_view, - target_column_view, - c_input_begin, - c_input_end, - c_target_begin) + return Column.from_pylibcudf( + pylibcudf.copying.copy_range( + input_column.to_pylibcudf(mode="read"), + target_column.to_pylibcudf(mode="read"), + input_begin, + input_end, + target_begin ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index f5ebcf1c94b..97d1640567e 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -27,3 +27,11 @@ cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, s cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) cpdef Table boolean_mask_scatter(object input, Table target, Column boolean_mask) + +cpdef Column copy_range( + Column input_column, + Column target_column, + size_type input_begin, + size_type input_end, + size_type target_begin, +) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 08c8a8a7cec..4567210b12d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -240,3 +240,24 @@ cpdef Table boolean_mask_scatter(object input, Table target, Column boolean_mask raise ValueError(f"Invalid argument {input}") return Table.from_libcudf(move(result)) + + +cpdef Column copy_range( + Column input_column, + Column target_column, + size_type input_begin, + size_type input_end, + size_type target_begin, +): + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_copying.copy_range( + input_column.view(), + target_column.view(), + input_begin, + input_end, + target_begin) + ) + + return Column.from_libcudf(move(c_result)) From 0650334ce763eec32ce55aaa5f524d4a5544e63e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 28 Nov 2023 16:34:29 -0800 Subject: [PATCH 13/20] Change all methods to split column and table functions --- python/cudf/cudf/_lib/copying.pyx | 38 +++-- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 12 +- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 160 +++++++++++--------- 3 files changed, 117 insertions(+), 93 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index e79f498639a..54dfd6d12bc 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -199,22 +199,25 @@ def scatter(list sources, Column scatter_map, list target_columns, ) if isinstance(sources[0], Column): - plc_source = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]) + tbl = pylibcudf.copying.table_scatter( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]), + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + ) else: - plc_source = [( as_device_scalar(slr)).c_value for slr in sources] + tbl = pylibcudf.copying.scalar_scatter( + [( as_device_scalar(slr)).c_value for slr in sources], + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + ) - tbl = pylibcudf.copying.scatter( - plc_source, - scatter_map.to_pylibcudf(mode="read"), - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - ) return columns_from_pylibcudf_table(tbl) @acquire_spill_lock() def column_empty_like(Column input_column): return Column.from_pylibcudf( - pylibcudf.copying.empty_like( + pylibcudf.copying.column_empty_like( input_column.to_pylibcudf(mode="read") ) ) @@ -233,7 +236,7 @@ def column_allocate_like(Column input_column, size=None): @acquire_spill_lock() def columns_empty_like(list input_columns): return columns_from_pylibcudf_table( - pylibcudf.copying.empty_like( + pylibcudf.copying.table_empty_like( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]) ) ) @@ -467,15 +470,18 @@ def boolean_mask_scatter(list input_, list target_columns, return [] if isinstance(input_[0], Column): - plc_input = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]) + tbl = pylibcudf.copying.table_boolean_mask_scatter( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + boolean_mask.to_pylibcudf(mode="read"), + ) else: - plc_input = [( as_device_scalar(i)).c_value for i in input_] + tbl = pylibcudf.copying.scalar_boolean_mask_scatter( + [( as_device_scalar(i)).c_value for i in input_], + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + boolean_mask.to_pylibcudf(mode="read"), + ) - tbl = pylibcudf.copying.boolean_mask_scatter( - plc_input, - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - boolean_mask.to_pylibcudf(mode="read"), - ) return columns_from_pylibcudf_table(tbl) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 97d1640567e..986fb54df7c 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -18,15 +18,21 @@ cpdef Table gather( cpdef Column shift(Column input, size_type offset, Scalar fill_values) -cpdef Table scatter(object source, Column scatter_map, Table target_table) +cpdef Table table_scatter(Table source, Column scatter_map, Table target_table) -cpdef object empty_like(object input) +cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table) + +cpdef object column_empty_like(Column input) + +cpdef object table_empty_like(Table input) cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) -cpdef Table boolean_mask_scatter(object input, Table target, Column boolean_mask) +cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean_mask) + +cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask) cpdef Column copy_range( Column input_column, diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 4567210b12d..c4d0028a2ee 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -81,61 +81,71 @@ cpdef Column shift(Column input, size_type offset, Scalar fill_values): return Column.from_libcudf(move(c_result)) -cpdef Table scatter(object source, Column scatter_map, Table target_table): +cpdef Table table_scatter(Table source, Column scatter_map, Table target_table): + cdef unique_ptr[table] c_result + + with nogil: + c_result = move( + cpp_copying.scatter( + source.view(), + scatter_map.view(), + target_table.view(), + ) + ) + + return Table.from_libcudf(move(c_result)) + + +cdef _check_is_list_of_scalars(list source): + if not isinstance(source, list) or not isinstance(source[0], Scalar): + raise ValueError("source must be a list[Scalar]") + + +# TODO: Could generalize list to sequence +cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table): cdef unique_ptr[table] c_result cdef vector[reference_wrapper[constscalar]] source_scalars cdef Scalar slr - if isinstance(source, Table): - with nogil: - c_result = move( - cpp_copying.scatter( - (
source).view(), - scatter_map.view(), - target_table.view(), - ) - ) - elif isinstance(source, list): # TODO: is list too restrictive? - for slr in source: - source_scalars.push_back( - reference_wrapper[constscalar](dereference(slr.c_obj)) - ) + _check_is_list_of_scalars(source) - with nogil: - c_result = move( - cpp_copying.scatter( - source_scalars, - scatter_map.view(), - target_table.view(), - ) + for slr in source: + source_scalars.push_back( + reference_wrapper[constscalar](dereference(slr.c_obj)) + ) + + with nogil: + c_result = move( + cpp_copying.scatter( + source_scalars, + scatter_map.view(), + target_table.view(), ) - else: - raise ValueError("source must be a Table or list[Scalar]") + ) return Table.from_libcudf(move(c_result)) -cpdef object empty_like(object input): +cpdef object column_empty_like(Column input): cdef unique_ptr[column] c_column_result - cdef unique_ptr[table] c_table_result - if isinstance(input, Column): - with nogil: - c_column_result = move( - cpp_copying.empty_like( - ( input).view(), - ) + with nogil: + c_column_result = move( + cpp_copying.empty_like( + ( input).view(), ) - return Column.from_libcudf(move(c_column_result)) - elif isinstance(input, Table): - with nogil: - c_table_result = move( - cpp_copying.empty_like( - (
input).view(), - ) + ) + return Column.from_libcudf(move(c_column_result)) + + +cpdef object table_empty_like(Table input): + cdef unique_ptr[table] c_table_result + with nogil: + c_table_result = move( + cpp_copying.empty_like( + (
input).view(), ) - return Table.from_libcudf(move(c_table_result)) - else: - raise ValueError("input must be a Table or a Column") + ) + return Table.from_libcudf(move(c_table_result)) cpdef Column allocate_like( @@ -201,43 +211,45 @@ cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): return Column.from_libcudf(move(result)) -cpdef Table boolean_mask_scatter(object input, Table target, Column boolean_mask): +cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean_mask): cdef unique_ptr[table] result - cdef vector[reference_wrapper[const scalar]] c_scalars - cdef Scalar slr - # TODO: Could generalize to sequence - if isinstance(input, list): - if not isinstance(input[0], Scalar): - raise TypeError("input must be a list of scalars") - - c_scalars.reserve(len(input)) - for slr in input: - c_scalars.push_back( - # TODO: This requires the constscalar ctypedef - # https://github.com/cython/cython/issues/4180 - reference_wrapper[constscalar](dereference(slr.c_obj)) + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + (
input).view(), + target.view(), + boolean_mask.view() ) + ) - with nogil: - result = move( - cpp_copying.boolean_mask_scatter( - c_scalars, - target.view(), - boolean_mask.view(), - ) - ) - elif isinstance(input, Table): - with nogil: - result = move( - cpp_copying.boolean_mask_scatter( - (
input).view(), - target.view(), - boolean_mask.view() - ) + return Table.from_libcudf(move(result)) + + +# TODO: Could generalize list to sequence +cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask): + _check_is_list_of_scalars(input) + + cdef vector[reference_wrapper[const scalar]] c_scalars + c_scalars.reserve(len(input)) + + cdef Scalar slr + for slr in input: + c_scalars.push_back( + # TODO: This requires the constscalar ctypedef + # https://github.com/cython/cython/issues/4180 + reference_wrapper[constscalar](dereference(slr.c_obj)) + ) + + cdef unique_ptr[table] result + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + c_scalars, + target.view(), + boolean_mask.view(), ) - else: - raise ValueError(f"Invalid argument {input}") + ) return Table.from_libcudf(move(result)) From a466c56979f8321a2d88e187df5c74fae84f7bd9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 28 Nov 2023 16:40:48 -0800 Subject: [PATCH 14/20] Update all C++ bindings with proper reference and missing const --- python/cudf/cudf/_lib/cpp/copying.pxd | 62 +++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index ebb73bb9cef..f3e5c0aec72 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -36,15 +36,15 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[table] scatter ( - const table_view source_table, - const column_view scatter_map, - const table_view target_table, + const table_view& source_table, + const column_view& scatter_map, + const table_view& target_table, ) except + cdef unique_ptr[table] scatter ( - vector[reference_wrapper[constscalar]] source_scalars, - const column_view indices, - const table_view target, + const vector[reference_wrapper[constscalar]]& source_scalars, + const column_view& indices, + const table_view& target, ) except + cpdef enum class mask_allocation_policy(int32_t): @@ -53,74 +53,74 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ALWAYS cdef unique_ptr[column] empty_like ( - const column_view input_column + const column_view& input_column ) except + cdef unique_ptr[column] allocate_like ( - const column_view input_column, + const column_view& input_column, mask_allocation_policy policy ) except + cdef unique_ptr[column] allocate_like ( - const column_view input_column, + const column_view& input_column, size_type size, mask_allocation_policy policy ) except + cdef unique_ptr[table] empty_like ( - const table_view input_table + const table_view& input_table ) except + cdef void copy_range_in_place ( - const column_view input_column, - mutable_column_view target_column, + const column_view& input_column, + mutable_column_view& target_column, size_type input_begin, size_type input_end, size_type target_begin ) except + cdef unique_ptr[column] copy_range ( - const column_view input_column, - const column_view target_column, + const column_view& input_column, + const column_view& target_column, size_type input_begin, size_type input_end, size_type target_begin ) except + cdef vector[column_view] slice ( - const column_view input_column, + const column_view& input_column, vector[size_type] indices ) except + cdef vector[table_view] slice ( - const table_view input_table, + const table_view& input_table, vector[size_type] indices ) except + cdef vector[column_view] split ( - const column_view input_column, + const column_view& input_column, vector[size_type] splits ) except + cdef vector[table_view] split ( - const table_view input_table, + const table_view& input_table, vector[size_type] splits ) except + cdef unique_ptr[column] copy_if_else ( - const column_view lhs, - const column_view rhs, - const column_view boolean_mask + const column_view& lhs, + const column_view& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( const scalar& lhs, - const column_view rhs, - const column_view boolean_mask + const column_view& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - const column_view lhs, + const column_view& lhs, const scalar& rhs, const column_view boolean_mask ) except + @@ -132,19 +132,19 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[table] boolean_mask_scatter ( - const table_view input, - const table_view target, - const column_view boolean_mask + const table_view& input, + const table_view& target, + const column_view& boolean_mask ) except + cdef unique_ptr[table] boolean_mask_scatter ( - vector[reference_wrapper[constscalar]] input, - const table_view target, - const column_view boolean_mask + const vector[reference_wrapper[constscalar]]& input, + const table_view& target, + const column_view& boolean_mask ) except + cdef unique_ptr[scalar] get_element ( - const column_view input, + const column_view& input, size_type index ) except + From ebcb1cd6c89fd83ba9ee8e3fb84b9b1e1235291f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 28 Nov 2023 16:43:29 -0800 Subject: [PATCH 15/20] Update constscalar usage and docs --- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index c4d0028a2ee..af12b55a2f5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -26,7 +26,9 @@ from cudf._lib.cpp.copying import \ from .column cimport Column from .table cimport Table -# workaround for https://github.com/cython/cython/issues/3885 +# This is a workaround for +# https://github.com/cython/cython/issues/4180 +# when creating reference_wrapper[constscalar] in the constructor ctypedef const scalar constscalar @@ -104,7 +106,7 @@ cdef _check_is_list_of_scalars(list source): # TODO: Could generalize list to sequence cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table): cdef unique_ptr[table] c_result - cdef vector[reference_wrapper[constscalar]] source_scalars + cdef vector[reference_wrapper[const scalar]] source_scalars cdef Scalar slr _check_is_list_of_scalars(source) @@ -236,8 +238,6 @@ cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean cdef Scalar slr for slr in input: c_scalars.push_back( - # TODO: This requires the constscalar ctypedef - # https://github.com/cython/cython/issues/4180 reference_wrapper[constscalar](dereference(slr.c_obj)) ) From f4e687badf5993e3cbf6489c789c2f07952cf0e6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 28 Nov 2023 16:51:52 -0800 Subject: [PATCH 16/20] Centralize function for generating a list vector of scalars --- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 44 +++++++++------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index af12b55a2f5..0ef53855a07 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -32,6 +32,21 @@ from .table cimport Table ctypedef const scalar constscalar +cdef vector[reference_wrapper[const scalar]] _make_scalar_vector(list source): + """Make a vector of reference_wrapper[const scalar] from a list of scalars.""" + if not isinstance(source, list) or not isinstance(source[0], Scalar): + raise ValueError("source must be a list[Scalar]") + + cdef vector[reference_wrapper[const scalar]] c_scalars + c_scalars.reserve(len(source)) + cdef Scalar slr + for slr in source: + c_scalars.push_back( + reference_wrapper[constscalar](dereference(slr.c_obj)) + ) + return c_scalars + + # TODO: Is it OK to reference the corresponding libcudf algorithm in the # documentation? Otherwise there's a lot of room for duplication. cpdef Table gather( @@ -98,24 +113,12 @@ cpdef Table table_scatter(Table source, Column scatter_map, Table target_table): return Table.from_libcudf(move(c_result)) -cdef _check_is_list_of_scalars(list source): - if not isinstance(source, list) or not isinstance(source[0], Scalar): - raise ValueError("source must be a list[Scalar]") - - # TODO: Could generalize list to sequence cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table): - cdef unique_ptr[table] c_result cdef vector[reference_wrapper[const scalar]] source_scalars - cdef Scalar slr - - _check_is_list_of_scalars(source) - - for slr in source: - source_scalars.push_back( - reference_wrapper[constscalar](dereference(slr.c_obj)) - ) + source_scalars = _make_scalar_vector(source) + cdef unique_ptr[table] c_result with nogil: c_result = move( cpp_copying.scatter( @@ -230,22 +233,13 @@ cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean # TODO: Could generalize list to sequence cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask): - _check_is_list_of_scalars(input) - - cdef vector[reference_wrapper[const scalar]] c_scalars - c_scalars.reserve(len(input)) - - cdef Scalar slr - for slr in input: - c_scalars.push_back( - reference_wrapper[constscalar](dereference(slr.c_obj)) - ) + source_scalars = _make_scalar_vector(input) cdef unique_ptr[table] result with nogil: result = move( cpp_copying.boolean_mask_scatter( - c_scalars, + source_scalars, target.view(), boolean_mask.view(), ) From 6417f7d5f3d2af99ac57edf335f0bb1cab0b5f94 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 28 Nov 2023 17:15:00 -0800 Subject: [PATCH 17/20] Reorder functions --- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 16 ++--- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 68 ++++++++++----------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 986fb54df7c..756d25bcaba 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -16,8 +16,6 @@ cpdef Table gather( out_of_bounds_policy bounds_policy ) -cpdef Column shift(Column input, size_type offset, Scalar fill_values) - cpdef Table table_scatter(Table source, Column scatter_map, Table target_table) cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table) @@ -28,12 +26,6 @@ cpdef object table_empty_like(Table input) cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) -cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) - -cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean_mask) - -cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask) - cpdef Column copy_range( Column input_column, Column target_column, @@ -41,3 +33,11 @@ cpdef Column copy_range( size_type input_end, size_type target_begin, ) + +cpdef Column shift(Column input, size_type offset, Scalar fill_values) + +cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) + +cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean_mask) + +cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 0ef53855a07..4ea5be349be 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -85,19 +85,6 @@ cpdef Table gather( return Table.from_libcudf(move(c_result)) -cpdef Column shift(Column input, size_type offset, Scalar fill_values): - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_copying.shift( - input.view(), - offset, - dereference(fill_values.c_obj) - ) - ) - return Column.from_libcudf(move(c_result)) - - cpdef Table table_scatter(Table source, Column scatter_map, Table target_table): cdef unique_ptr[table] c_result @@ -171,6 +158,40 @@ cpdef Column allocate_like( return Column.from_libcudf(move(c_result)) +cpdef Column copy_range( + Column input_column, + Column target_column, + size_type input_begin, + size_type input_end, + size_type target_begin, +): + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_copying.copy_range( + input_column.view(), + target_column.view(), + input_begin, + input_end, + target_begin) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column shift(Column input, size_type offset, Scalar fill_values): + cdef unique_ptr[column] c_result + with nogil: + c_result = move( + cpp_copying.shift( + input.view(), + offset, + dereference(fill_values.c_obj) + ) + ) + return Column.from_libcudf(move(c_result)) + + cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): cdef unique_ptr[column] result @@ -246,24 +267,3 @@ cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean ) return Table.from_libcudf(move(result)) - - -cpdef Column copy_range( - Column input_column, - Column target_column, - size_type input_begin, - size_type input_end, - size_type target_begin, -): - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_copying.copy_range( - input_column.view(), - target_column.view(), - input_begin, - input_end, - target_begin) - ) - - return Column.from_libcudf(move(c_result)) From f3ef17d124a003a33e9d003288ea6e518db2a0ac Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 28 Nov 2023 17:53:58 -0800 Subject: [PATCH 18/20] Handle timezones in dtype conversion --- python/cudf/cudf/_lib/types.pyx | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 929f8b447ab..d87104bf168 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -3,6 +3,7 @@ from enum import IntEnum import numpy as np +import pandas as pd from libcpp.memory cimport make_shared, shared_ptr @@ -270,9 +271,13 @@ cpdef dtype_to_pylibcudf_type(dtype): else: tid = pylibcudf.TypeId.DECIMAL32 return pylibcudf.DataType(tid, -dtype.scale) - return pylibcudf.DataType( - SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[np.dtype(dtype)] - ) + + # libcudf types don't support localization so convert to the base type + if isinstance(dtype, pd.DatetimeTZDtype): + dtype = np.dtype(f" Date: Thu, 14 Dec 2023 00:28:42 +0000 Subject: [PATCH 19/20] Address PR comments --- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 4ea5be349be..341dfd2ddd5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -32,18 +32,14 @@ from .table cimport Table ctypedef const scalar constscalar -cdef vector[reference_wrapper[const scalar]] _make_scalar_vector(list source): +cdef vector[reference_wrapper[const scalar]] _as_vector(list source): """Make a vector of reference_wrapper[const scalar] from a list of scalars.""" - if not isinstance(source, list) or not isinstance(source[0], Scalar): - raise ValueError("source must be a list[Scalar]") - cdef vector[reference_wrapper[const scalar]] c_scalars c_scalars.reserve(len(source)) cdef Scalar slr for slr in source: c_scalars.push_back( - reference_wrapper[constscalar](dereference(slr.c_obj)) - ) + reference_wrapper[constscalar](dereference((slr).c_obj))) return c_scalars @@ -102,8 +98,8 @@ cpdef Table table_scatter(Table source, Column scatter_map, Table target_table): # TODO: Could generalize list to sequence cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table): - cdef vector[reference_wrapper[const scalar]] source_scalars - source_scalars = _make_scalar_vector(source) + cdef vector[reference_wrapper[const scalar]] source_scalars = \ + _as_vector(source) cdef unique_ptr[table] c_result with nogil: @@ -254,7 +250,7 @@ cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean # TODO: Could generalize list to sequence cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask): - source_scalars = _make_scalar_vector(input) + cdef vector[reference_wrapper[const scalar]] source_scalars = _as_vector(input) cdef unique_ptr[table] result with nogil: From b0ae7f0ff7ca31c35b84e0e87723450f16bc1b69 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 14 Dec 2023 23:23:24 +0000 Subject: [PATCH 20/20] Rename functions as per offline discussion --- python/cudf/cudf/_lib/copying.pyx | 12 ++++++------ python/cudf/cudf/_lib/pylibcudf/copying.pxd | 12 ++++++------ python/cudf/cudf/_lib/pylibcudf/copying.pyx | 12 ++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 54dfd6d12bc..fbe2c8751dd 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -199,13 +199,13 @@ def scatter(list sources, Column scatter_map, list target_columns, ) if isinstance(sources[0], Column): - tbl = pylibcudf.copying.table_scatter( + tbl = pylibcudf.copying.scatter_table( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]), scatter_map.to_pylibcudf(mode="read"), pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), ) else: - tbl = pylibcudf.copying.scalar_scatter( + tbl = pylibcudf.copying.scatter_scalars( [( as_device_scalar(slr)).c_value for slr in sources], scatter_map.to_pylibcudf(mode="read"), pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), @@ -217,7 +217,7 @@ def scatter(list sources, Column scatter_map, list target_columns, @acquire_spill_lock() def column_empty_like(Column input_column): return Column.from_pylibcudf( - pylibcudf.copying.column_empty_like( + pylibcudf.copying.empty_column_like( input_column.to_pylibcudf(mode="read") ) ) @@ -236,7 +236,7 @@ def column_allocate_like(Column input_column, size=None): @acquire_spill_lock() def columns_empty_like(list input_columns): return columns_from_pylibcudf_table( - pylibcudf.copying.table_empty_like( + pylibcudf.copying.empty_table_like( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]) ) ) @@ -470,13 +470,13 @@ def boolean_mask_scatter(list input_, list target_columns, return [] if isinstance(input_[0], Column): - tbl = pylibcudf.copying.table_boolean_mask_scatter( + tbl = pylibcudf.copying.boolean_mask_table_scatter( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]), pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), boolean_mask.to_pylibcudf(mode="read"), ) else: - tbl = pylibcudf.copying.scalar_boolean_mask_scatter( + tbl = pylibcudf.copying.boolean_mask_scalars_scatter( [( as_device_scalar(i)).c_value for i in input_], pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), boolean_mask.to_pylibcudf(mode="read"), diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 756d25bcaba..db0e42f5804 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -16,13 +16,13 @@ cpdef Table gather( out_of_bounds_policy bounds_policy ) -cpdef Table table_scatter(Table source, Column scatter_map, Table target_table) +cpdef Table scatter_table(Table source, Column scatter_map, Table target_table) -cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table) +cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table) -cpdef object column_empty_like(Column input) +cpdef object empty_column_like(Column input) -cpdef object table_empty_like(Table input) +cpdef object empty_table_like(Table input) cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) @@ -38,6 +38,6 @@ cpdef Column shift(Column input, size_type offset, Scalar fill_values) cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) -cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean_mask) +cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask) -cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask) +cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 341dfd2ddd5..634aed3e6e5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -81,7 +81,7 @@ cpdef Table gather( return Table.from_libcudf(move(c_result)) -cpdef Table table_scatter(Table source, Column scatter_map, Table target_table): +cpdef Table scatter_table(Table source, Column scatter_map, Table target_table): cdef unique_ptr[table] c_result with nogil: @@ -97,7 +97,7 @@ cpdef Table table_scatter(Table source, Column scatter_map, Table target_table): # TODO: Could generalize list to sequence -cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table): +cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table): cdef vector[reference_wrapper[const scalar]] source_scalars = \ _as_vector(source) @@ -114,7 +114,7 @@ cpdef Table scalar_scatter(list source, Column scatter_map, Table target_table): return Table.from_libcudf(move(c_result)) -cpdef object column_empty_like(Column input): +cpdef object empty_column_like(Column input): cdef unique_ptr[column] c_column_result with nogil: c_column_result = move( @@ -125,7 +125,7 @@ cpdef object column_empty_like(Column input): return Column.from_libcudf(move(c_column_result)) -cpdef object table_empty_like(Table input): +cpdef object empty_table_like(Table input): cdef unique_ptr[table] c_table_result with nogil: c_table_result = move( @@ -233,7 +233,7 @@ cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): return Column.from_libcudf(move(result)) -cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean_mask): +cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask): cdef unique_ptr[table] result with nogil: @@ -249,7 +249,7 @@ cpdef Table table_boolean_mask_scatter(Table input, Table target, Column boolean # TODO: Could generalize list to sequence -cpdef Table scalar_boolean_mask_scatter(list input, Table target, Column boolean_mask): +cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask): cdef vector[reference_wrapper[const scalar]] source_scalars = _as_vector(input) cdef unique_ptr[table] result