From f4fbdd0e8536737c82490389c182ae5990b266d9 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 15 Dec 2023 08:14:38 -0800 Subject: [PATCH] Implement more copying APIs in pylibcudf (#14508) This PR implements a handful of libcudf copying APIs in pylibcudf and rewrites cudf Cython internals to use those functions where appropriate. This PR only implements those methods that can be done without any additional changes to the pylibcudf Column class. I'll follow up with other PRs for the APIs that require such changes. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/14508 --- cpp/include/cudf/copying.hpp | 2 +- python/cudf/cudf/_lib/copying.pyx | 255 +++++--------------- python/cudf/cudf/_lib/cpp/copying.pxd | 88 +++---- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 30 ++- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 214 +++++++++++++++- python/cudf/cudf/_lib/pylibcudf/scalar.pxd | 2 +- python/cudf/cudf/_lib/pylibcudf/scalar.pyx | 2 +- python/cudf/cudf/_lib/types.pyx | 11 +- 8 files changed, 354 insertions(+), 250 deletions(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 63680473c14..b3a8836b193 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -211,7 +211,7 @@ std::unique_ptr<table> scatter( /** * @brief Indicates when to allocate a mask, based on an existing mask. 
*/ -enum class mask_allocation_policy { +enum class mask_allocation_policy : int32_t { NEVER, ///< Do not allocate a null mask, regardless of input RETAIN, ///< Allocate a null mask if the input contains one ALWAYS ///< Allocate a null mask, regardless of input diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index ea6ee76c14a..fbe2c8751dd 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -2,7 +2,7 @@ import pickle -from libc.stdint cimport int32_t, uint8_t, uintptr_t +from libc.stdint cimport uint8_t, uintptr_t from libcpp cimport bool from libcpp.memory cimport make_shared, shared_ptr, unique_ptr from libcpp.utility cimport move @@ -24,7 +24,6 @@ from cudf._lib.utils cimport table_view_from_columns, table_view_from_table from cudf._lib.reduce import minmax from cudf.core.abc import Serializable -from libcpp.functional cimport reference_wrapper from libcpp.memory cimport make_unique cimport cudf._lib.cpp.contiguous_split as cpp_contiguous_split @@ -36,13 +35,11 @@ from cudf._lib.cpp.lists.gather cimport ( ) from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf._lib.cpp.scalar.scalar cimport scalar -from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type from cudf._lib.utils cimport ( columns_from_pylibcudf_table, columns_from_table_view, - columns_from_unique_ptr, data_from_table_view, table_view_from_columns, ) @@ -116,25 +113,15 @@ def _copy_range(Column input_column, size_type input_begin, size_type input_end, size_type target_begin): - - cdef column_view input_column_view = input_column.view() - cdef column_view target_column_view = target_column.view() - cdef size_type c_input_begin = input_begin - cdef size_type c_input_end = input_end - cdef size_type c_target_begin = target_begin - - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_copying.copy_range( 
- input_column_view, - target_column_view, - c_input_begin, - c_input_end, - c_target_begin) + return Column.from_pylibcudf( + pylibcudf.copying.copy_range( + input_column.to_pylibcudf(mode="read"), + target_column.to_pylibcudf(mode="read"), + input_begin, + input_end, + target_begin ) - - return Column.from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() @@ -184,48 +171,6 @@ def gather( return columns_from_pylibcudf_table(tbl) -cdef scatter_scalar(list source_device_slrs, - column_view scatter_map, - table_view target_table): - cdef vector[reference_wrapper[constscalar]] c_source - cdef DeviceScalar d_slr - cdef unique_ptr[table] c_result - - c_source.reserve(len(source_device_slrs)) - for d_slr in source_device_slrs: - c_source.push_back( - reference_wrapper[constscalar](d_slr.get_raw_ptr()[0]) - ) - - with nogil: - c_result = move( - cpp_copying.scatter( - c_source, - scatter_map, - target_table, - ) - ) - - return columns_from_unique_ptr(move(c_result)) - - -cdef scatter_column(list source_columns, - column_view scatter_map, - table_view target_table): - cdef table_view c_source = table_view_from_columns(source_columns) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.scatter( - c_source, - scatter_map, - target_table, - ) - ) - return columns_from_unique_ptr(move(c_result)) - - @acquire_spill_lock() def scatter(list sources, Column scatter_map, list target_columns, bool bounds_check=True): @@ -243,9 +188,6 @@ def scatter(list sources, Column scatter_map, list target_columns, if len(sources) == 0: return [] - cdef column_view scatter_map_view = scatter_map.view() - cdef table_view target_table_view = table_view_from_columns(target_columns) - if bounds_check: n_rows = len(target_columns[0]) if not ( @@ -257,62 +199,47 @@ def scatter(list sources, Column scatter_map, list target_columns, ) if isinstance(sources[0], Column): - return scatter_column( - sources, scatter_map_view, target_table_view + tbl = 
pylibcudf.copying.scatter_table( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]), + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), ) else: - source_scalars = [as_device_scalar(slr) for slr in sources] - return scatter_scalar( - source_scalars, scatter_map_view, target_table_view + tbl = pylibcudf.copying.scatter_scalars( + [(<DeviceScalar> as_device_scalar(slr)).c_value for slr in sources], + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), ) + return columns_from_pylibcudf_table(tbl) + @acquire_spill_lock() def column_empty_like(Column input_column): - - cdef column_view input_column_view = input_column.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_copying.empty_like(input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.copying.empty_column_like( + input_column.to_pylibcudf(mode="read") + ) + ) @acquire_spill_lock() def column_allocate_like(Column input_column, size=None): - - cdef size_type c_size = 0 - cdef column_view input_column_view = input_column.view() - cdef unique_ptr[column] c_result - - if size is None: - with nogil: - c_result = move(cpp_copying.allocate_like( - input_column_view, - cpp_copying.mask_allocation_policy.RETAIN) - ) - else: - c_size = size - with nogil: - c_result = move(cpp_copying.allocate_like( - input_column_view, - c_size, - cpp_copying.mask_allocation_policy.RETAIN) - ) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + pylibcudf.copying.allocate_like( + input_column.to_pylibcudf(mode="read"), + size, + ) + ) @acquire_spill_lock() def columns_empty_like(list input_columns): - cdef table_view input_table_view = table_view_from_columns(input_columns) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move(cpp_copying.empty_like(input_table_view)) - - 
return columns_from_unique_ptr(move(c_result)) + return columns_from_pylibcudf_table( + pylibcudf.copying.empty_table_like( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]) + ) + ) @acquire_spill_lock() @@ -513,70 +440,15 @@ def _copy_if_else_scalar_scalar(DeviceScalar lhs, @acquire_spill_lock() def copy_if_else(object lhs, object rhs, Column boolean_mask): - - if isinstance(lhs, Column): - if isinstance(rhs, Column): - return _copy_if_else_column_column(lhs, rhs, boolean_mask) - else: - return _copy_if_else_column_scalar( - lhs, as_device_scalar(rhs), boolean_mask) - else: - if isinstance(rhs, Column): - return _copy_if_else_scalar_column( - as_device_scalar(lhs), rhs, boolean_mask) - else: - if lhs is None and rhs is None: - return lhs - - return _copy_if_else_scalar_scalar( - as_device_scalar(lhs), as_device_scalar(rhs), boolean_mask) - - -def _boolean_mask_scatter_columns(list input_columns, list target_columns, - Column boolean_mask): - - cdef table_view input_table_view = table_view_from_columns(input_columns) - cdef table_view target_table_view = table_view_from_columns(target_columns) - cdef column_view boolean_mask_view = boolean_mask.view() - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.boolean_mask_scatter( - input_table_view, - target_table_view, - boolean_mask_view - ) - ) - - return columns_from_unique_ptr(move(c_result)) - - -def _boolean_mask_scatter_scalar(list input_scalars, list target_columns, - Column boolean_mask): - - cdef vector[reference_wrapper[constscalar]] input_scalar_vector - input_scalar_vector.reserve(len(input_scalars)) - cdef DeviceScalar scl - for scl in input_scalars: - input_scalar_vector.push_back(reference_wrapper[constscalar]( - scl.get_raw_ptr()[0])) - cdef table_view target_table_view = table_view_from_columns(target_columns) - cdef column_view boolean_mask_view = boolean_mask.view() - - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - 
cpp_copying.boolean_mask_scatter( - input_scalar_vector, - target_table_view, - boolean_mask_view - ) + return Column.from_pylibcudf( + pylibcudf.copying.copy_if_else( + lhs.to_pylibcudf(mode="read") if isinstance(lhs, Column) + else (<DeviceScalar> as_device_scalar(lhs)).c_value, + rhs.to_pylibcudf(mode="read") if isinstance(rhs, Column) + else (<DeviceScalar> as_device_scalar(rhs)).c_value, + boolean_mask.to_pylibcudf(mode="read"), ) - - return columns_from_unique_ptr(move(c_result)) + ) @acquire_spill_lock() @@ -598,23 +470,23 @@ def boolean_mask_scatter(list input_, list target_columns, return [] if isinstance(input_[0], Column): - return _boolean_mask_scatter_columns( - input_, - target_columns, - boolean_mask + tbl = pylibcudf.copying.boolean_mask_table_scatter( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + boolean_mask.to_pylibcudf(mode="read"), ) else: - scalar_list = [as_device_scalar(i) for i in input_] - return _boolean_mask_scatter_scalar( - scalar_list, - target_columns, - boolean_mask + tbl = pylibcudf.copying.boolean_mask_scalars_scatter( + [(<DeviceScalar> as_device_scalar(i)).c_value for i in input_], + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + boolean_mask.to_pylibcudf(mode="read"), ) + return columns_from_pylibcudf_table(tbl) + @acquire_spill_lock() def shift(Column input, int offset, object fill_value=None): - cdef DeviceScalar fill if isinstance(fill_value, DeviceScalar): @@ -622,21 +494,12 @@ def shift(Column input, int offset, object fill_value=None): else: fill = as_device_scalar(fill_value, input.dtype) - cdef column_view c_input = input.view() - cdef int32_t c_offset = offset - cdef const scalar* c_fill_value = fill.get_raw_ptr() - cdef unique_ptr[column] c_output - - with nogil: - c_output = move( - cpp_copying.shift( - c_input, - c_offset, - c_fill_value[0] - ) - ) - - return Column.from_unique_ptr(move(c_output)) + col = 
pylibcudf.copying.shift( + input.to_pylibcudf(mode="read"), + offset, + fill.c_value, + ) + return Column.from_pylibcudf(col) @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index 5637b55ac1c..f3e5c0aec72 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -36,118 +36,118 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ) except + cdef unique_ptr[table] scatter ( - table_view source_table, - column_view scatter_map, - table_view target_table, + const table_view& source_table, + const column_view& scatter_map, + const table_view& target_table, ) except + cdef unique_ptr[table] scatter ( - vector[reference_wrapper[constscalar]] source_scalars, - column_view indices, - table_view target, + const vector[reference_wrapper[constscalar]]& source_scalars, + const column_view& indices, + const table_view& target, ) except + - ctypedef enum mask_allocation_policy: - NEVER 'cudf::mask_allocation_policy::NEVER', - RETAIN 'cudf::mask_allocation_policy::RETAIN', - ALWAYS 'cudf::mask_allocation_policy::ALWAYS' + cpdef enum class mask_allocation_policy(int32_t): + NEVER + RETAIN + ALWAYS cdef unique_ptr[column] empty_like ( - column_view input_column + const column_view& input_column ) except + cdef unique_ptr[column] allocate_like ( - column_view input_column, + const column_view& input_column, mask_allocation_policy policy ) except + cdef unique_ptr[column] allocate_like ( - column_view input_column, + const column_view& input_column, size_type size, mask_allocation_policy policy ) except + cdef unique_ptr[table] empty_like ( - table_view input_table + const table_view& input_table ) except + cdef void copy_range_in_place ( - column_view input_column, - mutable_column_view target_column, + const column_view& input_column, + mutable_column_view& target_column, size_type input_begin, size_type input_end, size_type target_begin ) except + cdef 
unique_ptr[column] copy_range ( - column_view input_column, - column_view target_column, + const column_view& input_column, + const column_view& target_column, size_type input_begin, size_type input_end, size_type target_begin ) except + cdef vector[column_view] slice ( - column_view input_column, + const column_view& input_column, vector[size_type] indices ) except + cdef vector[table_view] slice ( - table_view input_table, + const table_view& input_table, vector[size_type] indices ) except + cdef vector[column_view] split ( - column_view input_column, + const column_view& input_column, vector[size_type] splits ) except + cdef vector[table_view] split ( - table_view input_table, + const table_view& input_table, vector[size_type] splits ) except + cdef unique_ptr[column] copy_if_else ( - column_view lhs, - column_view rhs, - column_view boolean_mask + const column_view& lhs, + const column_view& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - scalar lhs, - column_view rhs, - column_view boolean_mask + const scalar& lhs, + const column_view& rhs, + const column_view& boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - column_view lhs, - scalar rhs, - column_view boolean_mask + const column_view& lhs, + const scalar& rhs, + const column_view boolean_mask ) except + cdef unique_ptr[column] copy_if_else ( - scalar lhs, - scalar rhs, - column_view boolean_mask + const scalar& lhs, + const scalar& rhs, + const column_view boolean_mask ) except + cdef unique_ptr[table] boolean_mask_scatter ( - table_view input, - table_view target, - column_view boolean_mask + const table_view& input, + const table_view& target, + const column_view& boolean_mask ) except + cdef unique_ptr[table] boolean_mask_scatter ( - vector[reference_wrapper[constscalar]] input, - table_view target, - column_view boolean_mask + const vector[reference_wrapper[constscalar]]& input, + const table_view& target, + const column_view& boolean_mask ) 
except + cdef unique_ptr[scalar] get_element ( - column_view input, + const column_view& input, size_type index ) except + - ctypedef enum sample_with_replacement: - FALSE 'cudf::sample_with_replacement::FALSE', - TRUE 'cudf::sample_with_replacement::TRUE', + cpdef enum class sample_with_replacement(bool): + FALSE + TRUE diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index d57be650710..db0e42f5804 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -2,9 +2,11 @@ from libcpp cimport bool as cbool -from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.copying cimport mask_allocation_policy, out_of_bounds_policy +from cudf._lib.cpp.types cimport size_type from .column cimport Column +from .scalar cimport Scalar from .table cimport Table @@ -13,3 +15,29 @@ cpdef Table gather( Column gather_map, out_of_bounds_policy bounds_policy ) + +cpdef Table scatter_table(Table source, Column scatter_map, Table target_table) + +cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table) + +cpdef object empty_column_like(Column input) + +cpdef object empty_table_like(Table input) + +cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) + +cpdef Column copy_range( + Column input_column, + Column target_column, + size_type input_begin, + size_type input_end, + size_type target_begin, +) + +cpdef Column shift(Column input, size_type offset, Scalar fill_values) + +cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) + +cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask) + +cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index a27b44b3107..634aed3e6e5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx 
+++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -1,23 +1,47 @@ # Copyright (c) 2023, NVIDIA CORPORATION. +from cython.operator import dereference + +from libcpp.functional cimport reference_wrapper from libcpp.memory cimport unique_ptr from libcpp.utility cimport move +from libcpp.vector cimport vector # TODO: We want to make cpp a more full-featured package so that we can access # directly from that. It will make namespacing much cleaner in pylibcudf. What # we really want here would be # cimport libcudf... libcudf.copying.algo(...) from cudf._lib.cpp cimport copying as cpp_copying -from cudf._lib.cpp.copying cimport out_of_bounds_policy +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.copying cimport mask_allocation_policy, out_of_bounds_policy +from cudf._lib.cpp.scalar.scalar cimport scalar +from cudf._lib.cpp.table.table cimport table +from cudf._lib.cpp.types cimport size_type +from cudf._lib.cpp.copying import \ + mask_allocation_policy as MaskAllocationPolicy # no-cython-lint from cudf._lib.cpp.copying import \ out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint -from cudf._lib.cpp.table.table cimport table - from .column cimport Column from .table cimport Table +# This is a workaround for +# https://github.com/cython/cython/issues/4180 +# when creating reference_wrapper[constscalar] in the constructor +ctypedef const scalar constscalar + + +cdef vector[reference_wrapper[const scalar]] _as_vector(list source): + """Make a vector of reference_wrapper[const scalar] from a list of scalars.""" + cdef vector[reference_wrapper[const scalar]] c_scalars + c_scalars.reserve(len(source)) + cdef Scalar slr + for slr in source: + c_scalars.push_back( + reference_wrapper[constscalar](dereference((slr).c_obj))) + return c_scalars + # TODO: Is it OK to reference the corresponding libcudf algorithm in the # documentation? Otherwise there's a lot of room for duplication. 
@@ -55,3 +79,187 @@ cpdef Table gather( ) ) return Table.from_libcudf(move(c_result)) + + +cpdef Table scatter_table(Table source, Column scatter_map, Table target_table): + cdef unique_ptr[table] c_result + + with nogil: + c_result = move( + cpp_copying.scatter( + source.view(), + scatter_map.view(), + target_table.view(), + ) + ) + + return Table.from_libcudf(move(c_result)) + + +# TODO: Could generalize list to sequence +cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table): + cdef vector[reference_wrapper[const scalar]] source_scalars = \ + _as_vector(source) + + cdef unique_ptr[table] c_result + with nogil: + c_result = move( + cpp_copying.scatter( + source_scalars, + scatter_map.view(), + target_table.view(), + ) + ) + + return Table.from_libcudf(move(c_result)) + + +cpdef object empty_column_like(Column input): + cdef unique_ptr[column] c_column_result + with nogil: + c_column_result = move( + cpp_copying.empty_like( + (<Column> input).view(), + ) + ) + return Column.from_libcudf(move(c_column_result)) + + +cpdef object empty_table_like(Table input): + cdef unique_ptr[table] c_table_result + with nogil: + c_table_result = move( + cpp_copying.empty_like( + (<Table> input).view(), + ) + ) + return Table.from_libcudf(move(c_table_result)) + + +cpdef Column allocate_like( + Column input_column, mask_allocation_policy policy, size=None +): + cdef unique_ptr[column] c_result + cdef size_type c_size = size if size is not None else input_column.size() + + with nogil: + c_result = move( + cpp_copying.allocate_like( + input_column.view(), + c_size, + policy, + ) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column copy_range( + Column input_column, + Column target_column, + size_type input_begin, + size_type input_end, + size_type target_begin, +): + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_copying.copy_range( + input_column.view(), + target_column.view(), + input_begin, + input_end, + target_begin) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column shift(Column input, size_type offset, Scalar fill_values): + cdef unique_ptr[column] c_result + with nogil: + c_result = move( + cpp_copying.shift( + input.view(), + offset, + dereference(fill_values.c_obj) + ) + ) + return Column.from_libcudf(move(c_result)) + + +cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): + cdef unique_ptr[column] result + + if isinstance(lhs, Column) and isinstance(rhs, Column): + with nogil: + result = move( + cpp_copying.copy_if_else( + (<Column> lhs).view(), + (<Column> rhs).view(), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Column) and isinstance(rhs, Scalar): + with nogil: + result = move( + cpp_copying.copy_if_else( + (<Column> lhs).view(), + dereference((<Scalar> rhs).c_obj), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Scalar) and isinstance(rhs, Column): + with nogil: + result = move( + cpp_copying.copy_if_else( + dereference((<Scalar> lhs).c_obj), + (<Column> rhs).view(), + boolean_mask.view() + ) + ) + elif isinstance(lhs, Scalar) and isinstance(rhs, Scalar): + with nogil: + result = move( + cpp_copying.copy_if_else( + dereference((<Scalar> lhs).c_obj), + dereference((<Scalar> rhs).c_obj), + boolean_mask.view() + 
) + ) + else: + raise ValueError(f"Invalid arguments {lhs} and {rhs}") + + return Column.from_libcudf(move(result)) + + +cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask): + cdef unique_ptr[table] result + + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + (<Table> input).view(), + target.view(), + boolean_mask.view() + ) + ) + + return Table.from_libcudf(move(result)) + + +# TODO: Could generalize list to sequence +cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask): + cdef vector[reference_wrapper[const scalar]] source_scalars = _as_vector(input) + + cdef unique_ptr[table] result + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + source_scalars, + target.view(), + boolean_mask.view(), + ) + ) + + return Table.from_libcudf(move(result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd index 09d853d832f..0edc934ca22 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pxd @@ -21,7 +21,7 @@ cdef class Scalar: # needed for deallocation cdef DeviceMemoryResource mr - cdef const scalar* get(self) except * + cdef const scalar* get(self) noexcept nogil cpdef DataType type(self) cpdef bool is_valid(self) diff --git a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx index 04f588bd3e6..965f10999f2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/scalar.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/scalar.pyx @@ -108,7 +108,7 @@ cdef class Scalar: return pa.pyarrow_wrap_scalar(c_result) - cdef const scalar* get(self) except *: + cdef const scalar* get(self) noexcept nogil: return self.c_obj.get() cpdef DataType type(self): diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 929f8b447ab..d87104bf168 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -3,6 +3,7 @@ from enum import IntEnum import numpy as np +import pandas as pd from libcpp.memory cimport make_shared, shared_ptr @@ -270,9 +271,13 @@ cpdef dtype_to_pylibcudf_type(dtype): else: tid = pylibcudf.TypeId.DECIMAL32 return pylibcudf.DataType(tid, -dtype.scale) - return pylibcudf.DataType( - 
SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[np.dtype(dtype)] - ) + + # libcudf types don't support localization so convert to the base type + if isinstance(dtype, pd.DatetimeTZDtype): + dtype = np.dtype(f"