From ba50daeadb8281486532666c766f0abd6d7f3209 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Sun, 4 Feb 2024 08:42:16 +0000 Subject: [PATCH 1/3] Implement binaryop using fused types --- python/cudf/cudf/_lib/pylibcudf/binaryop.pxd | 14 +++++++-- python/cudf/cudf/_lib/pylibcudf/binaryop.pyx | 32 ++++++++++---------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd b/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd index 56b98333757..a0bcbfd00b2 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pxd @@ -3,12 +3,22 @@ from cudf._lib.cpp.binaryop cimport binary_operator from .column cimport Column +from .scalar cimport Scalar from .types cimport DataType +# Need two separate fused types to generate the cartesian product of signatures. +ctypedef fused LeftBinaryOperand: + Column + Scalar + +ctypedef fused RightBinaryOperand: + Column + Scalar + cpdef Column binary_operation( - object lhs, - object rhs, + LeftBinaryOperand lhs, + RightBinaryOperand rhs, binary_operator op, DataType data_type ) diff --git a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx index af248ba2071..4675fae90b0 100644 --- a/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/binaryop.pyx @@ -18,25 +18,25 @@ from .types cimport DataType cpdef Column binary_operation( - object lhs, - object rhs, + LeftBinaryOperand lhs, + RightBinaryOperand rhs, binary_operator op, DataType data_type ): """Perform a binary operation between a column and another column or scalar. - Either ``lhs`` or ``rhs`` must be a - :py:class:`~cudf._lib.pylibcudf.column.Column`. The other may be a + ``lhs`` and ``rhs`` may be a :py:class:`~cudf._lib.pylibcudf.column.Column` or a - :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`. 
+ :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`, but at least one must be a + :py:class:`~cudf._lib.pylibcudf.column.Column`. For details, see :cpp:func:`binary_operation`. Parameters ---------- - lhs : Column or Scalar + lhs : Union[Column, Scalar] The left hand side argument. - rhs : Column or Scalar + rhs : Union[Column, Scalar] The right hand side argument. op : BinaryOperator The operation to perform. @@ -50,32 +50,32 @@ cpdef Column binary_operation( """ cdef unique_ptr[column] result - if isinstance(lhs, Column) and isinstance(rhs, Column): + if LeftBinaryOperand is Column and RightBinaryOperand is Column: with nogil: result = move( cpp_binaryop.binary_operation( - ( lhs).view(), - ( rhs).view(), + lhs.view(), + rhs.view(), op, data_type.c_obj ) ) - elif isinstance(lhs, Column) and isinstance(rhs, Scalar): + elif LeftBinaryOperand is Column and RightBinaryOperand is Scalar: with nogil: result = move( cpp_binaryop.binary_operation( - ( lhs).view(), - dereference(( rhs).c_obj), + lhs.view(), + dereference(rhs.c_obj), op, data_type.c_obj ) ) - elif isinstance(lhs, Scalar) and isinstance(rhs, Column): + elif LeftBinaryOperand is Scalar and RightBinaryOperand is Column: with nogil: result = move( cpp_binaryop.binary_operation( - dereference(( lhs).c_obj), - ( rhs).view(), + dereference(lhs.c_obj), + rhs.view(), op, data_type.c_obj ) From 982e78d0c5380e22efb80c7cfe76ca0476666b27 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Sun, 4 Feb 2024 08:45:49 +0000 Subject: [PATCH 2/3] Use fused types for copying APIs --- python/cudf/cudf/_lib/copying.pyx | 52 ++- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 52 ++- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 395 +++++++------------- 3 files changed, 190 insertions(+), 309 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 8eb0500617f..6a52af520f0 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -1,4 +1,4 @@ -# Copyright 
(c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import pickle @@ -184,18 +184,13 @@ def scatter(list sources, Column scatter_map, list target_columns, f"index out of bounds for column of size {n_rows}" ) - if isinstance(sources[0], Column): - tbl = pylibcudf.copying.scatter_table( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]), - scatter_map.to_pylibcudf(mode="read"), - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - ) - else: - tbl = pylibcudf.copying.scatter_scalars( - [( as_device_scalar(slr)).c_value for slr in sources], - scatter_map.to_pylibcudf(mode="read"), - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - ) + tbl = pylibcudf.copying.scatter( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]) + if isinstance(sources[0], Column) + else [( as_device_scalar(slr)).c_value for slr in sources], + scatter_map.to_pylibcudf(mode="read"), + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + ) return columns_from_pylibcudf_table(tbl) @@ -203,7 +198,7 @@ def scatter(list sources, Column scatter_map, list target_columns, @acquire_spill_lock() def column_empty_like(Column input_column): return Column.from_pylibcudf( - pylibcudf.copying.empty_column_like( + pylibcudf.copying.empty_like( input_column.to_pylibcudf(mode="read") ) ) @@ -222,7 +217,7 @@ def column_allocate_like(Column input_column, size=None): @acquire_spill_lock() def columns_empty_like(list input_columns): return columns_from_pylibcudf_table( - pylibcudf.copying.empty_table_like( + pylibcudf.copying.empty_like( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]) ) ) @@ -232,7 +227,7 @@ def columns_empty_like(list input_columns): def column_slice(Column input_column, object indices): return [ Column.from_pylibcudf(c) - for c in pylibcudf.copying.column_slice( + for c in pylibcudf.copying.slice( 
input_column.to_pylibcudf(mode="read"), list(indices), ) @@ -243,7 +238,7 @@ def column_slice(Column input_column, object indices): def columns_slice(list input_columns, object indices): return [ columns_from_pylibcudf_table(tbl) - for tbl in pylibcudf.copying.table_slice( + for tbl in pylibcudf.copying.slice( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]), list(indices), ) @@ -254,7 +249,7 @@ def columns_slice(list input_columns, object indices): def column_split(Column input_column, object splits): return [ Column.from_pylibcudf(c) - for c in pylibcudf.copying.column_split( + for c in pylibcudf.copying.split( input_column.to_pylibcudf(mode="read"), list(splits), ) @@ -265,7 +260,7 @@ def column_split(Column input_column, object splits): def columns_split(list input_columns, object splits): return [ columns_from_pylibcudf_table(tbl) - for tbl in pylibcudf.copying.table_split( + for tbl in pylibcudf.copying.split( pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]), list(splits), ) @@ -303,18 +298,13 @@ def boolean_mask_scatter(list input_, list target_columns, if len(input_) == 0: return [] - if isinstance(input_[0], Column): - tbl = pylibcudf.copying.boolean_mask_table_scatter( - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]), - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - boolean_mask.to_pylibcudf(mode="read"), - ) - else: - tbl = pylibcudf.copying.boolean_mask_scalars_scatter( - [( as_device_scalar(i)).c_value for i in input_], - pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), - boolean_mask.to_pylibcudf(mode="read"), - ) + tbl = pylibcudf.copying.boolean_mask_scatter( + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]) + if isinstance(input_[0], Column) + else [( as_device_scalar(i)).c_value for i in input_], + pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]), + 
boolean_mask.to_pylibcudf(mode="read"), + ) return columns_from_pylibcudf_table(tbl) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 3567df9ac9c..7b5f1e70ea3 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. from libcpp cimport bool as cbool @@ -9,6 +9,26 @@ from .column cimport Column from .scalar cimport Scalar from .table cimport Table +ctypedef fused ColumnOrTable: + Table + Column + + +ctypedef fused TableOrListOfScalars: + Table + # The contents of the list must be validated as Scalars at runtime. + list + + +# Need two separate fused types to generate the cartesian product of signatures. +ctypedef fused LeftCopyIfElseOperand: + Column + Scalar + +ctypedef fused RightCopyIfElseOperand: + Column + Scalar + cpdef Table gather( Table source_table, @@ -16,13 +36,9 @@ cpdef Table gather( out_of_bounds_policy bounds_policy ) -cpdef Table scatter_table(Table source, Column scatter_map, Table target_table) - -cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table) +cpdef Table scatter(TableOrListOfScalars source, Column scatter_map, Table target_table) -cpdef object empty_column_like(Column input) - -cpdef object empty_table_like(Table input) +cpdef ColumnOrTable empty_like(ColumnOrTable input) cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*) @@ -44,18 +60,20 @@ cpdef Column copy_range( cpdef Column shift(Column input, size_type offset, Scalar fill_values) -cpdef list column_split(Column input_column, list splits) - -cpdef list table_split(Table input_table, list splits) - -cpdef list column_slice(Column input_column, list indices) +cpdef list split(ColumnOrTable input, list splits) -cpdef list table_slice(Table input_table, list indices) +cpdef list slice(ColumnOrTable input, list 
indices) -cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask) - -cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask) +cpdef Column copy_if_else( + LeftCopyIfElseOperand lhs, + RightCopyIfElseOperand rhs, + Column boolean_mask +) -cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask) +cpdef Table boolean_mask_scatter( + TableOrListOfScalars input, + Table target, + Column boolean_mask +) cpdef Scalar get_element(Column input_column, size_type index) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 12e592f3a92..d78955dc325 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -67,49 +67,22 @@ cpdef Table gather( return Table.from_libcudf(move(c_result)) -cpdef Table scatter_table(Table source, Column scatter_map, Table target_table): - """Scatter rows from source into target_table according to scatter_map. - - For details, see :cpp:func:`scatter`. - - Parameters - ---------- - source : Table - The table object from which to pull data. - scatter_map : Column - A mapping from rows in source to rows in target_table. - target_table : Table - The table object into which to scatter data. - - Returns - ------- - pylibcudf.Table - The result of the scatter - """ - cdef unique_ptr[table] c_result - - with nogil: - c_result = move( - cpp_copying.scatter( - source.view(), - scatter_map.view(), - target_table.view(), - ) - ) - - return Table.from_libcudf(move(c_result)) - +cpdef Table scatter( + TableOrListOfScalars source, + Column scatter_map, + Table target_table +): + """Scatter from source into target_table according to scatter_map. -# TODO: Could generalize list to sequence -cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table): - """Scatter scalars from source into target_table according to scatter_map. 
+ If source is a table, it specifies rows to scatter. If source is a list, + each scalar is scattered into the corresponding column in the ``target_table``. For details, see :cpp:func:`scatter`. Parameters ---------- - source : List[Scalar] - A list of scalars to scatter into target_table. + source : Union[Table, List[Scalar]] + The table object or list of scalars from which to pull data. scatter_map : Column A mapping from rows in source to rows in target_table. target_table : Table @@ -117,73 +90,58 @@ cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table) Returns ------- - pylibcudf.Table + Table The result of the scatter """ - cdef vector[reference_wrapper[const scalar]] source_scalars = \ - _as_vector(source) - cdef unique_ptr[table] c_result - with nogil: - c_result = move( - cpp_copying.scatter( - source_scalars, - scatter_map.view(), - target_table.view(), + cdef vector[reference_wrapper[const scalar]] source_scalars + if TableOrListOfScalars is Table: + with nogil: + c_result = move( + cpp_copying.scatter( + source.view(), + scatter_map.view(), + target_table.view(), + ) ) - ) - - return Table.from_libcudf(move(c_result)) - - -cpdef object empty_column_like(Column input): - """Create an empty column with the same type as input. - - For details, see :cpp:func:`empty_like`. - - Parameters - ---------- - input : Column - The column to use as a template for the output. - - Returns - ------- - pylibcudf.Column - An empty column with the same type as input. 
- """ - cdef unique_ptr[column] c_column_result - with nogil: - c_column_result = move( - cpp_copying.empty_like( - ( input).view(), + else: + source_scalars = _as_vector(source) + with nogil: + c_result = move( + cpp_copying.scatter( + source_scalars, + scatter_map.view(), + target_table.view(), + ) ) - ) - return Column.from_libcudf(move(c_column_result)) + return Table.from_libcudf(move(c_result)) -cpdef object empty_table_like(Table input): - """Create an empty table with the same type as input. +cpdef ColumnOrTable empty_like(ColumnOrTable input): + """Create an empty column or table with the same type as ``input``. For details, see :cpp:func:`empty_like`. Parameters ---------- - input : Table - The table to use as a template for the output. + input : Union[Column, Table] + The column or table to use as a template for the output. Returns ------- - pylibcudf.Table - An empty table with the same type as input. + Union[Column, Table] + An empty column or table with the same type(s) as ``input``. """ - cdef unique_ptr[table] c_table_result - with nogil: - c_table_result = move( - cpp_copying.empty_like( - ( input).view(), - ) - ) - return Table.from_libcudf(move(c_table_result)) + cdef unique_ptr[table] c_tbl_result + cdef unique_ptr[column] c_col_result + if ColumnOrTable is Column: + with nogil: + c_col_result = move(cpp_copying.empty_like(input.view())) + return Column.from_libcudf(move(c_col_result)) + else: + with nogil: + c_tbl_result = move(cpp_copying.empty_like(input.view())) + return Table.from_libcudf(move(c_tbl_result)) cpdef Column allocate_like( @@ -340,157 +298,100 @@ cpdef Column shift(Column input, size_type offset, Scalar fill_values): return Column.from_libcudf(move(c_result)) -cpdef list column_split(Column input_column, list splits): - """Split input_column into multiple columns. +cpdef list split(ColumnOrTable input, list splits): + """Split input into multiple. For details on the implementation, see :cpp:func:`split`. 
Parameters ---------- - input_column : Column + input : Union[Column, Table] The column to split. splits : List[int] The indices at which to split the column. Returns ------- - List[pylibcudf.Column] - The result of splitting input_column. + List[Union[Column, Table]] + The result of splitting input. """ - cdef vector[size_type] c_splits - cdef int split - for split in splits: - c_splits.push_back(split) - - cdef vector[column_view] c_result - with nogil: - c_result = move( - cpp_copying.split( - input_column.view(), - c_splits - ) - ) - + cdef vector[size_type] c_splits = splits + cdef vector[column_view] c_col_result + cdef vector[table_view] c_tbl_result cdef int i - return [ - Column.from_column_view(c_result[i], input_column) - for i in range(c_result.size()) - ] + if ColumnOrTable is Column: + with nogil: + c_col_result = move(cpp_copying.split(input.view(), c_splits)) -cpdef list table_split(Table input_table, list splits): - """Split input_table into multiple tables. - - For details on the implementation, see :cpp:func:`split`. - - Parameters - ---------- - input_table : Table - The table to split. - splits : List[int] - The indices at which to split the table. - - Returns - ------- - List[pylibcudf.Table] - The result of splitting input_table. - """ - cdef vector[size_type] c_splits = splits - cdef vector[table_view] c_result - with nogil: - c_result = move( - cpp_copying.split( - input_table.view(), - c_splits - ) - ) + return [ + Column.from_column_view(c_col_result[i], input) + for i in range(c_col_result.size()) + ] + else: + with nogil: + c_tbl_result = move(cpp_copying.split(input.view(), c_splits)) - cdef int i - return [ - Table.from_table_view(c_result[i], input_table) - for i in range(c_result.size()) - ] + return [ + Table.from_table_view(c_tbl_result[i], input) + for i in range(c_tbl_result.size()) + ] -cpdef list column_slice(Column input_column, list indices): - """Slice input_column according to indices. 
+cpdef list slice(ColumnOrTable input, list indices): + """Slice input according to indices. For details on the implementation, see :cpp:func:`slice`. Parameters ---------- - input_column : Column - The column to slice. + input : Union[Column, Table] + The column or table to slice. indices : List[int] - The indices to select from input_column. + The indices to select from input. Returns ------- - List[pylibcudf.Column] - The result of slicing input_column. + List[Union[Column, Table]] + The result of slicing ``input``. """ cdef vector[size_type] c_indices = indices - cdef vector[column_view] c_result - with nogil: - c_result = move( - cpp_copying.slice( - input_column.view(), - c_indices - ) - ) - + cdef vector[column_view] c_col_result + cdef vector[table_view] c_tbl_result cdef int i - return [ - Column.from_column_view(c_result[i], input_column) - for i in range(c_result.size()) - ] - - -cpdef list table_slice(Table input_table, list indices): - """Slice input_table according to indices. - - For details on the implementation, see :cpp:func:`slice`. - - Parameters - ---------- - input_table : Table - The table to slice. - indices : List[int] - The indices to select from input_table. + if ColumnOrTable is Column: + with nogil: + c_col_result = move(cpp_copying.slice(input.view(), c_indices)) - Returns - ------- - List[pylibcudf.Table] - The result of slicing input_table.
- """ - cdef vector[size_type] c_indices = indices - cdef vector[table_view] c_result - with nogil: - c_result = move( - cpp_copying.slice( - input_table.view(), - c_indices - ) - ) + return [ + Column.from_column_view(c_col_result[i], input) + for i in range(c_col_result.size()) + ] + else: + with nogil: + c_tbl_result = move(cpp_copying.slice(input.view(), c_indices)) - cdef int i - return [ - Table.from_table_view(c_result[i], input_table) - for i in range(c_result.size()) - ] + return [ + Table.from_table_view(c_tbl_result[i], input) + for i in range(c_tbl_result.size()) + ] -cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): +cpdef Column copy_if_else( + LeftCopyIfElseOperand lhs, + RightCopyIfElseOperand rhs, + Column boolean_mask +): """Copy elements from lhs or rhs into a new column according to boolean_mask. For details on the implementation, see :cpp:func:`copy_if_else`. Parameters ---------- - lhs : Column or Scalar + lhs : Union[Column, Scalar] The column or scalar to copy from if the corresponding element in boolean_mask is True. - rhs : Column or Scalar + rhs : Union[Column, Scalar] The column or scalar to copy from if the corresponding element in boolean_mask is False. 
boolean_mask : Column @@ -503,56 +404,51 @@ cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask): """ cdef unique_ptr[column] result - if isinstance(lhs, Column) and isinstance(rhs, Column): + if LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Column: with nogil: result = move( - cpp_copying.copy_if_else( - ( lhs).view(), - ( rhs).view(), - boolean_mask.view() - ) + cpp_copying.copy_if_else(lhs.view(), rhs.view(), boolean_mask.view()) ) - elif isinstance(lhs, Column) and isinstance(rhs, Scalar): + elif LeftCopyIfElseOperand is Column and RightCopyIfElseOperand is Scalar: with nogil: result = move( cpp_copying.copy_if_else( - ( lhs).view(), - dereference(( rhs).c_obj), - boolean_mask.view() + lhs.view(), dereference(rhs.c_obj), boolean_mask.view() ) ) - elif isinstance(lhs, Scalar) and isinstance(rhs, Column): + elif LeftCopyIfElseOperand is Scalar and RightCopyIfElseOperand is Column: with nogil: result = move( cpp_copying.copy_if_else( - dereference(( lhs).c_obj), - ( rhs).view(), - boolean_mask.view() + dereference(lhs.c_obj), rhs.view(), boolean_mask.view() ) ) - elif isinstance(lhs, Scalar) and isinstance(rhs, Scalar): + else: with nogil: result = move( cpp_copying.copy_if_else( - dereference(( lhs).c_obj), - dereference(( rhs).c_obj), - boolean_mask.view() + dereference(lhs.c_obj), dereference(rhs.c_obj), boolean_mask.view() ) ) - else: - raise ValueError(f"Invalid arguments {lhs} and {rhs}") return Column.from_libcudf(move(result)) -cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask): +cpdef Table boolean_mask_scatter( + TableOrListOfScalars input, + Table target, + Column boolean_mask +): """Scatter rows from input into target according to boolean_mask. + If source is a table, it specifies rows to scatter. If source is a list, + each scalar is scattered into the corresponding column in the ``target_table``. + For details on the implementation, see :cpp:func:`boolean_mask_scatter`. 
Parameters ---------- - input : Table + input : Union[Table, List[Scalar]] The table object from which to pull data. target : Table The table object into which to scatter data. @@ -561,54 +457,31 @@ cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean Returns ------- - pylibcudf.Table + Table The result of the scatter """ cdef unique_ptr[table] result + cdef vector[reference_wrapper[const scalar]] source_scalars - with nogil: - result = move( - cpp_copying.boolean_mask_scatter( - (
input).view(), - target.view(), - boolean_mask.view() + if TableOrListOfScalars is Table: + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + input.view(), + target.view(), + boolean_mask.view() + ) ) - ) - - return Table.from_libcudf(move(result)) - - -# TODO: Could generalize list to sequence -cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask): - """Scatter scalars from input into target according to boolean_mask. - - For details on the implementation, see :cpp:func:`boolean_mask_scatter`. - - Parameters - ---------- - input : List[Scalar] - A list of scalars to scatter into target. - target : Table - The table object into which to scatter data. - boolean_mask : Column - A mapping from rows in input to rows in target. - - Returns - ------- - pylibcudf.Table - The result of the scatter - """ - cdef vector[reference_wrapper[const scalar]] source_scalars = _as_vector(input) - - cdef unique_ptr[table] result - with nogil: - result = move( - cpp_copying.boolean_mask_scatter( - source_scalars, - target.view(), - boolean_mask.view(), + else: + source_scalars = _as_vector(input) + with nogil: + result = move( + cpp_copying.boolean_mask_scatter( + source_scalars, + target.view(), + boolean_mask.view(), + ) ) - ) return Table.from_libcudf(move(result)) From 4f7ca1e086adde37412ada705d1aeda7aee7d706 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Sun, 4 Feb 2024 08:55:47 +0000 Subject: [PATCH 3/3] Document use of fused types --- docs/cudf/source/developer_guide/pylibcudf.md | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/cudf/source/developer_guide/pylibcudf.md b/docs/cudf/source/developer_guide/pylibcudf.md index 1b321dbb1fe..0120cbb286e 100644 --- a/docs/cudf/source/developer_guide/pylibcudf.md +++ b/docs/cudf/source/developer_guide/pylibcudf.md @@ -153,3 +153,26 @@ from cudf._lib.cpp.copying cimport out_of_bounds_policy from cudf._lib.cpp.copying import \ out_of_bounds_policy as 
OutOfBoundsPolicy # no-cython-lint ``` + +### Handling overloaded functions in libcudf +As a C++ library, libcudf makes extensive use of function overloading. +For example, both of the following functions exist in libcudf: +```cpp +std::unique_ptr<table>
empty_like(table_view const& input_table); +std::unique_ptr<column> empty_like(column_view const& input); +``` + +However, Cython does not directly support overloading in this way, instead following Pythonic semantics where every function name must uniquely identify the function. +Therefore, Cython's [fused types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html) should be used when implementing pylibcudf wrappers of overloaded functions like the above. +Fused types are Cython's version of generic programming and in this case amount to writing templated functions that compile into separate copies corresponding to the different C++ overloads. +For the above functions, the equivalent Cython function is +```cython +ctypedef fused ColumnOrTable: + Table + Column + +cpdef ColumnOrTable empty_like(ColumnOrTable input) +``` + +[Cython supports specializing the contents of fused-type functions based on the argument types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html#type-checking-specializations), so any type-specific logic may be encoded using the appropriate conditionals. +See the pylibcudf source for examples of how to implement such functions.