rapidsai · rapids-bot · Feb 5, 2024 · Feb 4, 2024 · Feb 4, 2024 · Feb 4, 2024
diff --git a/docs/cudf/source/developer_guide/pylibcudf.md b/docs/cudf/source/developer_guide/pylibcudf.md
@@ -153,3 +153,26 @@ from cudf._lib.cpp.copying cimport out_of_bounds_policy
 from cudf._lib.cpp.copying import \
     out_of_bounds_policy as OutOfBoundsPolicy  # no-cython-lint
 ```
+
+### Handling overloaded functions in libcudf
+As a C++ library, libcudf makes extensive use of function overloading.
+For example, both of the following functions exist in libcudf:
+```cpp
+std::unique_ptr<table> empty_like(table_view const& input_table);
+std::unique_ptr<column> empty_like(column_view const& input);
+```
+
+However, Cython does not directly support overloading in this way, instead following Pythonic semantics where every function name must uniquely identify the function.
+Therefore, Cython's [fused types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html) should be used when implementing pylibcudf wrappers of overloaded functions like the above.
+Fused types are Cython's version of generic programming, and in this case they amount to writing templated functions that compile into separate copies corresponding to the different C++ overloads.
+For the above functions, the equivalent Cython function signature is:
+```cython
+ctypedef fused ColumnOrTable:
+    Table
+    Column
+
+cpdef ColumnOrTable empty_like(ColumnOrTable input)
+```
+
+[Cython supports specializing the contents of fused-type functions based on the argument types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html#type-checking-specializations), so any type-specific logic may be encoded using the appropriate conditionals.
+See the pylibcudf source for examples of how to implement such functions.
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import pickle
 
@@ -184,26 +184,21 @@ def scatter(list sources, Column scatter_map, list target_columns,
                 f"index out of bounds for column of size {n_rows}"
             )
 
-    if isinstance(sources[0], Column):
-        tbl = pylibcudf.copying.scatter_table(
-            pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]),
-            scatter_map.to_pylibcudf(mode="read"),
-            pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
-        )
-    else:
-        tbl = pylibcudf.copying.scatter_scalars(
-            [(<DeviceScalar> as_device_scalar(slr)).c_value for slr in sources],
-            scatter_map.to_pylibcudf(mode="read"),
-            pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
-        )
+    tbl = pylibcudf.copying.scatter(
+        pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources])
+        if isinstance(sources[0], Column)
+        else [(<DeviceScalar> as_device_scalar(slr)).c_value for slr in sources],
+        scatter_map.to_pylibcudf(mode="read"),
+        pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
+    )
 
     return columns_from_pylibcudf_table(tbl)
 
 
 @acquire_spill_lock()
 def column_empty_like(Column input_column):
     return Column.from_pylibcudf(
-        pylibcudf.copying.empty_column_like(
+        pylibcudf.copying.empty_like(
             input_column.to_pylibcudf(mode="read")
         )
     )
@@ -222,7 +217,7 @@ def column_allocate_like(Column input_column, size=None):
 @acquire_spill_lock()
 def columns_empty_like(list input_columns):
     return columns_from_pylibcudf_table(
-        pylibcudf.copying.empty_table_like(
+        pylibcudf.copying.empty_like(
             pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns])
         )
     )
@@ -232,7 +227,7 @@ def columns_empty_like(list input_columns):
 def column_slice(Column input_column, object indices):
     return [
         Column.from_pylibcudf(c)
-        for c in pylibcudf.copying.column_slice(
+        for c in pylibcudf.copying.slice(
             input_column.to_pylibcudf(mode="read"),
             list(indices),
         )
@@ -243,7 +238,7 @@ def column_slice(Column input_column, object indices):
 def columns_slice(list input_columns, object indices):
     return [
         columns_from_pylibcudf_table(tbl)
-        for tbl in pylibcudf.copying.table_slice(
+        for tbl in pylibcudf.copying.slice(
             pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]),
             list(indices),
         )
@@ -254,7 +249,7 @@ def columns_slice(list input_columns, object indices):
 def column_split(Column input_column, object splits):
     return [
         Column.from_pylibcudf(c)
-        for c in pylibcudf.copying.column_split(
+        for c in pylibcudf.copying.split(
             input_column.to_pylibcudf(mode="read"),
             list(splits),
         )
@@ -265,7 +260,7 @@ def column_split(Column input_column, object splits):
 def columns_split(list input_columns, object splits):
     return [
         columns_from_pylibcudf_table(tbl)
-        for tbl in pylibcudf.copying.table_split(
+        for tbl in pylibcudf.copying.split(
             pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]),
             list(splits),
         )
@@ -303,18 +298,13 @@ def boolean_mask_scatter(list input_, list target_columns,
     if len(input_) == 0:
         return []
 
-    if isinstance(input_[0], Column):
-        tbl = pylibcudf.copying.boolean_mask_table_scatter(
-            pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]),
-            pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
-            boolean_mask.to_pylibcudf(mode="read"),
-        )
-    else:
-        tbl = pylibcudf.copying.boolean_mask_scalars_scatter(
-            [(<DeviceScalar> as_device_scalar(i)).c_value for i in input_],
-            pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
-            boolean_mask.to_pylibcudf(mode="read"),
-        )
+    tbl = pylibcudf.copying.boolean_mask_scatter(
+        pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_])
+        if isinstance(input_[0], Column)
+        else [(<DeviceScalar> as_device_scalar(i)).c_value for i in input_],
+        pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
+        boolean_mask.to_pylibcudf(mode="read"),
+    )
 
     return columns_from_pylibcudf_table(tbl)
 

@@ -3,12 +3,22 @@
 from cudf._lib.cpp.binaryop cimport binary_operator
 
 from .column cimport Column
+from .scalar cimport Scalar
 from .types cimport DataType
 
+# Need two separate fused types to generate the cartesian product of signatures.
+ctypedef fused LeftBinaryOperand:
+    Column
+    Scalar
+
+ctypedef fused RightBinaryOperand:
+    Column
+    Scalar
+
 
 cpdef Column binary_operation(
-    object lhs,
-    object rhs,
+    LeftBinaryOperand lhs,
+    RightBinaryOperand rhs,
     binary_operator op,
     DataType data_type
 )
@@ -18,25 +18,25 @@ from .types cimport DataType
 
 
 cpdef Column binary_operation(
-    object lhs,
-    object rhs,
+    LeftBinaryOperand lhs,
+    RightBinaryOperand rhs,
     binary_operator op,
     DataType data_type
 ):
     """Perform a binary operation between a column and another column or scalar.
 
-    Either ``lhs`` or ``rhs`` must be a
-    :py:class:`~cudf._lib.pylibcudf.column.Column`. The other may be a
+    ``lhs`` and ``rhs`` may each be a
     :py:class:`~cudf._lib.pylibcudf.column.Column` or a
-    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.
+    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`, but at least one must be a
+    :py:class:`~cudf._lib.pylibcudf.column.Column`.
 
     For details, see :cpp:func:`binary_operation`.
 
     Parameters
     ----------
-    lhs : Column or Scalar
+    lhs : Union[Column, Scalar]
         The left hand side argument.
-    rhs : Column or Scalar
+    rhs : Union[Column, Scalar]
         The right hand side argument.
     op : BinaryOperator
         The operation to perform.
@@ -50,32 +50,32 @@ cpdef Column binary_operation(
     """
     cdef unique_ptr[column] result
 
-    if isinstance(lhs, Column) and isinstance(rhs, Column):
+    if LeftBinaryOperand is Column and RightBinaryOperand is Column:
         with nogil:
             result = move(
                 cpp_binaryop.binary_operation(
-                    (<Column> lhs).view(),
-                    (<Column> rhs).view(),
+                    lhs.view(),
+                    rhs.view(),
                     op,
                     data_type.c_obj
                 )
             )
-    elif isinstance(lhs, Column) and isinstance(rhs, Scalar):
+    elif LeftBinaryOperand is Column and RightBinaryOperand is Scalar:
         with nogil:
             result = move(
                 cpp_binaryop.binary_operation(
-                    (<Column> lhs).view(),
-                    dereference((<Scalar> rhs).c_obj),
+                    lhs.view(),
+                    dereference(rhs.c_obj),
                     op,
                     data_type.c_obj
                 )
             )
-    elif isinstance(lhs, Scalar) and isinstance(rhs, Column):
+    elif LeftBinaryOperand is Scalar and RightBinaryOperand is Column:
         with nogil:
             result = move(
                 cpp_binaryop.binary_operation(
-                    dereference((<Scalar> lhs).c_obj),
-                    (<Column> rhs).view(),
+                    dereference(lhs.c_obj),
+                    rhs.view(),
                     op,
                     data_type.c_obj
                 )

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 from libcpp cimport bool as cbool
 
@@ -9,20 +9,36 @@ from .column cimport Column
 from .scalar cimport Scalar
 from .table cimport Table
 
+ctypedef fused ColumnOrTable:
+    Table
+    Column
+
+
+ctypedef fused TableOrListOfScalars:
+    Table
+    # The contents of the list must be validated as Scalars at runtime.
+    list
+
+
+# Need two separate fused types to generate the cartesian product of signatures.
+ctypedef fused LeftCopyIfElseOperand:
+    Column
+    Scalar
+
+ctypedef fused RightCopyIfElseOperand:
+    Column
+    Scalar
+
 
 cpdef Table gather(
     Table source_table,
     Column gather_map,
     out_of_bounds_policy bounds_policy
 )
 
-cpdef Table scatter_table(Table source, Column scatter_map, Table target_table)
-
-cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table)
+cpdef Table scatter(TableOrListOfScalars source, Column scatter_map, Table target_table)
 
-cpdef object empty_column_like(Column input)
-
-cpdef object empty_table_like(Table input)
+cpdef ColumnOrTable empty_like(ColumnOrTable input)
 
 cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*)
 
@@ -44,18 +60,20 @@ cpdef Column copy_range(
 
 cpdef Column shift(Column input, size_type offset, Scalar fill_values)
 
-cpdef list column_split(Column input_column, list splits)
-
-cpdef list table_split(Table input_table, list splits)
-
-cpdef list column_slice(Column input_column, list indices)
+cpdef list split(ColumnOrTable input, list splits)
 
-cpdef list table_slice(Table input_table, list indices)
+cpdef list slice(ColumnOrTable input, list indices)
 
-cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask)
-
-cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask)
+cpdef Column copy_if_else(
+    LeftCopyIfElseOperand lhs,
+    RightCopyIfElseOperand rhs,
+    Column boolean_mask
+)
 
-cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask)
+cpdef Table boolean_mask_scatter(
+    TableOrListOfScalars input,
+    Table target,
+    Column boolean_mask
+)
 
 cpdef Scalar get_element(Column input_column, size_type index)