Skip to content

Commit

Permalink
Use fused types for overloaded function signatures (#14969)
Browse files Browse the repository at this point in the history
This change makes the pylibcudf API more convenient and a more faithful reproduction of the underlying libcudf APIs that offer overloaded signatures. In cases like binary ops, where we previously used runtime instance checks, this change also removes unnecessary runtime overhead when the calling code is Cython, since in those cases the types at the call site are known at compile time.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: #14969
  • Loading branch information
vyasr authored Feb 5, 2024
1 parent de1da2b commit dfc7f25
Show file tree
Hide file tree
Showing 6 changed files with 241 additions and 327 deletions.
23 changes: 23 additions & 0 deletions docs/cudf/source/developer_guide/pylibcudf.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,26 @@ from cudf._lib.cpp.copying cimport out_of_bounds_policy
from cudf._lib.cpp.copying import \
out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint
```

### Handling overloaded functions in libcudf
As a C++ library, libcudf makes extensive use of function overloading.
For example, both of the following functions exist in libcudf:
```cpp
std::unique_ptr<table> empty_like(table_view const& input_table);
std::unique_ptr<column> empty_like(column_view const& input);
```
However, Cython does not directly support overloading in this way, instead following Pythonic semantics where every function name must uniquely identify the function.
Therefore, Cython's [fused types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html) should be used when implementing pylibcudf wrappers of overloaded functions like the above.
Fused types are Cython's version of generic programming and in this case amount to writing templated functions that compile into separate copies corresponding to the different C++ overloads.
For the above functions, the equivalent Cython function is:
```cython
ctypedef fused ColumnOrTable:
    Table
    Column
cpdef ColumnOrTable empty_like(ColumnOrTable input)
```

[Cython supports specializing the contents of fused-type functions based on the argument types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html#type-checking-specializations), so any type-specific logic may be encoded using the appropriate conditionals.
See the pylibcudf source for examples of how to implement such functions.
52 changes: 21 additions & 31 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

import pickle

Expand Down Expand Up @@ -184,26 +184,21 @@ def scatter(list sources, Column scatter_map, list target_columns,
f"index out of bounds for column of size {n_rows}"
)

if isinstance(sources[0], Column):
tbl = pylibcudf.copying.scatter_table(
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources]),
scatter_map.to_pylibcudf(mode="read"),
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
)
else:
tbl = pylibcudf.copying.scatter_scalars(
[(<DeviceScalar> as_device_scalar(slr)).c_value for slr in sources],
scatter_map.to_pylibcudf(mode="read"),
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
)
tbl = pylibcudf.copying.scatter(
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in sources])
if isinstance(sources[0], Column)
else [(<DeviceScalar> as_device_scalar(slr)).c_value for slr in sources],
scatter_map.to_pylibcudf(mode="read"),
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
)

return columns_from_pylibcudf_table(tbl)


@acquire_spill_lock()
def column_empty_like(Column input_column):
return Column.from_pylibcudf(
pylibcudf.copying.empty_column_like(
pylibcudf.copying.empty_like(
input_column.to_pylibcudf(mode="read")
)
)
Expand All @@ -222,7 +217,7 @@ def column_allocate_like(Column input_column, size=None):
@acquire_spill_lock()
def columns_empty_like(list input_columns):
return columns_from_pylibcudf_table(
pylibcudf.copying.empty_table_like(
pylibcudf.copying.empty_like(
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns])
)
)
Expand All @@ -232,7 +227,7 @@ def columns_empty_like(list input_columns):
def column_slice(Column input_column, object indices):
return [
Column.from_pylibcudf(c)
for c in pylibcudf.copying.column_slice(
for c in pylibcudf.copying.slice(
input_column.to_pylibcudf(mode="read"),
list(indices),
)
Expand All @@ -243,7 +238,7 @@ def column_slice(Column input_column, object indices):
def columns_slice(list input_columns, object indices):
return [
columns_from_pylibcudf_table(tbl)
for tbl in pylibcudf.copying.table_slice(
for tbl in pylibcudf.copying.slice(
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]),
list(indices),
)
Expand All @@ -254,7 +249,7 @@ def columns_slice(list input_columns, object indices):
def column_split(Column input_column, object splits):
return [
Column.from_pylibcudf(c)
for c in pylibcudf.copying.column_split(
for c in pylibcudf.copying.split(
input_column.to_pylibcudf(mode="read"),
list(splits),
)
Expand All @@ -265,7 +260,7 @@ def column_split(Column input_column, object splits):
def columns_split(list input_columns, object splits):
return [
columns_from_pylibcudf_table(tbl)
for tbl in pylibcudf.copying.table_split(
for tbl in pylibcudf.copying.split(
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_columns]),
list(splits),
)
Expand Down Expand Up @@ -303,18 +298,13 @@ def boolean_mask_scatter(list input_, list target_columns,
if len(input_) == 0:
return []

if isinstance(input_[0], Column):
tbl = pylibcudf.copying.boolean_mask_table_scatter(
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_]),
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
boolean_mask.to_pylibcudf(mode="read"),
)
else:
tbl = pylibcudf.copying.boolean_mask_scalars_scatter(
[(<DeviceScalar> as_device_scalar(i)).c_value for i in input_],
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
boolean_mask.to_pylibcudf(mode="read"),
)
tbl = pylibcudf.copying.boolean_mask_scatter(
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in input_])
if isinstance(input_[0], Column)
else [(<DeviceScalar> as_device_scalar(i)).c_value for i in input_],
pylibcudf.Table([col.to_pylibcudf(mode="read") for col in target_columns]),
boolean_mask.to_pylibcudf(mode="read"),
)

return columns_from_pylibcudf_table(tbl)

Expand Down
14 changes: 12 additions & 2 deletions python/cudf/cudf/_lib/pylibcudf/binaryop.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,22 @@
from cudf._lib.cpp.binaryop cimport binary_operator

from .column cimport Column
from .scalar cimport Scalar
from .types cimport DataType

# Need two separate fused types to generate the Cartesian product of signatures:
# arguments sharing a single fused type always specialize to the same type.
ctypedef fused LeftBinaryOperand:
Column
Scalar

ctypedef fused RightBinaryOperand:
Column
Scalar


cpdef Column binary_operation(
object lhs,
object rhs,
LeftBinaryOperand lhs,
RightBinaryOperand rhs,
binary_operator op,
DataType output_type
)
32 changes: 16 additions & 16 deletions python/cudf/cudf/_lib/pylibcudf/binaryop.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,25 @@ from .types cimport DataType


cpdef Column binary_operation(
object lhs,
object rhs,
LeftBinaryOperand lhs,
RightBinaryOperand rhs,
binary_operator op,
DataType output_type
):
"""Perform a binary operation between a column and another column or scalar.
Either ``lhs`` or ``rhs`` must be a
:py:class:`~cudf._lib.pylibcudf.column.Column`. The other may be a
``lhs`` and ``rhs`` may be a
:py:class:`~cudf._lib.pylibcudf.column.Column` or a
:py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.
:py:class:`~cudf._lib.pylibcudf.scalar.Scalar`, but at least one must be a
:py:class:`~cudf._lib.pylibcudf.column.Column`.
For details, see :cpp:func:`binary_operation`.
Parameters
----------
lhs : Column or Scalar
lhs : Union[Column, Scalar]
The left hand side argument.
rhs : Column or Scalar
rhs : Union[Column, Scalar]
The right hand side argument.
op : BinaryOperator
The operation to perform.
Expand All @@ -50,32 +50,32 @@ cpdef Column binary_operation(
"""
cdef unique_ptr[column] result

if isinstance(lhs, Column) and isinstance(rhs, Column):
if LeftBinaryOperand is Column and RightBinaryOperand is Column:
with nogil:
result = move(
cpp_binaryop.binary_operation(
(<Column> lhs).view(),
(<Column> rhs).view(),
lhs.view(),
rhs.view(),
op,
output_type.c_obj
)
)
elif isinstance(lhs, Column) and isinstance(rhs, Scalar):
elif LeftBinaryOperand is Column and RightBinaryOperand is Scalar:
with nogil:
result = move(
cpp_binaryop.binary_operation(
(<Column> lhs).view(),
dereference((<Scalar> rhs).c_obj),
lhs.view(),
dereference(rhs.c_obj),
op,
output_type.c_obj
)
)
elif isinstance(lhs, Scalar) and isinstance(rhs, Column):
elif LeftBinaryOperand is Scalar and RightBinaryOperand is Column:
with nogil:
result = move(
cpp_binaryop.binary_operation(
dereference((<Scalar> lhs).c_obj),
(<Column> rhs).view(),
dereference(lhs.c_obj),
rhs.view(),
op,
output_type.c_obj
)
Expand Down
52 changes: 35 additions & 17 deletions python/cudf/cudf/_lib/pylibcudf/copying.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from libcpp cimport bool as cbool

Expand All @@ -9,20 +9,36 @@ from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table

ctypedef fused ColumnOrTable:
Table
Column


ctypedef fused TableOrListOfScalars:
Table
# The contents of the list must be validated as Scalars at runtime.
list


# Need two separate fused types to generate the Cartesian product of signatures:
# arguments sharing a single fused type always specialize to the same type.
ctypedef fused LeftCopyIfElseOperand:
Column
Scalar

ctypedef fused RightCopyIfElseOperand:
Column
Scalar


cpdef Table gather(
Table source_table,
Column gather_map,
out_of_bounds_policy bounds_policy
)

cpdef Table scatter_table(Table source, Column scatter_map, Table target_table)

cpdef Table scatter_scalars(list source, Column scatter_map, Table target_table)
cpdef Table scatter(TableOrListOfScalars source, Column scatter_map, Table target_table)

cpdef object empty_column_like(Column input)

cpdef object empty_table_like(Table input)
cpdef ColumnOrTable empty_like(ColumnOrTable input)

cpdef Column allocate_like(Column input_column, mask_allocation_policy policy, size=*)

Expand All @@ -44,18 +60,20 @@ cpdef Column copy_range(

cpdef Column shift(Column input, size_type offset, Scalar fill_values)

cpdef list column_split(Column input_column, list splits)

cpdef list table_split(Table input_table, list splits)

cpdef list column_slice(Column input_column, list indices)
cpdef list split(ColumnOrTable input, list splits)

cpdef list table_slice(Table input_table, list indices)
cpdef list slice(ColumnOrTable input, list indices)

cpdef Column copy_if_else(object lhs, object rhs, Column boolean_mask)

cpdef Table boolean_mask_table_scatter(Table input, Table target, Column boolean_mask)
cpdef Column copy_if_else(
LeftCopyIfElseOperand lhs,
RightCopyIfElseOperand rhs,
Column boolean_mask
)

cpdef Table boolean_mask_scalars_scatter(list input, Table target, Column boolean_mask)
cpdef Table boolean_mask_scatter(
TableOrListOfScalars input,
Table target,
Column boolean_mask
)

cpdef Scalar get_element(Column input_column, size_type index)
Loading

0 comments on commit dfc7f25

Please sign in to comment.