Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add to_dlpack/from_dlpack APIs to pylibcudf #17055

Merged
merged 11 commits into from
Oct 25, 2024
69 changes: 8 additions & 61 deletions python/cudf/cudf/_lib/interop.pyx
Original file line number Diff line number Diff line change
@@ -1,49 +1,22 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from cpython cimport pycapsule
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

import pylibcudf

from pylibcudf.libcudf.interop cimport (
DLManagedTensor,
from_dlpack as cpp_from_dlpack,
to_dlpack as cpp_to_dlpack,
)
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.table.table_view cimport table_view

from cudf._lib.utils cimport (
columns_from_pylibcudf_table,
columns_from_unique_ptr,
table_view_from_columns,
)
from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf.core.buffer import acquire_spill_lock
from cudf.core.dtypes import ListDtype, StructDtype


def from_dlpack(dlpack_capsule):
def from_dlpack(object dlpack_capsule):
"""
Converts a DLPack Tensor PyCapsule into a list of columns.

DLPack Tensor PyCapsule is expected to have the name "dltensor".
"""
cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>pycapsule.\
PyCapsule_GetPointer(dlpack_capsule, 'dltensor')
pycapsule.PyCapsule_SetName(dlpack_capsule, 'used_dltensor')

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_from_dlpack(dlpack_tensor)
)

res = columns_from_unique_ptr(move(c_result))
dlpack_tensor.deleter(dlpack_tensor)
return res
return columns_from_pylibcudf_table(
pylibcudf.interop.from_dlpack(dlpack_capsule)
)


def to_dlpack(list source_columns):
Expand All @@ -52,39 +25,13 @@ def to_dlpack(list source_columns):

DLPack Tensor PyCapsule will have the name "dltensor".
"""
if any(column.null_count for column in source_columns):
raise ValueError(
"Cannot create a DLPack tensor with null values. \
Input is required to have null count as zero."
)

cdef DLManagedTensor *dlpack_tensor
cdef table_view source_table_view = table_view_from_columns(source_columns)

with nogil:
dlpack_tensor = cpp_to_dlpack(
source_table_view
return pylibcudf.interop.to_dlpack(
pylibcudf.Table(
[col.to_pylibcudf(mode="read") for col in source_columns]
)

return pycapsule.PyCapsule_New(
dlpack_tensor,
'dltensor',
dlmanaged_tensor_pycapsule_deleter
)


cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept:
cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>0
try:
dlpack_tensor = <DLManagedTensor*>pycapsule.PyCapsule_GetPointer(
pycap_obj, 'used_dltensor')
return # we do not call a used capsule's deleter
except Exception:
dlpack_tensor = <DLManagedTensor*>pycapsule.PyCapsule_GetPointer(
pycap_obj, 'dltensor')
dlpack_tensor.deleter(dlpack_tensor)


def gather_metadata(object cols_dtypes):
"""
Generates a ColumnMetadata vector for each column.
Expand Down
2 changes: 2 additions & 0 deletions python/pylibcudf/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
expressions,
filling,
groupby,
interop,
join,
json,
labeling,
Expand Down Expand Up @@ -62,6 +63,7 @@ __all__ = [
"filling",
"gpumemoryview",
"groupby",
"interop",
"join",
"json",
"lists",
Expand Down
8 changes: 8 additions & 0 deletions python/pylibcudf/pylibcudf/interop.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.table cimport Table


# Build a Table from a DLPack PyCapsule (see interop.pyx for the contract).
cpdef Table from_dlpack(object managed_tensor)

# Export a Table as a DLPack PyCapsule (see interop.pyx for the contract).
cpdef object to_dlpack(Table input)
90 changes: 89 additions & 1 deletion python/pylibcudf/pylibcudf/interop.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New
from cpython.pycapsule cimport (
PyCapsule_GetPointer,
PyCapsule_IsValid,
PyCapsule_New,
PyCapsule_SetName,
)
from libc.stdlib cimport free
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
Expand All @@ -16,11 +21,14 @@ from pylibcudf.libcudf.interop cimport (
ArrowArray,
ArrowArrayStream,
ArrowSchema,
DLManagedTensor,
column_metadata,
from_arrow_column as cpp_from_arrow_column,
from_arrow_stream as cpp_from_arrow_stream,
from_dlpack as cpp_from_dlpack,
to_arrow_host_raw,
to_arrow_schema_raw,
to_dlpack as cpp_to_dlpack,
)
from pylibcudf.libcudf.table.table cimport table

Expand Down Expand Up @@ -315,3 +323,83 @@ def _to_arrow_scalar(cudf_object, metadata=None):
# Note that metadata for scalars is primarily important for preserving
# information on nested types since names are otherwise irrelevant.
return to_arrow(Column.from_scalar(cudf_object, 1), metadata=metadata)[0]


cpdef Table from_dlpack(object managed_tensor):
    """
    Convert a DLPack DLTensor into a cudf table.

    For details, see :cpp:func:`cudf::from_dlpack`

    Parameters
    ----------
    managed_tensor : PyCapsule
        A 1D or 2D column-major (Fortran order) tensor. The capsule must be
        named "dltensor". It is renamed to "used_dltensor" by this call and
        its tensor is released, so it must not be consumed again.

    Returns
    -------
    Table
        Table with a copy of the tensor data.

    Raises
    ------
    ValueError
        If ``managed_tensor`` is not a valid capsule named "dltensor".
    """
    if not PyCapsule_IsValid(managed_tensor, "dltensor"):
        raise ValueError("Invalid capsule object")
    cdef unique_ptr[table] c_result
    cdef Table result
    cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>PyCapsule_GetPointer(
        managed_tensor, "dltensor"
    )
    # Mark the capsule as consumed so that its own destructor will not try
    # to release the tensor a second time.
    PyCapsule_SetName(managed_tensor, "used_dltensor")

    # Note: A copy is always performed when converting the dlpack
    # data to a libcudf table. We also delete the dlpack_tensor pointer
    # as the pointer is not deleted by libcudf's from_dlpack function.
    # TODO: https://github.com/rapidsai/cudf/issues/10874
    # TODO: https://github.com/rapidsai/cudf/issues/10849
    try:
        with nogil:
            c_result = cpp_from_dlpack(dlpack_tensor)
        result = Table.from_libcudf(move(c_result))
    finally:
        # From here on we own the tensor (the capsule was renamed above), so
        # release it even when the conversion raises; otherwise it leaks.
        # DLPack permits a NULL deleter, in which case there is nothing to do.
        if dlpack_tensor.deleter != NULL:
            dlpack_tensor.deleter(dlpack_tensor)
    return result


cpdef object to_dlpack(Table input):
    """
    Convert a cudf table into a DLPack DLTensor.

    For details, see :cpp:func:`cudf::to_dlpack`

    Parameters
    ----------
    input : Table
        The table to convert. None of its columns may contain nulls.

    Returns
    -------
    PyCapsule
        Capsule named "dltensor" wrapping a 1D or 2D DLPack tensor with a
        copy of the table data.

    Raises
    ------
    ValueError
        If any column of ``input`` has a non-zero null count, or if libcudf
        returns a null tensor for ``input``.
    """
    # A plain loop is used rather than any(<genexpr>) because generator
    # expressions are closures, which Cython does not support inside cpdef
    # functions.
    for col in input._columns:
        if col.null_count():
            raise ValueError(
                "Cannot create a DLPack tensor with null values. "
                "Input is required to have null count as zero."
            )
    cdef DLManagedTensor *dlpack_tensor

    with nogil:
        dlpack_tensor = cpp_to_dlpack(input.view())

    # cudf::to_dlpack is documented as possibly returning nullptr. A capsule
    # cannot wrap a NULL pointer, so fail here with an explicit message
    # instead of relying on PyCapsule_New's generic error.
    if dlpack_tensor == NULL:
        raise ValueError(
            "libcudf did not produce a DLPack tensor for the given table."
        )

    return PyCapsule_New(
        dlpack_tensor,
        "dltensor",
        dlmanaged_tensor_pycapsule_deleter
    )


cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept:
    # Capsule destructor: release the wrapped DLManagedTensor unless the
    # capsule has already been consumed (renamed to "used_dltensor"), in
    # which case the consumer took over responsibility for the tensor.
    cdef DLManagedTensor* tensor
    if not PyCapsule_IsValid(pycap_obj, "used_dltensor"):
        tensor = <DLManagedTensor*>PyCapsule_GetPointer(pycap_obj, "dltensor")
        tensor.deleter(tensor)
10 changes: 6 additions & 4 deletions python/pylibcudf/pylibcudf/libcudf/interop.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ cdef extern from "cudf/interop.hpp" nogil:

cdef extern from "cudf/interop.hpp" namespace "cudf" \
nogil:
cdef unique_ptr[table] from_dlpack(const DLManagedTensor* tensor
) except +
cdef unique_ptr[table] from_dlpack(
const DLManagedTensor* managed_tensor
) except +

DLManagedTensor* to_dlpack(table_view input_table
) except +
DLManagedTensor* to_dlpack(
table_view input
Matt711 marked this conversation as resolved.
Show resolved Hide resolved
) except +

cdef cppclass column_metadata:
column_metadata() except +
Expand Down
31 changes: 31 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_interop.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import cupy as cp
import numpy as np
import pyarrow as pa
import pylibcudf as plc
import pytest
from utils import assert_table_eq


def test_list_dtype_roundtrip():
Expand Down Expand Up @@ -66,3 +69,31 @@ def test_decimal_other(data_type):

arrow_type = plc.interop.to_arrow(data_type, precision=precision)
assert arrow_type == pa.decimal128(precision, 0)


def test_dlpack_plc_able():
    """Round-trip a two-column table through to_dlpack and from_dlpack."""
    arrow_tbl = pa.table({"a": [1, 2, 3], "b": [5, 6, 7]})
    capsule = plc.interop.to_dlpack(plc.interop.from_arrow(arrow_tbl))
    assert_table_eq(arrow_tbl, plc.interop.from_dlpack(capsule))


def test_dlpack_cupy_array():
    """Import a DLPack capsule exported by a CuPy array."""
    capsule = cp.arange(3).toDlpack()
    got = plc.interop.from_dlpack(capsule)
    assert_table_eq(pa.table({"a": [0, 1, 2]}), got)


def test_dlpack_numpy_array():
    """Import a DLPack capsule exported by a NumPy array."""
    capsule = np.arange(3).__dlpack__()
    got = plc.interop.from_dlpack(capsule)
    assert_table_eq(pa.table({"a": [0, 1, 2]}), got)


def test_to_dlpack_error():
    """to_dlpack must reject a table whose columns contain nulls."""
    arrow_tbl = pa.table({"a": [1, None, 3], "b": [5, 6, 7]})
    plc_table = plc.interop.from_arrow(arrow_tbl)
    # Only to_dlpack is called inside the raises block: from_dlpack also
    # raises ValueError (for an invalid capsule), so wrapping both calls
    # could let the test pass without to_dlpack rejecting the nulls.
    with pytest.raises(ValueError, match="null values"):
        plc.interop.to_dlpack(plc_table)
Loading