Skip to content

Commit

Permalink
Expose type traits to pylibcudf (#16197)
Browse files Browse the repository at this point in the history
Rather than recreating the type classification in Python, define it once and only once (OAOO) by reusing the libcudf definitions.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #16197
  • Loading branch information
wence- authored Jul 5, 2024
1 parent f3a1216 commit ae42218
Show file tree
Hide file tree
Showing 13 changed files with 361 additions and 75 deletions.
7 changes: 4 additions & 3 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,28 @@ This page provides API documentation for pylibcudf.
filling
gpumemoryview
groupby
io/index.rst
interop
join
lists
merge
quantiles
reduce
replace
reshape
rolling
round
scalar
search
stream_compaction
sorting
replace
stream_compaction
table
traits
types
unary

.. toctree::
:maxdepth: 2
:caption: Subpackages

io/index.rst
strings/index.rst
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
======
traits
======

.. automodule:: cudf._lib.pylibcudf.traits
:members:
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ set(cython_sources
stream_compaction.pyx
sorting.pyx
table.pyx
traits.pyx
types.pyx
unary.pyx
utils.pyx
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ from . cimport (
sorting,
stream_compaction,
strings,
traits,
types,
unary,
)
Expand Down Expand Up @@ -54,12 +55,14 @@ __all__ = [
"quantiles",
"reduce",
"replace",
"reshape",
"rolling",
"round",
"search",
"stream_compaction",
"strings",
"sorting",
"traits",
"types",
"unary",
]
4 changes: 4 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
sorting,
stream_compaction,
strings,
traits,
types,
unary,
)
Expand All @@ -35,6 +36,7 @@
__all__ = [
"Column",
"DataType",
"MaskState",
"Scalar",
"Table",
"TypeId",
Expand All @@ -54,12 +56,14 @@
"quantiles",
"reduce",
"replace",
"reshape",
"rolling",
"round",
"search",
"stream_compaction",
"strings",
"sorting",
"traits",
"types",
"unary",
]
27 changes: 27 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.libcudf.types cimport data_type


# Cython declarations of the type-trait predicates provided by the C++ header
# "cudf/utilities/traits.hpp" (namespace ``cudf``). Every function returns a
# C++ ``bool``. The block is marked ``nogil``, so these functions may be
# called without holding the Python GIL.
cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil:
    # Single-argument predicates: each classifies one ``data_type``.
    cdef bool is_relationally_comparable(data_type)
    cdef bool is_equality_comparable(data_type)
    cdef bool is_numeric(data_type)
    cdef bool is_index_type(data_type)
    cdef bool is_unsigned(data_type)
    cdef bool is_integral(data_type)
    cdef bool is_integral_not_bool(data_type)
    cdef bool is_floating_point(data_type)
    cdef bool is_boolean(data_type)
    cdef bool is_timestamp(data_type)
    cdef bool is_fixed_point(data_type)
    cdef bool is_duration(data_type)
    cdef bool is_chrono(data_type)
    cdef bool is_dictionary(data_type)
    cdef bool is_fixed_width(data_type)
    cdef bool is_compound(data_type)
    cdef bool is_nested(data_type)
    # Two-argument predicate: relates a pair of ``data_type`` values.
    cdef bool is_bit_castable(data_type, data_type)
25 changes: 25 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/traits.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool

from .types cimport DataType


# Signatures of the cpdef type-trait functions exported by this module, so
# that other Cython modules can cimport them. Each takes pylibcudf
# ``DataType`` argument(s) and returns a C++ ``bool``.

# Single-argument predicates: classify one DataType.
cpdef bool is_relationally_comparable(DataType typ)
cpdef bool is_equality_comparable(DataType typ)
cpdef bool is_numeric(DataType typ)
cpdef bool is_index_type(DataType typ)
cpdef bool is_unsigned(DataType typ)
cpdef bool is_integral(DataType typ)
cpdef bool is_integral_not_bool(DataType typ)
cpdef bool is_floating_point(DataType typ)
cpdef bool is_boolean(DataType typ)
cpdef bool is_timestamp(DataType typ)
cpdef bool is_fixed_point(DataType typ)
cpdef bool is_duration(DataType typ)
cpdef bool is_chrono(DataType typ)
cpdef bool is_dictionary(DataType typ)
cpdef bool is_fixed_width(DataType typ)
cpdef bool is_compound(DataType typ)
cpdef bool is_nested(DataType typ)
# Two-argument predicate: relates a source DataType to a target DataType.
cpdef bool is_bit_castable(DataType source, DataType target)
151 changes: 151 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/traits.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool

from cudf._lib.pylibcudf.libcudf.utilities cimport traits

from .types cimport DataType


cpdef bool is_relationally_comparable(DataType typ):
    """Report whether ``typ`` supports relational comparisons.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_relationally_comparable`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_relationally_comparable(typ.c_obj)
    return result


cpdef bool is_equality_comparable(DataType typ):
    """Report whether ``typ`` supports equality comparisons.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_equality_comparable`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_equality_comparable(typ.c_obj)
    return result


cpdef bool is_numeric(DataType typ):
    """Report whether ``typ`` is a numeric data type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_numeric`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_numeric(typ.c_obj)
    return result


cpdef bool is_index_type(DataType typ):
    """Report whether ``typ`` is an index type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_index_type`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_index_type(typ.c_obj)
    return result


cpdef bool is_unsigned(DataType typ):
    """Report whether ``typ`` is an unsigned type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_unsigned`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_unsigned(typ.c_obj)
    return result


cpdef bool is_integral(DataType typ):
    """Report whether ``typ`` is an integral type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_integral`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_integral(typ.c_obj)
    return result


cpdef bool is_integral_not_bool(DataType typ):
    """Report whether ``typ`` is an integral type, excluding booleans.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_integral_not_bool`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_integral_not_bool(typ.c_obj)
    return result


cpdef bool is_floating_point(DataType typ):
    """Report whether ``typ`` is a floating point type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_floating_point`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_floating_point(typ.c_obj)
    return result


cpdef bool is_boolean(DataType typ):
    """Report whether ``typ`` is a boolean type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_boolean`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_boolean(typ.c_obj)
    return result


cpdef bool is_timestamp(DataType typ):
    """Report whether ``typ`` is a timestamp type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_timestamp`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_timestamp(typ.c_obj)
    return result


cpdef bool is_fixed_point(DataType typ):
    """Report whether ``typ`` is a fixed point type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_fixed_point`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_fixed_point(typ.c_obj)
    return result


cpdef bool is_duration(DataType typ):
    """Report whether ``typ`` is a duration type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_duration`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_duration(typ.c_obj)
    return result


cpdef bool is_chrono(DataType typ):
    """Report whether ``typ`` is a chrono type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_chrono`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_chrono(typ.c_obj)
    return result


cpdef bool is_dictionary(DataType typ):
    """Report whether ``typ`` is a dictionary type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_dictionary`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_dictionary(typ.c_obj)
    return result


cpdef bool is_fixed_width(DataType typ):
    """Report whether ``typ`` is a fixed width type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_fixed_width`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_fixed_width(typ.c_obj)
    return result


cpdef bool is_compound(DataType typ):
    """Report whether ``typ`` is a compound type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_compound`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_compound(typ.c_obj)
    return result


cpdef bool is_nested(DataType typ):
    """Report whether ``typ`` is a nested type.

    Thin wrapper that forwards to libcudf; for details, see
    :cpp:func:`is_nested`.

    Parameters
    ----------
    typ : DataType
        The data type to classify.

    Returns
    -------
    bool
        The answer given by libcudf for ``typ``.
    """
    cdef bool result = traits.is_nested(typ.c_obj)
    return result


cpdef bool is_bit_castable(DataType source, DataType target):
    """Report whether ``source`` is bit-castable to ``target``.

    Thin wrapper that forwards both types to libcudf; for details, see
    :cpp:func:`is_bit_castable`.

    Parameters
    ----------
    source : DataType
        The data type to cast from.
    target : DataType
        The data type to cast to.

    Returns
    -------
    bool
        The answer given by libcudf for the ``(source, target)`` pair.
    """
    cdef bool result = traits.is_bit_castable(source.c_obj, target.c_obj)
    return result
39 changes: 0 additions & 39 deletions python/cudf/cudf/pylibcudf_tests/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,49 +102,10 @@ def cudf_raises(expected_exception: BaseException, *args, **kwargs):
return pytest.raises(expected_exception, *args, **kwargs)


# TODO: Consider moving these type utilities into pylibcudf.types itself.
def is_signed_integer(plc_dtype: plc.DataType):
    """Return whether the type id of *plc_dtype* is in the INT8..INT64 range.

    The check compares the ``.value`` of the type id against the values of
    ``plc.TypeId.INT8`` and ``plc.TypeId.INT64`` (both bounds inclusive).
    """
    # NOTE(review): presumably the signed integer ids are numbered
    # contiguously from INT8 to INT64 in plc.TypeId -- confirm there.
    lowest = plc.TypeId.INT8.value
    type_value = plc_dtype.id().value
    if not lowest <= type_value:
        return False
    return type_value <= plc.TypeId.INT64.value


def is_integer(plc_dtype: plc.DataType):
    """Return whether *plc_dtype* is one of the INT*/UINT* type ids.

    Covers the 8-, 16-, 32- and 64-bit ids in both signed (``INT``) and
    unsigned (``UINT``) flavours; nothing else counts as an integer here.
    """
    type_id = plc_dtype.id()
    integer_ids = (
        plc.TypeId.INT8,
        plc.TypeId.INT16,
        plc.TypeId.INT32,
        plc.TypeId.INT64,
        plc.TypeId.UINT8,
        plc.TypeId.UINT16,
        plc.TypeId.UINT32,
        plc.TypeId.UINT64,
    )
    return type_id in integer_ids


def is_floating(plc_dtype: plc.DataType):
    """Return whether *plc_dtype* has type id FLOAT32 or FLOAT64."""
    type_id = plc_dtype.id()
    floating_ids = (plc.TypeId.FLOAT32, plc.TypeId.FLOAT64)
    return type_id in floating_ids


def is_boolean(plc_dtype: plc.DataType):
    """Return whether *plc_dtype* has type id BOOL8."""
    type_id = plc_dtype.id()
    return type_id == plc.TypeId.BOOL8


def is_string(plc_dtype: plc.DataType):
    """Return whether *plc_dtype* has type id STRING."""
    type_id = plc_dtype.id()
    return type_id == plc.TypeId.STRING


def is_fixed_width(plc_dtype: plc.DataType):
    """Return whether *plc_dtype* is an integer, floating or boolean type.

    The answer is the logical OR of ``is_integer``, ``is_floating`` and
    ``is_boolean``, tried in that order and stopping at the first match.
    """
    if is_integer(plc_dtype):
        return True
    if is_floating(plc_dtype):
        return True
    return is_boolean(plc_dtype)


def nesting_level(typ) -> tuple[int, int]:
"""Return list and struct nesting of a pyarrow type."""
if isinstance(typ, pa.ListType):
Expand Down
Loading

0 comments on commit ae42218

Please sign in to comment.