Expose type traits to pylibcudf (#16197)

Rather than re-implementing the data type classification helpers in Python, follow the once-and-only-once (OAOO) principle by exposing and using the libcudf type trait definitions. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Bradley Dice (https://github.com/bdice) URL: #16197
rapidsai · Jul 5, 2024 · ae42218 · ae42218
1 parent f3a1216
commit ae42218
Show file tree

Hide file tree

Showing 13 changed files with 361 additions and 75 deletions.
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -18,27 +18,28 @@ This page provides API documentation for pylibcudf.
     filling
     gpumemoryview
     groupby
-    io/index.rst
     interop
     join
     lists
     merge
     quantiles
     reduce
+    replace
     reshape
     rolling
     round
     scalar
     search
-    stream_compaction
     sorting
-    replace
+    stream_compaction
     table
+    traits
     types
     unary
 
 .. toctree::
     :maxdepth: 2
     :caption: Subpackages
 
+    io/index.rst
     strings/index.rst
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst
@@ -0,0 +1,6 @@
+======
+traits
+======
+
+.. automodule:: cudf._lib.pylibcudf.traits
+   :members:
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -38,6 +38,7 @@ set(cython_sources
     stream_compaction.pyx
     sorting.pyx
     table.pyx
+    traits.pyx
     types.pyx
     unary.pyx
     utils.pyx

diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -23,6 +23,7 @@ from . cimport (
     sorting,
     stream_compaction,
     strings,
+    traits,
     types,
     unary,
 )
@@ -54,12 +55,14 @@ __all__ = [
     "quantiles",
     "reduce",
     "replace",
+    "reshape",
     "rolling",
     "round",
     "search",
     "stream_compaction",
     "strings",
     "sorting",
+    "traits",
     "types",
     "unary",
 ]
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -23,6 +23,7 @@
     sorting,
     stream_compaction,
     strings,
+    traits,
     types,
     unary,
 )
@@ -35,6 +36,7 @@
 __all__ = [
     "Column",
     "DataType",
+    "MaskState",
     "Scalar",
     "Table",
     "TypeId",
@@ -54,12 +56,14 @@
     "quantiles",
     "reduce",
     "replace",
+    "reshape",
     "rolling",
     "round",
     "search",
     "stream_compaction",
     "strings",
     "sorting",
+    "traits",
     "types",
     "unary",
 ]
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd
@@ -0,0 +1,27 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp cimport bool
+from libcpp.vector cimport vector
+
+from cudf._lib.pylibcudf.libcudf.types cimport data_type
+
+
+cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil:
+    cdef bool is_relationally_comparable(data_type)
+    cdef bool is_equality_comparable(data_type)
+    cdef bool is_numeric(data_type)
+    cdef bool is_index_type(data_type)
+    cdef bool is_unsigned(data_type)
+    cdef bool is_integral(data_type)
+    cdef bool is_integral_not_bool(data_type)
+    cdef bool is_floating_point(data_type)
+    cdef bool is_boolean(data_type)
+    cdef bool is_timestamp(data_type)
+    cdef bool is_fixed_point(data_type)
+    cdef bool is_duration(data_type)
+    cdef bool is_chrono(data_type)
+    cdef bool is_dictionary(data_type)
+    cdef bool is_fixed_width(data_type)
+    cdef bool is_compound(data_type)
+    cdef bool is_nested(data_type)
+    cdef bool is_bit_castable(data_type, data_type)
diff --git a/python/cudf/cudf/_lib/pylibcudf/traits.pxd b/python/cudf/cudf/_lib/pylibcudf/traits.pxd
@@ -0,0 +1,25 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp cimport bool
+
+from .types cimport DataType
+
+
+cpdef bool is_relationally_comparable(DataType typ)
+cpdef bool is_equality_comparable(DataType typ)
+cpdef bool is_numeric(DataType typ)
+cpdef bool is_index_type(DataType typ)
+cpdef bool is_unsigned(DataType typ)
+cpdef bool is_integral(DataType typ)
+cpdef bool is_integral_not_bool(DataType typ)
+cpdef bool is_floating_point(DataType typ)
+cpdef bool is_boolean(DataType typ)
+cpdef bool is_timestamp(DataType typ)
+cpdef bool is_fixed_point(DataType typ)
+cpdef bool is_duration(DataType typ)
+cpdef bool is_chrono(DataType typ)
+cpdef bool is_dictionary(DataType typ)
+cpdef bool is_fixed_width(DataType typ)
+cpdef bool is_compound(DataType typ)
+cpdef bool is_nested(DataType typ)
+cpdef bool is_bit_castable(DataType source, DataType target)
diff --git a/python/cudf/cudf/_lib/pylibcudf/traits.pyx b/python/cudf/cudf/_lib/pylibcudf/traits.pyx
@@ -0,0 +1,151 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp cimport bool
+
+from cudf._lib.pylibcudf.libcudf.utilities cimport traits
+
+from .types cimport DataType
+
+
+cpdef bool is_relationally_comparable(DataType typ):
+    """Checks if the given data type supports relational comparisons.
+
+    For details, see :cpp:func:`is_relationally_comparable`.
+    """
+    return traits.is_relationally_comparable(typ.c_obj)
+
+
+cpdef bool is_equality_comparable(DataType typ):
+    """Checks if the given data type supports equality comparisons.
+
+    For details, see :cpp:func:`is_equality_comparable`.
+    """
+    return traits.is_equality_comparable(typ.c_obj)
+
+
+cpdef bool is_numeric(DataType typ):
+    """Checks if the given data type is numeric.
+
+    For details, see :cpp:func:`is_numeric`.
+    """
+    return traits.is_numeric(typ.c_obj)
+
+
+cpdef bool is_index_type(DataType typ):
+    """Checks if the given data type is an index type.
+
+    For details, see :cpp:func:`is_index_type`.
+    """
+    return traits.is_index_type(typ.c_obj)
+
+
+cpdef bool is_unsigned(DataType typ):
+    """Checks if the given data type is an unsigned type.
+
+    For details, see :cpp:func:`is_unsigned`.
+    """
+    return traits.is_unsigned(typ.c_obj)
+
+
+cpdef bool is_integral(DataType typ):
+    """Checks if the given data type is an integral type.
+
+    For details, see :cpp:func:`is_integral`.
+    """
+    return traits.is_integral(typ.c_obj)
+
+
+cpdef bool is_integral_not_bool(DataType typ):
+    """Checks if the given data type is an integral type excluding booleans.
+
+    For details, see :cpp:func:`is_integral_not_bool`.
+    """
+    return traits.is_integral_not_bool(typ.c_obj)
+
+
+cpdef bool is_floating_point(DataType typ):
+    """Checks if the given data type is a floating point type.
+
+    For details, see :cpp:func:`is_floating_point`.
+    """
+    return traits.is_floating_point(typ.c_obj)
+
+
+cpdef bool is_boolean(DataType typ):
+    """Checks if the given data type is a boolean type.
+
+    For details, see :cpp:func:`is_boolean`.
+    """
+    return traits.is_boolean(typ.c_obj)
+
+
+cpdef bool is_timestamp(DataType typ):
+    """Checks if the given data type is a timestamp type.
+
+    For details, see :cpp:func:`is_timestamp`.
+    """
+    return traits.is_timestamp(typ.c_obj)
+
+
+cpdef bool is_fixed_point(DataType typ):
+    """Checks if the given data type is a fixed point type.
+
+    For details, see :cpp:func:`is_fixed_point`.
+    """
+    return traits.is_fixed_point(typ.c_obj)
+
+
+cpdef bool is_duration(DataType typ):
+    """Checks if the given data type is a duration type.
+
+    For details, see :cpp:func:`is_duration`.
+    """
+    return traits.is_duration(typ.c_obj)
+
+
+cpdef bool is_chrono(DataType typ):
+    """Checks if the given data type is a chrono type.
+
+    For details, see :cpp:func:`is_chrono`.
+    """
+    return traits.is_chrono(typ.c_obj)
+
+
+cpdef bool is_dictionary(DataType typ):
+    """Checks if the given data type is a dictionary type.
+
+    For details, see :cpp:func:`is_dictionary`.
+    """
+    return traits.is_dictionary(typ.c_obj)
+
+
+cpdef bool is_fixed_width(DataType typ):
+    """Checks if the given data type is a fixed width type.
+
+    For details, see :cpp:func:`is_fixed_width`.
+    """
+    return traits.is_fixed_width(typ.c_obj)
+
+
+cpdef bool is_compound(DataType typ):
+    """Checks if the given data type is a compound type.
+
+    For details, see :cpp:func:`is_compound`.
+    """
+    return traits.is_compound(typ.c_obj)
+
+
+cpdef bool is_nested(DataType typ):
+    """Checks if the given data type is a nested type.
+
+    For details, see :cpp:func:`is_nested`.
+    """
+    return traits.is_nested(typ.c_obj)
+
+
+cpdef bool is_bit_castable(DataType source, DataType target):
+    """Checks if the source type is bit-castable to the target type.
+
+    For details, see :cpp:func:`is_bit_castable`.
+    """
+    return traits.is_bit_castable(source.c_obj, target.c_obj)
diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py
@@ -102,49 +102,10 @@ def cudf_raises(expected_exception: BaseException, *args, **kwargs):
     return pytest.raises(expected_exception, *args, **kwargs)
 
 
-# TODO: Consider moving these type utilities into pylibcudf.types itself.
-def is_signed_integer(plc_dtype: plc.DataType):
-    return (
-        plc.TypeId.INT8.value <= plc_dtype.id().value <= plc.TypeId.INT64.value
-    )
-
-
-def is_integer(plc_dtype: plc.DataType):
-    return plc_dtype.id() in (
-        plc.TypeId.INT8,
-        plc.TypeId.INT16,
-        plc.TypeId.INT32,
-        plc.TypeId.INT64,
-        plc.TypeId.UINT8,
-        plc.TypeId.UINT16,
-        plc.TypeId.UINT32,
-        plc.TypeId.UINT64,
-    )
-
-
-def is_floating(plc_dtype: plc.DataType):
-    return plc_dtype.id() in (
-        plc.TypeId.FLOAT32,
-        plc.TypeId.FLOAT64,
-    )
-
-
-def is_boolean(plc_dtype: plc.DataType):
-    return plc_dtype.id() == plc.TypeId.BOOL8
-
-
 def is_string(plc_dtype: plc.DataType):
     return plc_dtype.id() == plc.TypeId.STRING
 
 
-def is_fixed_width(plc_dtype: plc.DataType):
-    return (
-        is_integer(plc_dtype)
-        or is_floating(plc_dtype)
-        or is_boolean(plc_dtype)
-    )
-
-
 def nesting_level(typ) -> tuple[int, int]:
     """Return list and struct nesting of a pyarrow type."""
     if isinstance(typ, pa.ListType):