From b6d08cae87aa489706a1fc1eefde5c1efe3f3ebf Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 25 Aug 2023 15:31:41 -0500 Subject: [PATCH] Switch pylibcudf-enabled types to use enum class in Cython (#13931) This PR leverages Cython 3's support for C++'s scoped enumerations to more faithfully translate libcudf types into cuDF Cython and pylibcudf. Due to some Cython 3 limitations, there are a few workarounds in this PR. However, they are relatively minor and can be adjusted later as Cython improves. In the meantime, the change here is an improvement still worth merging, especially since it sets a template for how pylibcudf code should look as more of it emerges. Existing cuDF Cython is only updated to the minimum extent required for it to compile. Fully switching the old code to use enum class-style syntax isn't worthwhile since those internals should eventually be migrated to use pylibcudf in pure Python mode anyway. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Ashwin Srinath (https://github.com/shwina) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/13931 --- docs/cudf/source/developer_guide/pylibcudf.md | 66 +++++++++++++++---- python/cudf/cudf/_lib/CMakeLists.txt | 1 + python/cudf/cudf/_lib/cpp/CMakeLists.txt | 23 +++++++ python/cudf/cudf/_lib/cpp/copying.pxd | 6 +- python/cudf/cudf/_lib/cpp/copying.pyx | 0 python/cudf/cudf/_lib/cpp/types.pxd | 65 +++++++++--------- python/cudf/cudf/_lib/cpp/types.pyx | 0 python/cudf/cudf/_lib/pylibcudf/__init__.pxd | 5 +- python/cudf/cudf/_lib/pylibcudf/column.pyx | 5 +- python/cudf/cudf/_lib/pylibcudf/copying.pxd | 20 +----- python/cudf/cudf/_lib/pylibcudf/copying.pyx | 20 +++--- python/cudf/cudf/_lib/pylibcudf/types.pxd | 55 +--------------- python/cudf/cudf/_lib/pylibcudf/types.pyx | 16 ++--- python/cudf/cudf/_lib/scalar.pyx | 56 ++++++++-------- .../strings/convert/convert_fixed_point.pyx | 12 ++-- python/cudf/cudf/_lib/types.pyx | 1 + 16 files 
changed, 177 insertions(+), 174 deletions(-) create mode 100644 python/cudf/cudf/_lib/cpp/CMakeLists.txt create mode 100644 python/cudf/cudf/_lib/cpp/copying.pyx create mode 100644 python/cudf/cudf/_lib/cpp/types.pyx diff --git a/docs/cudf/source/developer_guide/pylibcudf.md b/docs/cudf/source/developer_guide/pylibcudf.md index 4940e329653..1b321dbb1fe 100644 --- a/docs/cudf/source/developer_guide/pylibcudf.md +++ b/docs/cudf/source/developer_guide/pylibcudf.md @@ -86,7 +86,7 @@ cpdef Table gather( cpp_copying.gather( source_table.view(), gather_map.view(), - py_policy_to_c_policy(bounds_policy) + bounds_policy ) ) return Table.from_libcudf(move(c_result)) @@ -94,18 +94,62 @@ cpdef Table gather( There are a couple of notable points from the snippet above: - The object returned from libcudf is immediately converted to a pylibcudf type. -- `cudf::gather` accepts a `cudf::out_of_bounds_policy` enum parameter, which is mirrored by the `cdef `class OutOfBoundsPolicy` as mentioned in [the data structures example above](data-structures). +- `cudf::gather` accepts a `cudf::out_of_bounds_policy` enum parameter. `OutOfBoundsPolicy` is an alias for this type in pylibcudf that matches our Python naming conventions (CapsCase instead of snake\_case). ## Miscellaneous Notes -### Cython Scoped Enums and Casting -Cython does not support scoped enumerations. -It assumes that enums correspond to their underlying value types and will thus attempt operations that are invalid. -To fix this, many places in pylibcudf Cython code contain double casts that look like +### Cython Scoped Enums +Cython 3 introduced support for scoped enumerations. +However, this support has some bugs as well as some easy pitfalls. +Our usage of enums is intended to minimize the complexity of our code while also working around Cython's limitations. + +```{warning} +The guidance in this section may change often as Cython is updated and our understanding of best practices evolves. 
+``` + +- All pxd files that declare a C++ enum should use `cpdef enum class` declarations. + - Reason: This declaration makes the C++ enum available in Cython code while also transparently creating a Python enum. +- Any pxd file containing only C++ declarations must still have a corresponding pyx file if any of the declarations are scoped enums. + - Reason: The creation of the Python enum requires that Cython actually generate the necessary Python C API code, which will not happen if only a pxd file is present. +- If a C++ enum will be part of a pylibcudf module's public API, then it should be imported (not cimported) directly into the pyx file and aliased with a name that matches our Python class naming conventions (CapsCase) instead of our C++ naming convention (snake\_case). + - Reason: We want to expose the enum to both Python and Cython consumers of the module. As a side effect, this aliasing avoids [this Cython bug](https://github.com/cython/cython/issues/5609). + - Note: Once the above Cython bug is resolved, the enum should also be aliased into the pylibcudf pxd file when it is cimported so that Python and Cython usage will match. + +Here is an example of appropriate enum usage. + + ```cython -return <cpp_type> ( - <underlying_type_t_cpp_type> py_policy -) +# cpp/copying.pxd +cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: + # cpdef here so that we export both a cdef enum class and a Python enum.Enum.
+ cpdef enum class out_of_bounds_policy(bool): + NULLIFY + DONT_CHECK + + +# cpp/copying.pyx +# This file is empty, but is required to compile the Python enum in cpp/copying.pxd + + +# pylibcudf/copying.pxd + +# cimport the enum using the exact name +# Once https://github.com/cython/cython/issues/5609 is resolved, +# this import should instead be +# from cudf._lib.cpp.copying cimport out_of_bounds_policy as OutOfBoundsPolicy +from cudf._lib.cpp.copying cimport out_of_bounds_policy + + +# pylibcudf/copying.pyx +# Access cpp.copying members that aren't part of this module's public API via +# this module alias +from cudf._lib.cpp cimport copying as cpp_copying +from cudf._lib.cpp.copying cimport out_of_bounds_policy + +# This import exposes the enum in the public API of this module. +# It requires a no-cython-lint tag because it will be unused: all typing of +# parameters etc will need to use the Cython name `out_of_bounds_policy` until +# the Cython bug is resolved. +from cudf._lib.cpp.copying import \ + out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint ``` -where `cpp_type` is some libcudf enum with a specified underlying type. -This double-cast will be removed when we migrate to Cython 3, which adds proper support for C++ scoped enumerations. 
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index 37544e1c7cd..06de6cc825f 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -106,6 +106,7 @@ foreach(target IN LISTS targets_using_arrow_headers) target_include_directories(${target} PRIVATE "${PYARROW_INCLUDE_DIR}") endforeach() +add_subdirectory(cpp) add_subdirectory(io) add_subdirectory(nvtext) add_subdirectory(pylibcudf) diff --git a/python/cudf/cudf/_lib/cpp/CMakeLists.txt b/python/cudf/cudf/_lib/cpp/CMakeLists.txt new file mode 100644 index 00000000000..a99aa58dfe8 --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/CMakeLists.txt @@ -0,0 +1,23 @@ +# ============================================================================= +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +set(cython_sources copying.pyx types.pyx) + +set(linked_libraries cudf::cudf) + +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp +) diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index 8961675711f..20725c252fc 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -19,9 +19,9 @@ from cudf._lib.exception_handler cimport cudf_exception_handler ctypedef const scalar constscalar cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: - ctypedef enum out_of_bounds_policy: - NULLIFY 'cudf::out_of_bounds_policy::NULLIFY' - DONT_CHECK 'cudf::out_of_bounds_policy::DONT_CHECK' + cpdef enum class out_of_bounds_policy(bool): + NULLIFY + DONT_CHECK cdef unique_ptr[table] gather ( const table_view& source_table, diff --git a/python/cudf/cudf/_lib/cpp/copying.pyx b/python/cudf/cudf/_lib/cpp/copying.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/_lib/cpp/types.pxd b/python/cudf/cudf/_lib/cpp/types.pxd index 11480d774ef..14bf8a83de0 100644 --- a/python/cudf/cudf/_lib/cpp/types.pxd +++ b/python/cudf/cudf/_lib/cpp/types.pxd @@ -4,6 +4,11 @@ from libc.stdint cimport int32_t, uint32_t cdef extern from "cudf/types.hpp" namespace "cudf" nogil: + # The declaration below is to work around + # https://github.com/cython/cython/issues/5637 + """ + #define __PYX_ENUM_CLASS_DECL enum + """ ctypedef int32_t size_type ctypedef uint32_t bitmask_type ctypedef uint32_t char_utf8 @@ -49,36 +54,36 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil: ALL_EQUAL "cudf::nan_equality::ALL_EQUAL" NANS_UNEQUAL "cudf::nan_equality::UNEQUAL" - ctypedef enum type_id "cudf::type_id": - EMPTY "cudf::type_id::EMPTY" - INT8 "cudf::type_id::INT8" - INT16 "cudf::type_id::INT16" - INT32 
"cudf::type_id::INT32" - INT64 "cudf::type_id::INT64" - UINT8 "cudf::type_id::UINT8" - UINT16 "cudf::type_id::UINT16" - UINT32 "cudf::type_id::UINT32" - UINT64 "cudf::type_id::UINT64" - FLOAT32 "cudf::type_id::FLOAT32" - FLOAT64 "cudf::type_id::FLOAT64" - BOOL8 "cudf::type_id::BOOL8" - TIMESTAMP_DAYS "cudf::type_id::TIMESTAMP_DAYS" - TIMESTAMP_SECONDS "cudf::type_id::TIMESTAMP_SECONDS" - TIMESTAMP_MILLISECONDS "cudf::type_id::TIMESTAMP_MILLISECONDS" - TIMESTAMP_MICROSECONDS "cudf::type_id::TIMESTAMP_MICROSECONDS" - TIMESTAMP_NANOSECONDS "cudf::type_id::TIMESTAMP_NANOSECONDS" - DICTIONARY32 "cudf::type_id::DICTIONARY32" - STRING "cudf::type_id::STRING" - LIST "cudf::type_id::LIST" - STRUCT "cudf::type_id::STRUCT" - NUM_TYPE_IDS "cudf::type_id::NUM_TYPE_IDS" - DURATION_SECONDS "cudf::type_id::DURATION_SECONDS" - DURATION_MILLISECONDS "cudf::type_id::DURATION_MILLISECONDS" - DURATION_MICROSECONDS "cudf::type_id::DURATION_MICROSECONDS" - DURATION_NANOSECONDS "cudf::type_id::DURATION_NANOSECONDS" - DECIMAL32 "cudf::type_id::DECIMAL32" - DECIMAL64 "cudf::type_id::DECIMAL64" - DECIMAL128 "cudf::type_id::DECIMAL128" + cpdef enum class type_id(int32_t): + EMPTY + INT8 + INT16 + INT32 + INT64 + UINT8 + UINT16 + UINT32 + UINT64 + FLOAT32 + FLOAT64 + BOOL8 + TIMESTAMP_DAYS + TIMESTAMP_SECONDS + TIMESTAMP_MILLISECONDS + TIMESTAMP_MICROSECONDS + TIMESTAMP_NANOSECONDS + DICTIONARY32 + STRING + LIST + STRUCT + NUM_TYPE_IDS + DURATION_SECONDS + DURATION_MILLISECONDS + DURATION_MICROSECONDS + DURATION_NANOSECONDS + DECIMAL32 + DECIMAL64 + DECIMAL128 cdef cppclass data_type: data_type() except + diff --git a/python/cudf/cudf/_lib/cpp/types.pyx b/python/cudf/cudf/_lib/cpp/types.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd index b4f8bfad4fb..ba7822b0a54 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd @@ -5,13 
+5,14 @@ from . cimport copying from .column cimport Column from .gpumemoryview cimport gpumemoryview from .table cimport Table -from .types cimport DataType, TypeId +# TODO: cimport type_id once +# https://github.com/cython/cython/issues/5609 is resolved +from .types cimport DataType __all__ = [ "Column", "DataType", "Table", - "TypeId", "copying", "gpumemoryview", ] diff --git a/python/cudf/cudf/_lib/pylibcudf/column.pyx b/python/cudf/cudf/_lib/pylibcudf/column.pyx index d9b2ca98ead..40afc8aaa8a 100644 --- a/python/cudf/cudf/_lib/pylibcudf/column.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/column.pyx @@ -9,7 +9,7 @@ from cudf._lib.cpp.column.column cimport column, column_contents from cudf._lib.cpp.types cimport size_type from .gpumemoryview cimport gpumemoryview -from .types cimport DataType, TypeId +from .types cimport DataType, type_id from .utils cimport int_to_bitmask_ptr, int_to_void_ptr @@ -179,10 +179,11 @@ cdef class Column: cpdef list children(self): return self._children + cdef class ListColumnView: """Accessor for methods of a Column that are specific to lists.""" def __init__(self, Column col): - if col.type().id() != TypeId.LIST: + if col.type().id() != type_id.LIST: raise TypeError("Column is not a list type") self._column = col diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pxd b/python/cudf/cudf/_lib/pylibcudf/copying.pxd index 0ebffacfb9f..d57be650710 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pxd @@ -2,30 +2,14 @@ from libcpp cimport bool as cbool -from cudf._lib.cpp cimport copying as cpp_copying +from cudf._lib.cpp.copying cimport out_of_bounds_policy from .column cimport Column from .table cimport Table -ctypedef cbool underlying_type_t_out_of_bounds_policy - - -# Enum representing possible enum policies. This is the Cython representation -# of libcudf's out_of_bounds_policy. 
-cpdef enum OutOfBoundsPolicy: - NULLIFY = cpp_copying.NULLIFY - DONT_CHECK = ( - cpp_copying.DONT_CHECK - ) - - -cdef cpp_copying.out_of_bounds_policy py_policy_to_c_policy( - OutOfBoundsPolicy py_policy -) nogil - cpdef Table gather( Table source_table, Column gather_map, - OutOfBoundsPolicy bounds_policy + out_of_bounds_policy bounds_policy ) diff --git a/python/cudf/cudf/_lib/pylibcudf/copying.pyx b/python/cudf/cudf/_lib/pylibcudf/copying.pyx index 7869a917983..a27b44b3107 100644 --- a/python/cudf/cudf/_lib/pylibcudf/copying.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/copying.pyx @@ -8,27 +8,23 @@ from libcpp.utility cimport move # we really want here would be # cimport libcudf... libcudf.copying.algo(...) from cudf._lib.cpp cimport copying as cpp_copying +from cudf._lib.cpp.copying cimport out_of_bounds_policy + +from cudf._lib.cpp.copying import \ + out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint + from cudf._lib.cpp.table.table cimport table from .column cimport Column from .table cimport Table -cdef inline cpp_copying.out_of_bounds_policy py_policy_to_c_policy( - OutOfBoundsPolicy py_policy -) nogil: - """Convert a Cython policy the corresponding libcudf policy type.""" - return ( - py_policy - ) - - # TODO: Is it OK to reference the corresponding libcudf algorithm in the # documentation? Otherwise there's a lot of room for duplication. cpdef Table gather( Table source_table, Column gather_map, - OutOfBoundsPolicy bounds_policy + out_of_bounds_policy bounds_policy ): """Select rows from source_table according to the provided gather_map. @@ -40,7 +36,7 @@ cpdef Table gather( The table object from which to pull data. gather_map : Column The list of row indices to pull out of the source table. - bounds_policy : OutOfBoundsPolicy + bounds_policy : out_of_bounds_policy Controls whether out of bounds indices are checked and nullified in the output or if indices are assumed to be in bounds. 
@@ -55,7 +51,7 @@ cpdef Table gather( cpp_copying.gather( source_table.view(), gather_map.view(), - py_policy_to_c_policy(bounds_policy) + bounds_policy ) ) return Table.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/types.pxd b/python/cudf/cudf/_lib/pylibcudf/types.pxd index af0de6ba446..80baa484be7 100644 --- a/python/cudf/cudf/_lib/pylibcudf/types.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/types.pxd @@ -3,64 +3,13 @@ from libc.stdint cimport int32_t from libcpp cimport bool as cbool -from cudf._lib.cpp.types cimport data_type, interpolation, null_policy, type_id - -ctypedef int32_t underlying_type_t_type_id - - -# Enum representing possible data type ids. This is the Cython representation -# of libcudf's type_id. -cpdef enum TypeId: - EMPTY = type_id.EMPTY - INT8 = type_id.INT8 - INT16 = type_id.INT16 - INT32 = type_id.INT32 - INT64 = type_id.INT64 - UINT8 = type_id.UINT8 - UINT16 = type_id.UINT16 - UINT32 = type_id.UINT32 - UINT64 = type_id.UINT64 - FLOAT32 = type_id.FLOAT32 - FLOAT64 = type_id.FLOAT64 - BOOL8 = type_id.BOOL8 - TIMESTAMP_DAYS = type_id.TIMESTAMP_DAYS - TIMESTAMP_SECONDS = type_id.TIMESTAMP_SECONDS - TIMESTAMP_MILLISECONDS = ( - type_id.TIMESTAMP_MILLISECONDS - ) - TIMESTAMP_MICROSECONDS = ( - type_id.TIMESTAMP_MICROSECONDS - ) - TIMESTAMP_NANOSECONDS = ( - type_id.TIMESTAMP_NANOSECONDS - ) - DICTIONARY32 = type_id.DICTIONARY32 - STRING = type_id.STRING - LIST = type_id.LIST - STRUCT = type_id.STRUCT - NUM_TYPE_IDS = type_id.NUM_TYPE_IDS - DURATION_SECONDS = type_id.DURATION_SECONDS - DURATION_MILLISECONDS = ( - type_id.DURATION_MILLISECONDS - ) - DURATION_MICROSECONDS = ( - type_id.DURATION_MICROSECONDS - ) - DURATION_NANOSECONDS = ( - type_id.DURATION_NANOSECONDS - ) - DECIMAL32 = type_id.DECIMAL32 - DECIMAL64 = type_id.DECIMAL64 - DECIMAL128 = type_id.DECIMAL128 - - -cdef type_id py_type_to_c_type(TypeId py_type_id) nogil +from cudf._lib.cpp.types cimport data_type, type_id cdef class DataType: cdef data_type c_obj - 
cpdef TypeId id(self) + cpdef type_id id(self) cpdef int32_t scale(self) @staticmethod diff --git a/python/cudf/cudf/_lib/pylibcudf/types.pyx b/python/cudf/cudf/_lib/pylibcudf/types.pyx index 23d24182ac4..b1391723f0e 100644 --- a/python/cudf/cudf/_lib/pylibcudf/types.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/types.pyx @@ -2,11 +2,9 @@ from libc.stdint cimport int32_t -from cudf._lib.cpp.types cimport type_id +from cudf._lib.cpp.types cimport data_type, type_id - -cdef type_id py_type_to_c_type(TypeId py_type_id) nogil: - return ( py_type_id) +from cudf._lib.cpp.types import type_id as TypeId # no-cython-lint cdef class DataType: @@ -21,13 +19,13 @@ cdef class DataType: scale : int The scale associated with the data. Only used for decimal data types. """ - def __cinit__(self, TypeId id, int32_t scale=0): - self.c_obj = data_type(py_type_to_c_type(id), scale) + def __cinit__(self, type_id id, int32_t scale=0): + self.c_obj = data_type(id, scale) # TODO: Consider making both id and scale cached properties. - cpdef TypeId id(self): + cpdef type_id id(self): """Get the id associated with this data type.""" - return TypeId(self.c_obj.id()) + return self.c_obj.id() cpdef int32_t scale(self): """Get the scale associated with this data type.""" @@ -42,6 +40,6 @@ cdef class DataType: (even direct pylibcudf Cython users). """ # Spoof an empty data type then swap in the real one. 
- cdef DataType ret = DataType.__new__(DataType, TypeId.EMPTY) + cdef DataType ret = DataType.__new__(DataType, type_id.EMPTY) ret.c_obj = dt return ret diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 39a1b0609cf..0407785b2d8 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -200,23 +200,23 @@ cdef class DeviceScalar: if dtype is not None: s._dtype = dtype elif cdtype.id() in { - libcudf_types.DECIMAL32, - libcudf_types.DECIMAL64, - libcudf_types.DECIMAL128, + libcudf_types.type_id.DECIMAL32, + libcudf_types.type_id.DECIMAL64, + libcudf_types.type_id.DECIMAL128, }: raise TypeError( "Must pass a dtype when constructing from a fixed-point scalar" ) - elif cdtype.id() == libcudf_types.STRUCT: + elif cdtype.id() == libcudf_types.type_id.STRUCT: struct_table_view = (s.get_raw_ptr())[0].view() s._dtype = StructDtype({ str(i): dtype_from_column_view(struct_table_view.column(i)) for i in range(struct_table_view.num_columns()) }) - elif cdtype.id() == libcudf_types.LIST: + elif cdtype.id() == libcudf_types.type_id.LIST: if ( s.get_raw_ptr() - )[0].view().type().id() == libcudf_types.LIST: + )[0].view().type().id() == libcudf_types.type_id.LIST: s._dtype = dtype_from_column_view( (s.get_raw_ptr())[0].view() ) @@ -442,27 +442,27 @@ cdef _get_np_scalar_from_numeric(unique_ptr[scalar]& s): cdef libcudf_types.data_type cdtype = s_ptr[0].type() - if cdtype.id() == libcudf_types.INT8: + if cdtype.id() == libcudf_types.type_id.INT8: return np.int8((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.INT16: + elif cdtype.id() == libcudf_types.type_id.INT16: return np.int16((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.INT32: + elif cdtype.id() == libcudf_types.type_id.INT32: return np.int32((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.INT64: + elif cdtype.id() == libcudf_types.type_id.INT64: return np.int64((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.UINT8: + elif 
cdtype.id() == libcudf_types.type_id.UINT8: return np.uint8((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.UINT16: + elif cdtype.id() == libcudf_types.type_id.UINT16: return np.uint16((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.UINT32: + elif cdtype.id() == libcudf_types.type_id.UINT32: return np.uint32((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.UINT64: + elif cdtype.id() == libcudf_types.type_id.UINT64: return np.uint64((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.FLOAT32: + elif cdtype.id() == libcudf_types.type_id.FLOAT32: return np.float32((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.FLOAT64: + elif cdtype.id() == libcudf_types.type_id.FLOAT64: return np.float64((s_ptr)[0].value()) - elif cdtype.id() == libcudf_types.BOOL8: + elif cdtype.id() == libcudf_types.type_id.BOOL8: return np.bool_((s_ptr)[0].value()) else: raise ValueError("Could not convert cudf::scalar to numpy scalar") @@ -475,15 +475,15 @@ cdef _get_py_decimal_from_fixed_point(unique_ptr[scalar]& s): cdef libcudf_types.data_type cdtype = s_ptr[0].type() - if cdtype.id() == libcudf_types.DECIMAL64: + if cdtype.id() == libcudf_types.type_id.DECIMAL64: rep_val = int((s_ptr)[0].value()) scale = int((s_ptr)[0].type().scale()) return decimal.Decimal(rep_val).scaleb(scale) - elif cdtype.id() == libcudf_types.DECIMAL32: + elif cdtype.id() == libcudf_types.type_id.DECIMAL32: rep_val = int((s_ptr)[0].value()) scale = int((s_ptr)[0].type().scale()) return decimal.Decimal(rep_val).scaleb(scale) - elif cdtype.id() == libcudf_types.DECIMAL128: + elif cdtype.id() == libcudf_types.type_id.DECIMAL128: rep_val = int((s_ptr)[0].value()) scale = int((s_ptr)[0].type().scale()) return decimal.Decimal(rep_val).scaleb(scale) @@ -499,28 +499,28 @@ cdef _get_np_scalar_from_timestamp64(unique_ptr[scalar]& s): cdef libcudf_types.data_type cdtype = s_ptr[0].type() - if cdtype.id() == libcudf_types.TIMESTAMP_SECONDS: + if cdtype.id() == 
libcudf_types.type_id.TIMESTAMP_SECONDS: return np.datetime64( ( s_ptr )[0].ticks_since_epoch_64(), datetime_unit_map[(cdtype.id())] ) - elif cdtype.id() == libcudf_types.TIMESTAMP_MILLISECONDS: + elif cdtype.id() == libcudf_types.type_id.TIMESTAMP_MILLISECONDS: return np.datetime64( ( s_ptr )[0].ticks_since_epoch_64(), datetime_unit_map[(cdtype.id())] ) - elif cdtype.id() == libcudf_types.TIMESTAMP_MICROSECONDS: + elif cdtype.id() == libcudf_types.type_id.TIMESTAMP_MICROSECONDS: return np.datetime64( ( s_ptr )[0].ticks_since_epoch_64(), datetime_unit_map[(cdtype.id())] ) - elif cdtype.id() == libcudf_types.TIMESTAMP_NANOSECONDS: + elif cdtype.id() == libcudf_types.type_id.TIMESTAMP_NANOSECONDS: return np.datetime64( ( s_ptr @@ -540,28 +540,28 @@ cdef _get_np_scalar_from_timedelta64(unique_ptr[scalar]& s): cdef libcudf_types.data_type cdtype = s_ptr[0].type() - if cdtype.id() == libcudf_types.DURATION_SECONDS: + if cdtype.id() == libcudf_types.type_id.DURATION_SECONDS: return np.timedelta64( ( s_ptr )[0].ticks(), duration_unit_map[(cdtype.id())] ) - elif cdtype.id() == libcudf_types.DURATION_MILLISECONDS: + elif cdtype.id() == libcudf_types.type_id.DURATION_MILLISECONDS: return np.timedelta64( ( s_ptr )[0].ticks(), duration_unit_map[(cdtype.id())] ) - elif cdtype.id() == libcudf_types.DURATION_MICROSECONDS: + elif cdtype.id() == libcudf_types.type_id.DURATION_MICROSECONDS: return np.timedelta64( ( s_ptr )[0].ticks(), duration_unit_map[(cdtype.id())] ) - elif cdtype.id() == libcudf_types.DURATION_NANOSECONDS: + elif cdtype.id() == libcudf_types.type_id.DURATION_NANOSECONDS: return np.timedelta64( ( s_ptr diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx index 177cbffddb0..2085d5c2896 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA 
CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. import cudf @@ -15,7 +15,7 @@ from cudf._lib.cpp.strings.convert.convert_fixed_point cimport ( is_fixed_point as cpp_is_fixed_point, to_fixed_point as cpp_to_fixed_point, ) -from cudf._lib.cpp.types cimport DECIMAL32, DECIMAL64, DECIMAL128, data_type +from cudf._lib.cpp.types cimport data_type, type_id @acquire_spill_lock() @@ -61,11 +61,11 @@ def to_decimal(Column input_col, object out_type): cdef int scale = out_type.scale cdef data_type c_out_type if isinstance(out_type, cudf.Decimal32Dtype): - c_out_type = data_type(DECIMAL32, -scale) + c_out_type = data_type(type_id.DECIMAL32, -scale) elif isinstance(out_type, cudf.Decimal64Dtype): - c_out_type = data_type(DECIMAL64, -scale) + c_out_type = data_type(type_id.DECIMAL64, -scale) elif isinstance(out_type, cudf.Decimal128Dtype): - c_out_type = data_type(DECIMAL128, -scale) + c_out_type = data_type(type_id.DECIMAL128, -scale) else: raise TypeError("should be a decimal dtype") with nogil: @@ -100,7 +100,7 @@ def is_fixed_point(Column input_col, object dtype): cdef unique_ptr[column] c_result cdef column_view source_view = input_col.view() cdef int scale = dtype.scale - cdef data_type c_dtype = data_type(DECIMAL64, -scale) + cdef data_type c_dtype = data_type(type_id.DECIMAL64, -scale) with nogil: c_result = move(cpp_is_fixed_point( source_view, diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 8594e37ac4a..929f8b447ab 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -236,6 +236,7 @@ cdef dtype_from_column_view(column_view cv): ] cdef libcudf_types.data_type dtype_to_data_type(dtype) except *: + cdef libcudf_types.type_id tid if cudf.api.types.is_list_dtype(dtype): tid = libcudf_types.type_id.LIST elif cudf.api.types.is_struct_dtype(dtype):