diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 6b5e5b858f0..0b0fecb48a3 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -9,7 +9,7 @@ import rmm from cudf.api.types import dtype -from cudf import core, datasets, testing +from cudf import api, core, datasets, testing from cudf._version import get_versions from cudf.api.extensions import ( register_dataframe_accessor, diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx index d27ac533304..1b590db9e6d 100644 --- a/python/cudf/cudf/_lib/binaryop.pyx +++ b/python/cudf/cudf/_lib/binaryop.pyx @@ -24,7 +24,7 @@ from cudf._lib.cpp.scalar.scalar cimport scalar from cudf._lib.cpp.types cimport data_type, type_id from cudf._lib.types cimport dtype_to_data_type, underlying_type_t_type_id -from cudf.utils.dtypes import is_scalar, is_string_dtype +from cudf.api.types import is_scalar, is_string_dtype cimport cudf._lib.cpp.binaryop as cpp_binaryop from cudf._lib.cpp.binaryop cimport binary_operator diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index f833f6e3150..466be8dd21e 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -8,13 +8,13 @@ import rmm import cudf import cudf._lib as libcudfxx -from cudf.core.buffer import Buffer -from cudf.utils.dtypes import ( +from cudf.api.types import ( is_categorical_dtype, is_decimal_dtype, is_list_dtype, is_struct_dtype, ) +from cudf.core.buffer import Buffer from cpython.buffer cimport PyObject_CheckBuffer from libc.stdint cimport uintptr_t diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index 9912a7801a4..88dfc3a32f4 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -258,7 +258,7 @@ cdef csv_reader_options make_csv_reader_options( csv_reader_options_c.set_dtypes(c_dtypes_map) csv_reader_options_c.set_parse_hex(c_hex_col_names) elif ( - cudf.utils.dtypes.is_scalar(dtype) or + cudf.api.types.is_scalar(dtype) or isinstance(dtype, ( np.dtype, pd.core.dtypes.dtypes.ExtensionDtype, type )) @@ -515,7 +515,7 @@ cdef data_type _get_cudf_data_type_from_dtype(object dtype) except +: # TODO: Remove this Error message once the # following issue is fixed: # https://github.com/rapidsai/cudf/issues/3960 - if cudf.utils.dtypes.is_categorical_dtype(dtype): + if cudf.api.types.is_categorical_dtype(dtype): raise NotImplementedError( "CategoricalDtype as dtype is not yet " "supported in CSV reader" diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 153b116cd33..e33dd0a5404 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -7,7 +7,7 @@ from pandas.core.groupby.groupby import DataError import rmm -from cudf.utils.dtypes import ( +from cudf.api.types import ( is_categorical_dtype, is_decimal_dtype, is_interval_dtype, diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 972a93e55ec..8e45011228a 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -27,7 +27,7 @@ import io import os import cudf -from cudf.utils.dtypes import is_struct_dtype +from cudf.api.types import is_struct_dtype # Converts the Python source input to libcudf++ IO source_info diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 68d9da57e83..483cddcfa22 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -117,7 +117,7 @@ cpdef read_json(object filepaths_or_buffers, column_names=column_names) cdef data_type _get_cudf_data_type_from_dtype(object dtype) except +: - if cudf.utils.dtypes.is_categorical_dtype(dtype): + if cudf.api.types.is_categorical_dtype(dtype): raise NotImplementedError( "CategoricalDtype as dtype is not yet " "supported in JSON reader" diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index e12a61f2a49..63133741818 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -18,13 +18,13 @@ except ImportError: import numpy as np from cython.operator import dereference -from cudf.utils.dtypes import ( +from cudf.api.types import ( is_categorical_dtype, is_decimal_dtype, is_list_dtype, is_struct_dtype, - np_to_pa_dtype, ) +from cudf.utils.dtypes import np_to_pa_dtype from cudf._lib.utils cimport data_from_unique_ptr, get_column_names diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx index 87da5526d3c..c6307d6cdb9 100644 --- a/python/cudf/cudf/_lib/reduce.pyx +++ b/python/cudf/cudf/_lib/reduce.pyx @@ -1,7 +1,8 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. import cudf -from cudf.utils.dtypes import is_decimal_dtype +from cudf.api.types import is_decimal_dtype +from cudf.core.dtypes import Decimal64Dtype from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column diff --git a/python/cudf/cudf/_lib/replace.pyx b/python/cudf/cudf/_lib/replace.pyx index 2ae0835566b..2d7f56dc5ce 100644 --- a/python/cudf/cudf/_lib/replace.pyx +++ b/python/cudf/cudf/_lib/replace.pyx @@ -3,7 +3,7 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move -from cudf.utils.dtypes import is_scalar +from cudf.api.types import is_scalar from cudf._lib.column cimport Column diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index c0cae16d9ef..41e8f0347d1 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -61,8 +61,6 @@ from cudf._lib.cpp.wrappers.timestamps cimport ( ) from cudf._lib.utils cimport data_from_table_view -import cudf - cdef class DeviceScalar: diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx index 0f8f0b6ea14..8183bd21823 100644 --- a/python/cudf/cudf/_lib/transpose.pyx +++ b/python/cudf/cudf/_lib/transpose.pyx @@ -1,7 +1,7 @@ # Copyright (c) 2020, NVIDIA CORPORATION. import cudf -from cudf.utils.dtypes import is_categorical_dtype +from cudf.api.types import is_categorical_dtype from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair diff --git a/python/cudf/cudf/_lib/unary.pyx b/python/cudf/cudf/_lib/unary.pyx index 2b6f3e8b4c1..acca61cf9d1 100644 --- a/python/cudf/cudf/_lib/unary.pyx +++ b/python/cudf/cudf/_lib/unary.pyx @@ -2,7 +2,7 @@ from enum import IntEnum -from cudf.utils.dtypes import is_decimal_dtype +from cudf.api.types import is_decimal_dtype from libcpp cimport bool from libcpp.memory cimport unique_ptr diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 2456aa334e9..2e81ba083fb 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -22,14 +22,13 @@ try: except ImportError: import json -from cudf.utils.dtypes import ( +from cudf.api.types import ( is_categorical_dtype, is_decimal_dtype, is_list_dtype, is_struct_dtype, - np_dtypes_to_pandas_dtypes, - np_to_pa_dtype, ) +from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes, np_to_pa_dtype PARQUET_META_TYPE_MAP = { str(cudf_dtype): str(pandas_dtype) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 163af62677e..1fe59d3dfd6 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -10,15 +10,13 @@ import cudf from cudf._typing import DtypeObj -from cudf.api.types import is_dtype_equal, is_integer +from cudf.api.types import is_dtype_equal, is_integer, is_list_like, is_scalar from cudf.core.abc import Serializable from cudf.core.column import ColumnBase, column from cudf.core.column_accessor import ColumnAccessor from cudf.utils import ioutils from cudf.utils.dtypes import ( - is_list_like, is_mixed_with_object_dtype, - is_scalar, numeric_normalize_types, ) from cudf.utils.utils import cached_property diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py index ea3c7bfb91f..eef0f284f40 100644 --- a/python/cudf/cudf/core/_internals/where.py +++ b/python/cudf/cudf/core/_internals/where.py @@ -41,10 +41,10 @@ def _check_and_cast_columns_with_other( Returns type-casted column `source_col` & scalar `other_scalar` based on `inplace` parameter. """ - if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype): + if cudf.api.types.is_categorical_dtype(source_col.dtype): return source_col, other - if cudf.utils.dtypes.is_scalar(other): + if cudf.api.types.is_scalar(other): device_obj = _normalize_scalars(source_col, other) else: device_obj = other @@ -66,10 +66,8 @@ def _check_and_cast_columns_with_other( return source_col, device_obj.astype(source_col.dtype) else: if ( - cudf.utils.dtypes.is_scalar(other) - and cudf.utils.dtypes._is_non_decimal_numeric_dtype( - source_col.dtype - ) + cudf.api.types.is_scalar(other) + and cudf.api.types._is_non_decimal_numeric_dtype(source_col.dtype) and cudf.utils.dtypes._can_cast(other, source_col.dtype) ): common_dtype = source_col.dtype @@ -82,11 +80,11 @@ def _check_and_cast_columns_with_other( [ source_col.dtype, np.min_scalar_type(other) - if cudf.utils.dtypes.is_scalar(other) + if cudf.api.types.is_scalar(other) else other.dtype, ] ) - if cudf.utils.dtypes.is_scalar(device_obj): + if cudf.api.types.is_scalar(device_obj): device_obj = cudf.Scalar(other, dtype=common_dtype) else: device_obj = device_obj.astype(common_dtype) @@ -132,9 +130,9 @@ def _normalize_columns_and_scalars_type( other_df._data[self_col] = other_col return source_df, other_df - elif isinstance( - frame, (Series, Index) - ) and not cudf.utils.dtypes.is_scalar(other): + elif isinstance(frame, (Series, Index)) and not cudf.api.types.is_scalar( + other + ): other = cudf.core.column.as_column(other) input_col = frame._data[frame.name] return _check_and_cast_columns_with_other( @@ -142,7 +140,7 @@ def _normalize_columns_and_scalars_type( ) else: # Handles scalar or list/array like scalars - if isinstance(frame, (Series, Index)) and cudf.utils.dtypes.is_scalar( + if isinstance(frame, (Series, Index)) and cudf.api.types.is_scalar( other ): input_col = frame._data[frame.name] @@ -162,7 +160,7 @@ def _normalize_columns_and_scalars_type( ) = _check_and_cast_columns_with_other( source_col=source_df._data[col_name], other=other - if cudf.utils.dtypes.is_scalar(other) + if cudf.api.types.is_scalar(other) else other[i], inplace=inplace, ) @@ -284,7 +282,7 @@ def where( other_column = others[i] if column_name in cond._data: if isinstance(input_col, cudf.core.column.CategoricalColumn): - if cudf.utils.dtypes.is_scalar(other_column): + if cudf.api.types.is_scalar(other_column): try: other_column = input_col._encode(other_column) except ValueError: @@ -346,7 +344,7 @@ def where( ) if isinstance(input_col, cudf.core.column.CategoricalColumn): - if cudf.utils.dtypes.is_scalar(other): + if cudf.api.types.is_scalar(other): try: other = input_col._encode(other) except ValueError: diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 76dd0683a5a..c057b729fd1 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -24,13 +24,12 @@ from cudf import _lib as libcudf from cudf._lib.transform import bools_to_mask from cudf._typing import ColumnLike, Dtype, ScalarLike +from cudf.api.types import is_categorical_dtype, is_interval_dtype from cudf.core.buffer import Buffer from cudf.core.column import column from cudf.core.column.methods import ColumnMethods from cudf.core.dtypes import CategoricalDtype from cudf.utils.dtypes import ( - is_categorical_dtype, - is_interval_dtype, is_mixed_with_object_dtype, min_signed_type, min_unsigned_type, @@ -802,7 +801,7 @@ def unary_operator(self, unaryop: str): ) def __setitem__(self, key, value): - if cudf.utils.dtypes.is_scalar( + if cudf.api.types.is_scalar( value ) and cudf._lib.scalar._is_null_host_scalar(value): to_add_categories = 0 @@ -817,7 +816,7 @@ def __setitem__(self, key, value): "category, set the categories first" ) - if cudf.utils.dtypes.is_scalar(value): + if cudf.api.types.is_scalar(value): value = self._encode(value) if value is not None else value else: value = cudf.core.column.as_column(value).astype(self.dtype) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index a005691661f..0d4edbf0113 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -15,6 +15,7 @@ import cudf from cudf import _lib as libcudf from cudf._typing import DatetimeLikeScalar, Dtype, DtypeObj, ScalarLike +from cudf.api.types import is_scalar from cudf.core._compat import PANDAS_GE_120 from cudf.core.buffer import Buffer from cudf.core.column import ( @@ -24,7 +25,6 @@ column_empty_like, string, ) -from cudf.utils.dtypes import is_scalar from cudf.utils.utils import _fillna_natwise if PANDAS_GE_120: diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 47f39eb570d..b13ad8664dc 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -15,11 +15,10 @@ from_decimal as cpp_from_decimal, ) from cudf._typing import Dtype -from cudf.api.types import is_integer_dtype +from cudf.api.types import is_integer_dtype, is_scalar from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype -from cudf.utils.dtypes import is_scalar from cudf.utils.utils import pa_mask_buffer_to_mask from .numerical_base import NumericalBaseColumn diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index d5c1ff649f7..a873a0f98a5 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -3,9 +3,9 @@ import pyarrow as pa import cudf +from cudf.api.types import is_interval_dtype from cudf.core.column import StructColumn from cudf.core.dtypes import IntervalDtype -from cudf.utils.dtypes import is_interval_dtype class IntervalColumn(StructColumn): diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 60530d10280..937f82145b7 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -19,11 +19,11 @@ ) from cudf._lib.table import Table from cudf._typing import BinaryOperand, ColumnLike, Dtype, ScalarLike +from cudf.api.types import _is_non_decimal_numeric_dtype, is_list_dtype from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column, column from cudf.core.column.methods import ColumnMethods, ParentType from cudf.core.dtypes import ListDtype -from cudf.utils.dtypes import _is_non_decimal_numeric_dtype, is_list_dtype class ListColumn(ColumnBase): diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 388cdb0ca79..c26b8b7e09c 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -128,7 +128,7 @@ def skew(self, skipna: bool = None) -> ScalarLike: def quantile( self, q: Union[float, Sequence[float]], interpolation: str, exact: bool ) -> NumericalBaseColumn: - if isinstance(q, Number) or cudf.utils.dtypes.is_list_like(q): + if isinstance(q, Number) or cudf.api.types.is_list_like(q): np_array_q = np.asarray(q) if np.logical_or(np_array_q < 0, np_array_q > 1).any(): raise ValueError( diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index c4b07c41b06..c14cbd11714 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -27,18 +27,18 @@ from cudf import _lib as libcudf from cudf._lib import string_casting as str_cast, strings as libstrings from cudf._lib.column import Column -from cudf.core.buffer import Buffer -from cudf.core.column import column, datetime -from cudf.core.column.methods import ColumnMethods, ParentType -from cudf.utils import utils -from cudf.utils.docutils import copy_docstring -from cudf.utils.dtypes import ( - can_convert_to_column, +from cudf.api.types import ( is_integer, is_list_dtype, is_scalar, is_string_dtype, ) +from cudf.core.buffer import Buffer +from cudf.core.column import column, datetime +from cudf.core.column.methods import ColumnMethods, ParentType +from cudf.utils import utils +from cudf.utils.docutils import copy_docstring +from cudf.utils.dtypes import can_convert_to_column def str_to_boolean(column: StringColumn): diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index c035a5bfb6b..7167918d14d 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -5,10 +5,10 @@ import cudf from cudf._typing import Dtype +from cudf.api.types import is_struct_dtype from cudf.core.column import ColumnBase, build_struct_column from cudf.core.column.methods import ColumnMethods from cudf.core.dtypes import StructDtype -from cudf.utils.dtypes import is_struct_dtype class StructColumn(ColumnBase): diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 7e03e87ac0a..a3888d30f30 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -19,10 +19,11 @@ DtypeObj, ScalarLike, ) +from cudf.api.types import is_scalar from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, column, string from cudf.core.column.datetime import _numpy_to_pandas_conversion -from cudf.utils.dtypes import is_scalar, np_to_pa_dtype +from cudf.utils.dtypes import np_to_pa_dtype from cudf.utils.utils import _fillna_natwise _dtype_to_format_conversion = { diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 91f623a3cd3..039dba8a715 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -5,11 +5,9 @@ import pandas as pd import cudf +from cudf.api.types import is_list_like from cudf.core.column import as_column, build_categorical_column from cudf.core.index import IntervalIndex, interval_range -from cudf.utils.dtypes import is_list_like - -# from cudf._lib.filling import sequence def cut( diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 4239a55118f..d9e2333be54 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -26,7 +26,20 @@ import cudf import cudf.core.common from cudf import _lib as libcudf -from cudf.api.types import is_bool_dtype, is_dict_like, is_dtype_equal +from cudf.api.types import ( + _is_scalar_or_zero_d_array, + is_bool_dtype, + is_categorical_dtype, + is_datetime_dtype, + is_dict_like, + is_dtype_equal, + is_list_dtype, + is_list_like, + is_numeric_dtype, + is_scalar, + is_string_dtype, + is_struct_dtype, +) from cudf.core import column, reshape from cudf.core.abc import Serializable from cudf.core.column import ( @@ -44,19 +57,10 @@ from cudf.utils import applyutils, docutils, ioutils, queryutils, utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( - _is_scalar_or_zero_d_array, can_convert_to_column, cudf_dtype_from_pydata_dtype, find_common_type, - is_categorical_dtype, is_column_like, - is_datetime_dtype, - is_list_dtype, - is_list_like, - is_numerical_dtype, - is_scalar, - is_string_dtype, - is_struct_dtype, min_scalar_type, numeric_normalize_types, ) @@ -7606,7 +7610,7 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes): # default to the first non-null dtype dtypes[idx] = cols[0].dtype # If all the non-null dtypes are int/float, find a common dtype - if all(is_numerical_dtype(col.dtype) for col in cols): + if all(is_numeric_dtype(col.dtype) for col in cols): dtypes[idx] = find_common_type([col.dtype for col in cols]) # If all categorical dtypes, combine the categories elif all( diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 037f6f7ff94..6f6befa5767 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -25,7 +25,15 @@ import cudf from cudf import _lib as libcudf from cudf._typing import ColumnLike, DataFrameOrSeries -from cudf.api.types import is_dict_like, issubdtype +from cudf.api.types import ( + _is_non_decimal_numeric_dtype, + _is_scalar_or_zero_d_array, + is_decimal_dtype, + is_dict_like, + is_integer_dtype, + is_scalar, + issubdtype, +) from cudf.core.column import ( ColumnBase, as_column, @@ -37,14 +45,7 @@ from cudf.core.window import Rolling from cudf.utils import ioutils from cudf.utils.docutils import copy_docstring -from cudf.utils.dtypes import ( - _is_non_decimal_numeric_dtype, - _is_scalar_or_zero_d_array, - is_column_like, - is_decimal_dtype, - is_integer_dtype, - is_scalar, -) +from cudf.utils.dtypes import is_column_like T = TypeVar("T", bound="Frame") @@ -1989,7 +1990,7 @@ def from_arrow(cls, data): dtype = np_dtypes[name] elif pandas_dtypes[ name - ] == "object" and cudf.utils.dtypes.is_struct_dtype( + ] == "object" and cudf.api.types.is_struct_dtype( np_dtypes[name] ): # Incase of struct column, libcudf is not aware of names of @@ -5057,7 +5058,7 @@ def _get_replacement_values_for_columns( if is_scalar(to_replace) and is_scalar(value): to_replace_columns = {col: [to_replace] for col in columns_dtype_map} values_columns = {col: [value] for col in columns_dtype_map} - elif cudf.utils.dtypes.is_list_like(to_replace) or isinstance( + elif cudf.api.types.is_list_like(to_replace) or isinstance( to_replace, ColumnBase ): if is_scalar(value): @@ -5070,7 +5071,7 @@ def _get_replacement_values_for_columns( ) for col in columns_dtype_map } - elif cudf.utils.dtypes.is_list_like(value): + elif cudf.api.types.is_list_like(value): if len(to_replace) != len(value): raise ValueError( f"Replacement lists must be " diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 7a4b221bf6f..6f96f9e114f 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -11,9 +11,9 @@ from cudf._lib import groupby as libgroupby from cudf._lib.table import Table from cudf._typing import DataFrameOrSeries +from cudf.api.types import is_list_like from cudf.core.abc import Serializable from cudf.core.column.column import arange -from cudf.utils.dtypes import is_list_like from cudf.utils.utils import GetAttrGetItemMixin, cached_property @@ -504,7 +504,7 @@ def mult(df): if not len(chunk_results): return self.obj.head(0) - if cudf.utils.dtypes.is_scalar(chunk_results[0]): + if cudf.api.types.is_scalar(chunk_results[0]): result = cudf.Series(chunk_results, index=group_names) result.index.names = self.grouping.names elif isinstance(chunk_results[0], cudf.Series): diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index b009d12262f..496e8e2ea73 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -28,7 +28,13 @@ from cudf._lib.filling import sequence from cudf._lib.search import search_sorted from cudf._lib.table import Table -from cudf.api.types import _is_scalar_or_zero_d_array, is_string_dtype +from cudf.api.types import ( + _is_non_decimal_numeric_dtype, + _is_scalar_or_zero_d_array, + is_categorical_dtype, + is_interval_dtype, + is_string_dtype, +) from cudf.core._base_index import BaseIndex from cudf.core.column import ( CategoricalColumn, @@ -46,12 +52,7 @@ from cudf.core.dtypes import IntervalDtype from cudf.core.frame import Frame, SingleColumnFrame from cudf.utils.docutils import copy_docstring -from cudf.utils.dtypes import ( - _is_non_decimal_numeric_dtype, - find_common_type, - is_categorical_dtype, - is_interval_dtype, -) +from cudf.utils.dtypes import find_common_type from cudf.utils.utils import cached_property, search_range T = TypeVar("T", bound="Frame") diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index da999f13fa8..ebe09a81bc2 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -12,20 +12,20 @@ from cudf._lib.scalar import _is_null_host_scalar from cudf._typing import ColumnLike, DataFrameOrSeries, ScalarLike from cudf.api.types import ( + _is_non_decimal_numeric_dtype, + _is_scalar_or_zero_d_array, is_bool_dtype, + is_categorical_dtype, is_integer, is_integer_dtype, + is_list_like, is_numeric_dtype, + is_scalar, ) from cudf.core.column.column import as_column from cudf.utils.dtypes import ( - _is_non_decimal_numeric_dtype, - _is_scalar_or_zero_d_array, find_common_type, - is_categorical_dtype, is_column_like, - is_list_like, - is_scalar, to_cudf_compatible_scalar, ) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 8d2b05ef4ec..0bf14b5d81f 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -241,23 +241,23 @@ def rename(self, names, inplace=False): def set_names(self, names, level=None, inplace=False): if ( level is not None - and not cudf.utils.dtypes.is_list_like(level) - and cudf.utils.dtypes.is_list_like(names) + and not cudf.api.types.is_list_like(level) + and cudf.api.types.is_list_like(names) ): raise TypeError( "Names must be a string when a single level is provided." ) if ( - not cudf.utils.dtypes.is_list_like(names) + not cudf.api.types.is_list_like(names) and level is None and self.nlevels > 1 ): raise TypeError("Must pass list-like as `names`.") - if not cudf.utils.dtypes.is_list_like(names): + if not cudf.api.types.is_list_like(names): names = [names] - if level is not None and not cudf.utils.dtypes.is_list_like(level): + if level is not None and not cudf.api.types.is_list_like(level): level = [level] if level is not None and len(names) != len(level): @@ -700,7 +700,7 @@ def isin(self, values, level=None): >>> midx.isin([(1, 'red'), (3, 'red')]) array([ True, False, False]) """ - from cudf.utils.dtypes import is_list_like + from cudf.api.types import is_list_like if level is None: if isinstance(values, cudf.MultiIndex): diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 392a251dfc4..a2155deb51e 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -497,7 +497,7 @@ def melt( # Error for unimplemented support for datatype dtypes = [frame[col].dtype for col in id_vars + value_vars] - if any(cudf.utils.dtypes.is_categorical_dtype(t) for t in dtypes): + if any(cudf.api.types.is_categorical_dtype(t) for t in dtypes): raise NotImplementedError( "Categorical columns are not yet " "supported for function" ) @@ -1067,7 +1067,7 @@ def _get_unique(column, dummy_na): def _length_check_params(obj, columns, name): - if cudf.utils.dtypes.is_list_like(obj): + if cudf.api.types.is_list_like(obj): if len(obj) != len(columns): raise ValueError( f"Length of '{name}' ({len(obj)}) did not match the " diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 8ccd967b1b3..55400d03bf7 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -18,7 +18,18 @@ import cudf from cudf import _lib as libcudf from cudf._lib.transform import bools_to_mask -from cudf.api.types import is_bool_dtype, is_dict_like, is_dtype_equal +from cudf.api.types import ( + is_bool_dtype, + is_categorical_dtype, + is_decimal_dtype, + is_dict_like, + is_dtype_equal, + is_interval_dtype, + is_list_dtype, + is_list_like, + is_scalar, + is_struct_dtype, +) from cudf.core.abc import Serializable from cudf.core.column import ( DatetimeColumn, @@ -46,14 +57,7 @@ from cudf.utils.dtypes import ( can_convert_to_column, find_common_type, - is_categorical_dtype, - is_decimal_dtype, - is_interval_dtype, - is_list_dtype, - is_list_like, is_mixed_with_object_dtype, - is_scalar, - is_struct_dtype, min_scalar_type, ) from cudf.utils.utils import ( diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 4856995b391..b0fb2fb4274 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -11,9 +11,9 @@ from cudf._lib.strings.convert.convert_integers import ( is_integer as cpp_is_integer, ) +from cudf.api.types import is_integer, is_scalar from cudf.core import column from cudf.core.index import as_index -from cudf.utils.dtypes import is_integer, is_scalar _unit_dtype_map = { "ns": "datetime64[ns]", diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index d5c4df12246..776b9e0a47c 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -8,10 +8,8 @@ import cudf from cudf import _lib as libcudf from cudf._lib import strings as libstrings -from cudf.core.column import as_column -from cudf.utils.dtypes import ( +from cudf.api.types import ( _is_non_decimal_numeric_dtype, - can_convert_to_column, is_categorical_dtype, is_datetime_dtype, is_list_dtype, @@ -19,6 +17,8 @@ is_struct_dtype, is_timedelta_dtype, ) +from cudf.core.column import as_column +from cudf.utils.dtypes import can_convert_to_column def to_numeric(arg, errors="raise", downcast=None): diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index 582c5324b8f..61f3457087c 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -6,8 +6,8 @@ import cudf from cudf import _lib as libcudf +from cudf.api.types import is_scalar from cudf.utils import ioutils -from cudf.utils.dtypes import is_scalar @annotate("READ_CSV", color="purple", domain="cudf_python") diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index 8a00d9c73a0..a48cfd07d3f 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -6,8 +6,8 @@ import cudf from cudf._lib import json as libjson +from cudf.api.types import is_list_like from cudf.utils import ioutils -from cudf.utils.dtypes import is_list_like @ioutils.doc_read_json() diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index 8f6002bb577..73fbd50c824 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -9,8 +9,8 @@ import cudf from cudf._lib import orc as liborc +from cudf.api.types import is_list_like from cudf.utils import ioutils -from cudf.utils.dtypes import is_list_like from cudf.utils.metadata import ( # type: ignore orc_column_statistics_pb2 as cs_pb2, ) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index fa748761695..a0713bbce2e 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -8,8 +8,8 @@ import cudf from cudf._lib import parquet as libparquet +from cudf.api.types import is_list_like from cudf.utils import ioutils -from cudf.utils.dtypes import is_list_like def _get_partition_groups(df, partition_cols, preserve_index=False): diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index e0f9b3a6efa..d5e9142934b 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -9,9 +9,8 @@ import pandas as pd import cudf -from cudf.api.types import is_numeric_dtype +from cudf.api.types import is_categorical_dtype, is_numeric_dtype from cudf.core._compat import PANDAS_GE_110 -from cudf.utils.dtypes import is_categorical_dtype def _check_isinstance(left, right, obj): diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index cc4c98b611f..b82f736fe89 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -51,10 +51,10 @@ def test_column_offset_and_size(pandas_input, offset, size): children=col.base_children, ) - if cudf.utils.dtypes.is_categorical_dtype(col.dtype): + if cudf.api.types.is_categorical_dtype(col.dtype): assert col.size == col.codes.size assert col.size == (col.codes.data.size / col.codes.dtype.itemsize) - elif cudf.utils.dtypes.is_string_dtype(col.dtype): + elif cudf.api.types.is_string_dtype(col.dtype): if col.size > 0: assert col.size == (col.children[0].size - 1) assert col.size == ( @@ -91,7 +91,7 @@ def column_slicing_test(col, offset, size, cast_to_float=False): else: pd_series = series.to_pandas() - if cudf.utils.dtypes.is_categorical_dtype(col.dtype): + if cudf.api.types.is_categorical_dtype(col.dtype): # The cudf.Series is constructed from an already sliced column, whereas # the pandas.Series is constructed from the unsliced series and then # sliced, so the indexes should be different and we must ignore it. diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 3983e8a5f4a..35cf9158588 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -8,9 +8,9 @@ import pytest import cudf as gd +from cudf.api.types import is_categorical_dtype from cudf.core.dtypes import Decimal64Dtype from cudf.testing._utils import assert_eq, assert_exceptions_equal -from cudf.utils.dtypes import is_categorical_dtype def make_frames(index=None, nulls="none"): diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 3b74fe91e05..a20ff7e4a0a 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -4429,7 +4429,7 @@ def test_isin_dataframe(data, values): pdf = data gdf = cudf.from_pandas(pdf) - if cudf.utils.dtypes.is_scalar(values): + if cudf.api.types.is_scalar(values): assert_exceptions_equal( lfunc=pdf.isin, rfunc=gdf.isin, diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index debc0ffdd53..5100f1a9c49 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -11,27 +11,6 @@ from pandas.core.dtypes.common import infer_dtype_from_object import cudf -from cudf.api.types import ( # noqa: F401 - _is_non_decimal_numeric_dtype, - _is_scalar_or_zero_d_array, - infer_dtype, - is_categorical_dtype, - is_datetime_dtype as is_datetime_dtype, - is_decimal32_dtype, - is_decimal64_dtype, - is_decimal_dtype, - is_integer, - is_integer_dtype, - is_interval_dtype, - is_list_dtype, - is_list_like, - is_numeric_dtype as is_numerical_dtype, - is_scalar, - is_string_dtype, - is_struct_dtype, - is_timedelta_dtype, - pandas_dtype, -) from cudf.core._compat import PANDAS_GE_120 _NA_REP = "" @@ -191,11 +170,11 @@ def cudf_dtype_from_pydata_dtype(dtype): Python dtype. """ - if is_categorical_dtype(dtype): + if cudf.api.types.is_categorical_dtype(dtype): return cudf.core.dtypes.CategoricalDtype - elif is_decimal32_dtype(dtype): + elif cudf.api.types.is_decimal32_dtype(dtype): return cudf.core.dtypes.Decimal32Dtype - elif is_decimal64_dtype(dtype): + elif cudf.api.types.is_decimal64_dtype(dtype): return cudf.core.dtypes.Decimal64Dtype elif dtype in cudf._lib.types.SUPPORTED_NUMPY_TO_LIBCUDF_TYPES: return dtype.type @@ -207,12 +186,12 @@ def cudf_dtype_to_pa_type(dtype): """ Given a cudf pandas dtype, converts it into the equivalent cuDF Python dtype. """ - if is_categorical_dtype(dtype): + if cudf.api.types.is_categorical_dtype(dtype): raise NotImplementedError() elif ( - is_list_dtype(dtype) - or is_struct_dtype(dtype) - or is_decimal_dtype(dtype) + cudf.api.types.is_list_dtype(dtype) + or cudf.api.types.is_struct_dtype(dtype) + or cudf.api.types.is_decimal_dtype(dtype) ): return dtype.to_arrow() else: @@ -230,7 +209,7 @@ def cudf_dtype_from_pa_type(typ): elif pa.types.is_decimal(typ): return cudf.core.dtypes.Decimal64Dtype.from_arrow(typ) else: - return pandas_dtype(typ.to_pandas_dtype()) + return cudf.api.types.pandas_dtype(typ.to_pandas_dtype()) def to_cudf_compatible_scalar(val, dtype=None): @@ -246,7 +225,7 @@ def to_cudf_compatible_scalar(val, dtype=None): ): return val - if not _is_scalar_or_zero_d_array(val): + if not cudf.api.types._is_scalar_or_zero_d_array(val): raise ValueError( f"Cannot convert value of type {type(val).__name__} " "to cudf scalar" @@ -258,7 +237,9 @@ def to_cudf_compatible_scalar(val, dtype=None): if isinstance(val, (np.ndarray, cp.ndarray)) and val.ndim == 0: val = val.item() - if ((dtype is None) and isinstance(val, str)) or is_string_dtype(dtype): + if ( + (dtype is None) and isinstance(val, str) + ) or cudf.api.types.is_string_dtype(dtype): dtype = "str" if isinstance(val, dt.datetime): @@ -270,7 +251,7 @@ def to_cudf_compatible_scalar(val, dtype=None): elif isinstance(val, pd.Timedelta): val = val.to_timedelta64() - val = pandas_dtype(type(val)).type(val) + val = cudf.api.types.pandas_dtype(type(val)).type(val) if dtype is not None: val = val.astype(dtype) @@ -338,7 +319,7 @@ def can_convert_to_column(obj): Boolean: True or False depending on whether the input `obj` is column-compatible or not. """ - return is_column_like(obj) or is_list_like(obj) + return is_column_like(obj) or cudf.api.types.is_list_like(obj) def min_scalar_type(a, min_size=8): @@ -516,20 +497,27 @@ def find_common_type(dtypes): # Aggregate same types dtypes = set(dtypes) - if any(is_decimal_dtype(dtype) for dtype in dtypes): + if any(cudf.api.types.is_decimal_dtype(dtype) for dtype in dtypes): if all( - is_decimal_dtype(dtype) or is_numerical_dtype(dtype) + cudf.api.types.is_decimal_dtype(dtype) + or cudf.api.types.is_numeric_dtype(dtype) for dtype in dtypes ): return _find_common_type_decimal( - [dtype for dtype in dtypes if is_decimal_dtype(dtype)] + [ + dtype + for dtype in dtypes + if cudf.api.types.is_decimal_dtype(dtype) + ] ) else: return cudf.dtype("O") # Corner case 1: # Resort to np.result_type to handle "M" and "m" types separately - dt_dtypes = set(filter(lambda t: is_datetime_dtype(t), dtypes)) + dt_dtypes = set( + filter(lambda t: cudf.api.types.is_datetime_dtype(t), dtypes) + ) if len(dt_dtypes) > 0: dtypes = dtypes - dt_dtypes dtypes.add(np.result_type(*dt_dtypes)) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 4932a72c2c4..e897571807b 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -28,7 +28,7 @@ ) import cudf -from cudf.utils.dtypes import is_string_dtype +from cudf.api.types import is_string_dtype from .core import DataFrame, Index, Series @@ -248,7 +248,7 @@ def tolist_cudf(obj): (cudf.Series, cudf.BaseIndex, cudf.CategoricalDtype, Series) ) def is_categorical_dtype_cudf(obj): - return cudf.utils.dtypes.is_categorical_dtype(obj) + return cudf.api.types.is_categorical_dtype(obj) try: @@ -271,7 +271,7 @@ def percentile_cudf(a, q, interpolation="linear"): if isinstance(q, Iterator): q = list(q) - if cudf.utils.dtypes.is_categorical_dtype(a.dtype): + if cudf.api.types.is_categorical_dtype(a.dtype): result = cp.percentile(a.cat.codes, q, interpolation=interpolation) return ( diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py index f1fb408b0d1..e604e5511da 100644 --- a/python/dask_cudf/dask_cudf/core.py +++ b/python/dask_cudf/dask_cudf/core.py @@ -169,7 +169,7 @@ def set_index(self, other, sorted=False, divisions=None, **kwargs): or isinstance(divisions, (cudf.DataFrame, cudf.Series)) or ( isinstance(other, str) - and cudf.utils.dtypes.is_string_dtype(self[other].dtype) + and cudf.api.types.is_string_dtype(self[other].dtype) ) ): diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py index 8aa68be64a5..9949016d6a7 100644 --- a/python/dask_cudf/dask_cudf/sorting.py +++ b/python/dask_cudf/dask_cudf/sorting.py @@ -13,7 +13,7 @@ from dask.utils import M import cudf as gd -from cudf.utils.dtypes import is_categorical_dtype +from cudf.api.types import is_categorical_dtype def set_index_post(df, index_name, drop, column_dtype):