Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Deprecate Index.is_* methods #12820

Merged
merged 23 commits into from
Mar 8, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
51c49ec
Deprecate is_boolean
galipremsagar Feb 21, 2023
a32a895
Deprecate is_interval & is_categorical
galipremsagar Feb 21, 2023
8fffc77
Deprecate is_object
galipremsagar Feb 21, 2023
79e4590
Deprecate is_floating
galipremsagar Feb 21, 2023
defb6f1
Deprecate is_floating
galipremsagar Feb 22, 2023
5e6d8ee
Merge branch 'branch-23.04' into Index_is_
galipremsagar Feb 22, 2023
d53c605
Merge remote-tracking branch 'upstream/branch-23.04' into Index_is_
galipremsagar Feb 22, 2023
e408310
Apply suggestions from code review
galipremsagar Feb 22, 2023
89c353a
Merge branch 'Index_is_' of https://github.com/galipremsagar/cudf int…
galipremsagar Feb 22, 2023
eac49ac
address reviews
galipremsagar Feb 22, 2023
5b39578
Merge branch 'branch-23.04' into Index_is_
galipremsagar Feb 23, 2023
ef32737
Merge branch 'branch-23.04' into Index_is_
galipremsagar Feb 23, 2023
a91fcb5
fix apis
galipremsagar Feb 23, 2023
76c8b1e
Merge branch 'branch-23.04' into Index_is_
galipremsagar Mar 6, 2023
862e9a6
Merge branch 'branch-23.04' into Index_is_
galipremsagar Mar 6, 2023
d428218
Merge remote-tracking branch 'upstream/branch-23.04' into Index_is_
galipremsagar Mar 6, 2023
a16a841
drop duplication
galipremsagar Mar 6, 2023
00b91d8
Merge branch 'branch-23.04' into Index_is_
galipremsagar Mar 6, 2023
4b7b254
Merge remote-tracking branch 'upstream/branch-23.04' into Index_is_
galipremsagar Mar 7, 2023
54c57e4
Update implementations
galipremsagar Mar 7, 2023
ef721b5
Update implementations
galipremsagar Mar 7, 2023
97335e1
Merge branch 'branch-23.04' into Index_is_
galipremsagar Mar 8, 2023
632a6ba
Merge branch 'branch-23.04' into Index_is_
galipremsagar Mar 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 219 additions & 5 deletions python/cudf/cudf/api/types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2023, NVIDIA CORPORATION.

"""Define common type operations."""

Expand Down Expand Up @@ -60,6 +60,8 @@ def is_numeric_dtype(obj):
getattr(obj, "dtype", None), _BaseDtype
):
return False
if isinstance(obj, cudf.BaseIndex):
return obj._is_numeric()
return pd_types.is_numeric_dtype(obj)


Expand Down Expand Up @@ -230,13 +232,228 @@ def _union_categoricals(
return cudf.Index(result_col)


def is_bool_dtype(arr_or_dtype):
"""
Check whether the provided array or dtype is of a boolean dtype.

Parameters
----------
arr_or_dtype : array-like or dtype
The array or dtype to check.

Returns
-------
boolean
Whether or not the array or dtype is of a boolean dtype.

Notes
-----
A cudf index is checked through its ``_is_boolean()`` method. A
``cudf.CategoricalDtype`` is judged by the dtype of its categories.
Any other input is forwarded to the pandas ``is_bool_dtype``.

Examples
--------
>>> from cudf.api.types import is_bool_dtype
>>> import numpy as np
>>> import cudf
>>> is_bool_dtype(str)
False
>>> is_bool_dtype(int)
False
>>> is_bool_dtype(bool)
True
>>> is_bool_dtype(np.bool_)
True
>>> is_bool_dtype(np.array(['a', 'b']))
False
>>> is_bool_dtype(cudf.Series([1, 2]))
False
>>> is_bool_dtype(np.array([True, False]))
True
>>> is_bool_dtype(cudf.Series([True, False], dtype='category'))
True
"""
# cudf indexes answer through their own private predicate.
if isinstance(arr_or_dtype, cudf.BaseIndex):
return arr_or_dtype._is_boolean()
elif isinstance(arr_or_dtype, cudf.Series):
# A categorical Series is re-dispatched on its dtype alone.
if isinstance(arr_or_dtype.dtype, cudf.CategoricalDtype):
return is_bool_dtype(arr_or_dtype=arr_or_dtype.dtype)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be checking the boolean dtype of the categories?

Suggested change
return is_bool_dtype(arr_or_dtype=arr_or_dtype.dtype)
return is_bool_dtype(arr_or_dtype=arr_or_dtype.dtype.categories)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made a branch for CategoricalDtype at line 278 that will do this step while also generalizing for our CategoricalDtype.

else:
# Any other Series: pandas judges the Series' dtype.
return pd_types.is_bool_dtype(arr_or_dtype=arr_or_dtype.dtype)
elif isinstance(arr_or_dtype, cudf.CategoricalDtype):
# A categorical dtype is boolean when its categories' dtype is boolean.
return pd_types.is_bool_dtype(
arr_or_dtype=arr_or_dtype.categories.dtype
)
else:
# Everything else is handed to pandas unchanged.
return pd_types.is_bool_dtype(arr_or_dtype=arr_or_dtype)


def is_object_dtype(arr_or_dtype):
    """
    Determine whether an array-like or dtype has the object dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True when the array-like or dtype is of the object dtype,
        False otherwise.

    Examples
    --------
    >>> from cudf.api.types import is_object_dtype
    >>> import numpy as np
    >>> is_object_dtype(object)
    True
    >>> is_object_dtype(int)
    False
    >>> is_object_dtype(np.array([], dtype=object))
    True
    >>> is_object_dtype(np.array([], dtype=int))
    False
    >>> is_object_dtype([1, 2, 3])
    False
    """
    # cudf indexes answer through their own private predicate.
    if isinstance(arr_or_dtype, cudf.BaseIndex):
        return arr_or_dtype._is_object()

    # A cudf Series is reduced to its dtype; any other input is passed
    # to pandas as given.
    target = (
        arr_or_dtype.dtype
        if isinstance(arr_or_dtype, cudf.Series)
        else arr_or_dtype
    )
    return pd_types.is_object_dtype(arr_or_dtype=target)


def is_float_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the given array or dtype has a float dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        True when the array or dtype is of a float dtype, False otherwise.

    Examples
    --------
    >>> from cudf.api.types import is_float_dtype
    >>> import numpy as np
    >>> import cudf
    >>> is_float_dtype(str)
    False
    >>> is_float_dtype(int)
    False
    >>> is_float_dtype(float)
    True
    >>> is_float_dtype(np.array(['a', 'b']))
    False
    >>> is_float_dtype(cudf.Series([1, 2]))
    False
    >>> is_float_dtype(cudf.Index([1, 2.]))
    True
    """
    if not isinstance(arr_or_dtype, cudf.BaseIndex):
        # Non-index inputs go through the wrapped pandas predicate.
        pandas_check = _wrap_pandas_is_dtype_api(pd_types.is_float_dtype)
        return pandas_check(arr_or_dtype)
    # cudf indexes answer through their own private predicate.
    return arr_or_dtype._is_floating()


def is_integer_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the given array or dtype has an integer dtype.

    In contrast to `is_any_int_dtype`, timedelta64 instances yield False.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        True when the array or dtype is of an integer dtype and is
        not an instance of timedelta64, False otherwise.

    Examples
    --------
    >>> from cudf.api.types import is_integer_dtype
    >>> import numpy as np
    >>> import cudf
    >>> is_integer_dtype(str)
    False
    >>> is_integer_dtype(int)
    True
    >>> is_integer_dtype(float)
    False
    >>> is_integer_dtype(np.uint64)
    True
    >>> is_integer_dtype('int8')
    True
    >>> is_integer_dtype('Int8')
    True
    >>> is_integer_dtype(np.datetime64)
    False
    >>> is_integer_dtype(np.timedelta64)
    False
    >>> is_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_integer_dtype(cudf.Series([1, 2]))
    True
    >>> is_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_integer_dtype(cudf.Index([1, 2.]))  # float
    False
    """
    if not isinstance(arr_or_dtype, cudf.BaseIndex):
        # Non-index inputs go through the wrapped pandas predicate.
        pandas_check = _wrap_pandas_is_dtype_api(pd_types.is_integer_dtype)
        return pandas_check(arr_or_dtype)
    # cudf indexes answer through their own private predicate.
    return arr_or_dtype._is_integer()


def is_any_real_numeric_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the given array or dtype has a real number dtype.

    An input qualifies when it is numeric but neither complex nor boolean.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        True when the array or dtype is of a real number dtype,
        False otherwise.

    Examples
    --------
    >>> from cudf.api.types import is_any_real_numeric_dtype
    >>> import cudf
    >>> is_any_real_numeric_dtype(int)
    True
    >>> is_any_real_numeric_dtype(float)
    True
    >>> is_any_real_numeric_dtype(object)
    False
    >>> is_any_real_numeric_dtype(str)
    False
    >>> is_any_real_numeric_dtype(complex(1, 2))
    False
    >>> is_any_real_numeric_dtype(bool)
    False
    >>> is_any_real_numeric_dtype(cudf.Index([1, 2, 3]))
    True
    """
    # Checks run in the same order as a chained ``and``: numeric first,
    # then complex, then boolean, stopping at the first disqualifier.
    numeric = is_numeric_dtype(arr_or_dtype)
    if not numeric:
        return numeric
    if is_complex_dtype(arr_or_dtype):
        return False
    return not is_bool_dtype(arr_or_dtype)


# TODO: The below alias is removed for now since improving cudf categorical
# support is ongoing and we don't want to introduce any ambiguities. The above
# method _union_categoricals will take its place once exposed.
# union_categoricals = pd_types.union_categoricals
infer_dtype = pd_types.infer_dtype
pandas_dtype = pd_types.pandas_dtype
# NOTE: `is_bool_dtype` is deliberately not aliased to the pandas function
# here. Rebinding the name at this point would replace the cudf-aware
# `is_bool_dtype` function defined earlier in this module, silently
# bypassing its Index, Series and CategoricalDtype handling.
is_complex_dtype = pd_types.is_complex_dtype
# TODO: Evaluate which of the datetime types need special handling for cudf.
is_datetime_dtype = _wrap_pandas_is_dtype_api(pd_types.is_datetime64_dtype)
Expand All @@ -246,10 +463,7 @@ def _union_categoricals(
is_datetime64tz_dtype = pd_types.is_datetime64tz_dtype
is_extension_type = pd_types.is_extension_type
is_extension_array_dtype = pd_types.is_extension_array_dtype
is_float_dtype = _wrap_pandas_is_dtype_api(pd_types.is_float_dtype)
is_int64_dtype = pd_types.is_int64_dtype
is_integer_dtype = _wrap_pandas_is_dtype_api(pd_types.is_integer_dtype)
is_object_dtype = pd_types.is_object_dtype
is_period_dtype = pd_types.is_period_dtype
is_signed_integer_dtype = pd_types.is_signed_integer_dtype
is_timedelta_dtype = _wrap_pandas_is_dtype_api(pd_types.is_timedelta64_dtype)
Expand Down
39 changes: 37 additions & 2 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,8 @@ def intersection(self, other, sort=False):

res_name = _get_result_name(self.name, other.name)

if (self.is_boolean() and other.is_numeric()) or (
self.is_numeric() and other.is_boolean()
if (self._is_boolean() and other._is_numeric()) or (
self._is_numeric() and other._is_boolean()
):
if isinstance(self, cudf.MultiIndex):
return self[:0].rename(res_name)
Expand Down Expand Up @@ -841,6 +841,9 @@ def is_numeric(self):
"""
Check if the Index only consists of numeric data.

.. deprecated:: 23.04
Use `cudf.api.types.is_any_real_numeric_dtype` instead.

Returns
-------
bool
Expand Down Expand Up @@ -876,10 +879,15 @@ def is_numeric(self):
"""
raise NotImplementedError

_is_numeric = is_numeric

def is_boolean(self):
"""
Check if the Index only consists of booleans.

.. deprecated:: 23.04
Use `cudf.api.types.is_bool_dtype` instead.

Returns
-------
bool
Expand Down Expand Up @@ -909,10 +917,15 @@ def is_boolean(self):
"""
raise NotImplementedError

_is_boolean = is_boolean

def is_integer(self):
"""
Check if the Index only consists of integers.

.. deprecated:: 23.04
Use `cudf.api.types.is_integer_dtype` instead.

Returns
-------
bool
Expand Down Expand Up @@ -942,13 +955,18 @@ def is_integer(self):
"""
raise NotImplementedError

_is_integer = is_integer

def is_floating(self):
"""
Check if the Index is a floating type.

The Index may consist of only floats, NaNs, or a mix of floats,
integers, or NaNs.

.. deprecated:: 23.04
Use `cudf.api.types.is_float_dtype` instead.

Returns
-------
bool
Expand Down Expand Up @@ -982,10 +1000,15 @@ def is_floating(self):
"""
raise NotImplementedError

_is_floating = is_floating

def is_object(self):
"""
Check if the Index is of the object dtype.

.. deprecated:: 23.04
Use `cudf.api.types.is_object_dtype` instead.

Returns
-------
bool
Expand Down Expand Up @@ -1016,10 +1039,15 @@ def is_object(self):
"""
raise NotImplementedError

_is_object = is_object

def is_categorical(self):
"""
Check if the Index holds categorical data.

.. deprecated:: 23.04
Use `cudf.api.types.is_categorical_dtype` instead.

Returns
-------
bool
Expand Down Expand Up @@ -1057,10 +1085,15 @@ def is_categorical(self):
"""
raise NotImplementedError

_is_categorical = is_categorical

def is_interval(self):
"""
Check if the Index holds Interval objects.

.. deprecated:: 23.04
Use `cudf.api.types.is_interval_dtype` instead.

Returns
-------
bool
Expand Down Expand Up @@ -1092,6 +1125,8 @@ def is_interval(self):
"""
raise NotImplementedError

_is_interval = is_interval

def _union(self, other, sort=None):
# TODO: As a future optimization we should explore
# not doing `to_frame`
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0")
PANDAS_GE_150 = PANDAS_VERSION >= version.parse("1.5.0")
PANDAS_LT_153 = PANDAS_VERSION < version.parse("1.5.3")
PANDAS_GE_200 = PANDAS_VERSION >= version.parse("2.0.0")
Loading