Skip to content

Commit

Permalink
Use more public pandas APIs (#14929)
Browse files Browse the repository at this point in the history
What counts as public is defined by the pandas API reference: https://pandas.pydata.org/docs/reference/index.html

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #14929
  • Loading branch information
mroeschke authored Feb 1, 2024
1 parent bb59715 commit 767dde1
Show file tree
Hide file tree
Showing 13 changed files with 55 additions and 45 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/groupby.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
from functools import singledispatch

from pandas.core.groupby.groupby import DataError
from pandas.errors import DataError

from cudf.api.types import is_string_dtype
from cudf.core.buffer import acquire_spill_lock
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2189,7 +2189,7 @@ def as_column(
elif (
arbitrary.size != 0
and arb_dtype.kind in ("O")
and isinstance(arbitrary[0], pd._libs.interval.Interval)
and isinstance(arbitrary[0], pd.Interval)
):
# changing from pd array to series, possible arrow bug
interval_series = pd.Series(arbitrary)
Expand Down
18 changes: 8 additions & 10 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@
import pandas as pd
import pyarrow as pa
from nvtx import annotate
from pandas._config import get_option
from pandas.core.dtypes.common import is_float, is_integer
from pandas.io.formats import console
from pandas.io.formats.printing import pprint_thing
from typing_extensions import Self, assert_never
Expand Down Expand Up @@ -1817,12 +1815,12 @@ def _clean_renderable_dataframe(self, output):
dimensions (rows x columns)
"""

max_rows = get_option("display.max_rows")
min_rows = get_option("display.min_rows")
max_cols = get_option("display.max_columns")
max_colwidth = get_option("display.max_colwidth")
show_dimensions = get_option("display.show_dimensions")
if get_option("display.expand_frame_repr"):
max_rows = pd.get_option("display.max_rows")
min_rows = pd.get_option("display.min_rows")
max_cols = pd.get_option("display.max_columns")
max_colwidth = pd.get_option("display.max_colwidth")
show_dimensions = pd.get_option("display.show_dimensions")
if pd.get_option("display.expand_frame_repr"):
width, _ = console.get_console_size()
else:
width = None
Expand Down Expand Up @@ -3318,8 +3316,8 @@ def diff(self, periods=1, axis=0):
Diff currently only supports numeric dtype columns.
"""
if not is_integer(periods):
if not (is_float(periods) and periods.is_integer()):
if not isinstance(periods, int):
if not (isinstance(periods, float) and periods.is_integer()):
raise ValueError("periods must be an integer")
periods = int(periods)

Expand Down
12 changes: 4 additions & 8 deletions python/cudf/cudf/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
from pandas.api import types as pd_types
from pandas.api.extensions import ExtensionDtype
from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
from pandas.core.dtypes.dtypes import (
CategoricalDtype as pd_CategoricalDtype,
CategoricalDtypeType as pd_CategoricalDtypeType,
)

import cudf
from cudf._typing import Dtype
Expand Down Expand Up @@ -971,7 +967,7 @@ def _is_categorical_dtype(obj):
if isinstance(
obj,
(
pd_CategoricalDtype,
pd.CategoricalDtype,
cudf.CategoricalDtype,
cudf.core.index.CategoricalIndex,
cudf.core.column.CategoricalColumn,
Expand All @@ -988,8 +984,8 @@ def _is_categorical_dtype(obj):
obj is t
for t in (
cudf.CategoricalDtype,
pd_CategoricalDtype,
pd_CategoricalDtypeType,
pd.CategoricalDtype,
pd.CategoricalDtype.type,
)
):
return True
Expand All @@ -1010,7 +1006,7 @@ def _is_categorical_dtype(obj):
):
return _is_categorical_dtype(obj.dtype)
if hasattr(obj, "type"):
if obj.type is pd_CategoricalDtypeType:
if obj.type is pd.CategoricalDtype.type:
return True
# TODO: A lot of the above checks are probably redundant and should be
# farmed out to this function here instead.
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import cupy
import numpy as np
import pandas as pd
from pandas._config import get_option
from typing_extensions import Self

import cudf
Expand Down Expand Up @@ -1306,7 +1305,7 @@ def get_loc(self, key):

@_cudf_nvtx_annotate
def __repr__(self):
max_seq_items = get_option("max_seq_items") or len(self)
max_seq_items = pd.get_option("max_seq_items") or len(self)
mr = 0
if 2 * max_seq_items < len(self):
mr = max_seq_items + 1
Expand Down
3 changes: 1 addition & 2 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import cupy as cp
import numpy as np
import pandas as pd
from pandas._config import get_option

import cudf
import cudf._lib as libcudf
Expand Down Expand Up @@ -428,7 +427,7 @@ def copy(

@_cudf_nvtx_annotate
def __repr__(self):
max_seq_items = get_option("display.max_seq_items") or len(self)
max_seq_items = pd.get_option("display.max_seq_items") or len(self)

if len(self) > max_seq_items:
n = int(max_seq_items / 2) + 1
Expand Down
14 changes: 6 additions & 8 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
import cupy
import numpy as np
import pandas as pd
from pandas._config import get_option
from pandas.core.dtypes.common import is_float
from typing_extensions import Self, assert_never

import cudf
Expand Down Expand Up @@ -1405,8 +1403,8 @@ def __repr__(self):
_, height = get_terminal_size()
max_rows = (
height
if get_option("display.max_rows") == 0
else get_option("display.max_rows")
if pd.get_option("display.max_rows") == 0
else pd.get_option("display.max_rows")
)
if max_rows not in (0, None) and len(self) > max_rows:
top = self.head(int(max_rows / 2 + 1))
Expand Down Expand Up @@ -1451,10 +1449,10 @@ def __repr__(self):
):
min_rows = (
height
if get_option("display.min_rows") == 0
else get_option("display.min_rows")
if pd.get_option("display.min_rows") == 0
else pd.get_option("display.min_rows")
)
show_dimensions = get_option("display.show_dimensions")
show_dimensions = pd.get_option("display.show_dimensions")
if preprocess._column.categories.dtype.kind == "f":
pd_series = (
preprocess.astype("str")
Expand Down Expand Up @@ -3392,7 +3390,7 @@ def diff(self, periods=1):
dtype: int64
"""
if not is_integer(periods):
if not (is_float(periods) and periods.is_integer()):
if not (isinstance(periods, float) and periods.is_integer()):
raise ValueError("periods must be an integer")
periods = int(periods)

Expand Down
26 changes: 25 additions & 1 deletion python/cudf/cudf/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import numpy as np
import pandas as pd
import pandas.tseries.offsets as pd_offset
from pandas.core.tools.datetimes import _unit_map
from typing_extensions import Self

import cudf
Expand All @@ -21,6 +20,31 @@
from cudf.core import column
from cudf.core.index import as_index

# https://github.com/pandas-dev/pandas/blob/2.2.x/pandas/core/tools/datetimes.py#L1112
_unit_map = {
"year": "year",
"years": "year",
"month": "month",
"months": "month",
"day": "day",
"days": "day",
"hour": "h",
"hours": "h",
"minute": "m",
"minutes": "m",
"second": "s",
"seconds": "s",
"ms": "ms",
"millisecond": "ms",
"milliseconds": "ms",
"us": "us",
"microsecond": "us",
"microseconds": "us",
"ns": "ns",
"nanosecond": "ns",
"nanoseconds": "ns",
}

_unit_dtype_map = {
"ns": "datetime64[ns]",
"us": "datetime64[us]",
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/io/hdf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.

import warnings

Expand Down Expand Up @@ -27,4 +27,4 @@ def to_hdf(path_or_buf, key, value, *args, **kwargs):
"be GPU accelerated in the future"
)
pd_value = value.to_pandas()
pd.io.pytables.to_hdf(path_or_buf, key, pd_value, *args, **kwargs)
pd_value.to_hdf(path_or_buf, key, *args, **kwargs)
3 changes: 1 addition & 2 deletions python/cudf/cudf/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,8 @@ def to_json(
pd_value = pd.DataFrame(pd_data)
else:
pd_value = maybe_return_nullable_pd_obj(cudf_val)
return pd.io.json.to_json(
return pd_value.to_json(
path_or_buf,
pd_value,
orient=orient,
storage_options=storage_options,
*args,
Expand Down
5 changes: 1 addition & 4 deletions python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -50,9 +50,6 @@ xfail_strict = true
filterwarnings = [
"error:Sparse:FutureWarning",
"error:The SparseArray:FutureWarning",
# Deprecation gives warning on import during pytest collection
"ignore:pandas.core.index is deprecated:FutureWarning:importlib",
"ignore:pandas.util.testing is deprecated:FutureWarning:importlib",
# Will be fixed in numba 0.56: https://github.com/numba/numba/issues/7758
"ignore:`np.MachAr` is deprecated:DeprecationWarning:numba",
]
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1705,14 +1705,14 @@ def test_date_range_raise_overflow():
start = np.datetime64(np.iinfo("int64").max, "ns")
periods = 2
freq = cudf.DateOffset(nanoseconds=1)
with pytest.raises(pd._libs.tslibs.np_datetime.OutOfBoundsDatetime):
with pytest.raises(pd.errors.OutOfBoundsDatetime):
cudf.date_range(start=start, periods=periods, freq=freq)

# Non-fixed offset
start = np.datetime64(np.iinfo("int64").max, "ns")
periods = 2
freq = cudf.DateOffset(months=1)
with pytest.raises(pd._libs.tslibs.np_datetime.OutOfBoundsDatetime):
with pytest.raises(pd.errors.OutOfBoundsDatetime):
# Extending beyond the max value will trigger a warning when pandas
# does an internal conversion to a Python built-in datetime.datetime
# object, which only supports down to microsecond resolution.
Expand Down
4 changes: 2 additions & 2 deletions python/dask_cudf/dask_cudf/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from dask.utils import Dispatch, is_arraylike

import cudf
from cudf.api.types import _is_datetime64tz_dtype, is_string_dtype
from cudf.api.types import is_string_dtype
from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate

from .core import DataFrame, Index, Series
Expand Down Expand Up @@ -126,7 +126,7 @@ def _get_non_empty_data(s):
data = cudf.core.column.as_column(data, dtype=s.dtype)
elif is_string_dtype(s.dtype):
data = pa.array(["cat", "dog"])
elif _is_datetime64tz_dtype(s.dtype):
elif isinstance(s.dtype, pd.DatetimeTZDtype):
from cudf.utils.dtypes import get_time_unit

data = cudf.date_range("2001-01-01", periods=2, freq=get_time_unit(s))
Expand Down

0 comments on commit 767dde1

Please sign in to comment.