Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: Remove instances of pd.core #14421

Merged
merged 7 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
PANDAS_LT_153 = PANDAS_VERSION < version.parse("1.5.3")
PANDAS_GE_200 = PANDAS_VERSION >= version.parse("2.0.0")
PANDAS_GE_210 = PANDAS_VERSION >= version.parse("2.1.0")
PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0")
6 changes: 5 additions & 1 deletion python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,17 @@
is_scalar,
is_timedelta64_dtype,
)
from cudf.core._compat import PANDAS_GE_220
from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
from cudf.utils.dtypes import _get_base_dtype
from cudf.utils.utils import _all_bools_with_nulls

_guess_datetime_format = pd.core.tools.datetimes.guess_datetime_format
if PANDAS_GE_220:
_guess_datetime_format = pd.tseries.api.guess_datetime_format
else:
_guess_datetime_format = pd.core.tools.datetimes.guess_datetime_format

# nanoseconds per time_unit
_dtype_to_format_conversion = {
Expand Down
13 changes: 7 additions & 6 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2310,13 +2310,14 @@ def to_dict(
""" # noqa: E501
orient = orient.lower()

result = self.to_pandas().to_dict(orient=orient, into=into)
if orient == "series":
# Special case needed to avoid converting
# cudf.Series objects into pd.Series
into_c = pd.core.common.standardize_mapping(into)
return into_c((k, v) for k, v in self.items())

return self.to_pandas().to_dict(orient=orient, into=into)
# Ensure values are cudf.Series
converted = ((k, Series(v)) for k, v in result.items())
if isinstance(into, defaultdict):
return type(result)(into.default_factory, converted)
return type(result)(converted)
return result
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue: ‏This change means that the "special-case" orient="series" which previously did not induce a copy now produces both a DtoH and HtoD copy.

We should just handle the case correctly:

if orient == "series":
    if not inspect.isclass(into):
        cons = type(into)
        if isinstance(into, defaultdict):
            cons = partial(cons, into.default_factory)
    elif issubclass(into, Mapping):
        cons = into
        if issubclass(into, defaultdict):
           raise TypeError("Must provide initialised defaultdict")
    else:
       raise TypeError(...)
    return cons(self.items())

Aside, this is a mad interface, one should just be on the hook for providing something that has the same __init__ signature as dict (i.e. *args, **kwargs -> Mapping).

(I note the implementation in pandas has a bug because it doesn't preserve the type of the output if the input is a subclass of defaultdict)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

now produces both a DtoH and HtoD copy.

Oof thanks for the catch! Any tips on generally knowing when a DtoH or HtoD occurs?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any time you call to_pandas or to_arrow or .values_host or similar to get a host-side version of the object. Then in the other direction, any time you have a host-side object.


@_cudf_nvtx_annotate
def scatter_by_map(
Expand Down
11 changes: 7 additions & 4 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,16 @@ def __contains__(self, item):
item, tuple(np.sctypes["int"] + np.sctypes["float"] + [int, float])
):
return False
elif isinstance(item, bool):
return False
try:
item = pd.core.dtypes.common.ensure_python_int(item)
except TypeError:
int_item = int(item)
assert int_item == item
except (TypeError, AssertionError):
return False
wence- marked this conversation as resolved.
Show resolved Hide resolved
if not item % 1 == 0:
if not int_item % 1 == 0:
return False
wence- marked this conversation as resolved.
Show resolved Hide resolved
return item in range(self._start, self._stop, self._step)
return int_item in range(self._start, self._stop, self._step)
wence- marked this conversation as resolved.
Show resolved Hide resolved

@_cudf_nvtx_annotate
def copy(self, name=None, deep=False, dtype=None, names=None):
Expand Down
9 changes: 4 additions & 5 deletions python/cudf/cudf/tests/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140
from cudf.core._compat import PANDAS_GE_150
from cudf.testing._utils import (
_create_pandas_series_float64_default,
assert_eq,
Expand Down Expand Up @@ -536,10 +536,9 @@ def get_window_bounds(
"indexer",
[
pd.api.indexers.FixedForwardWindowIndexer(window_size=2),
pd.core.window.expanding.ExpandingIndexer(),
pd.core.window.indexers.FixedWindowIndexer(window_size=3)
if PANDAS_LT_140
else pd.core.indexers.objects.FixedWindowIndexer(window_size=3),
pd.api.indexers.VariableOffsetWindowIndexer(
index=pd.date_range("2020", periods=5), offset=pd.offsets.BDay(1)
),
wence- marked this conversation as resolved.
Show resolved Hide resolved
],
)
def test_rolling_indexer_support(indexer):
Expand Down