From 19942809679e4675c296a38f90bfdbaa8574eee2 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Sat, 20 Jan 2024 01:39:00 -0500 Subject: [PATCH] Enable intermediate proxies to be picklable (#14752) Closes https://github.com/rapidsai/cudf/issues/14738 Enables intermediate proxy types to be pickled, same as final proxy types. Authors: - Ashwin Srinath (https://github.com/shwina) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/14752 --- python/cudf/cudf/pandas/fast_slow_proxy.py | 77 ++++++++++++++----- python/cudf/cudf/pandas/module_accelerator.py | 7 +- .../cudf_pandas_tests/test_cudf_pandas.py | 8 ++ 3 files changed, 70 insertions(+), 22 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 3dc6a59cc16..d132116af61 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 @@ -25,6 +25,11 @@ from .annotation import nvtx + +def call_operator(fn, args, kwargs): + return fn(*args, **kwargs) + + _CUDF_PANDAS_NVTX_COLORS = { "COPY_SLOW_TO_FAST": 0xCA0020, "COPY_FAST_TO_SLOW": 0xF4A582, @@ -189,22 +194,6 @@ def _fsproxy_state(self) -> _State: else _State.SLOW ) - def __reduce__(self): - # Need a local import to avoid circular import issues - from .module_accelerator import disable_module_accelerator - - with disable_module_accelerator(): - pickled_wrapped_obj = pickle.dumps(self._fsproxy_wrapped) - return (_PickleConstructor(type(self)), (), pickled_wrapped_obj) - - def __setstate__(self, state): - # Need a local import to avoid circular import issues - from .module_accelerator import disable_module_accelerator - - with disable_module_accelerator(): - unpickled_wrapped_obj = pickle.loads(state) - self._fsproxy_wrapped = unpickled_wrapped_obj - slow_dir = dir(slow_type) cls_dict = { "__init__": __init__, @@ -215,9 +204,8 @@ def __setstate__(self, state): "_fsproxy_slow_to_fast": _fsproxy_slow_to_fast, "_fsproxy_fast_to_slow": _fsproxy_fast_to_slow, "_fsproxy_state": _fsproxy_state, - "__reduce__": __reduce__, - "__setstate__": __setstate__, } + if additional_attributes is None: additional_attributes = {} for method in _SPECIAL_METHODS: @@ -716,6 +704,27 @@ def _fsproxy_wrap(cls, value, func): proxy._fsproxy_wrapped = value return proxy + def __reduce__(self): + """ + In conjunction with `__setstate__`, this effectively enables + proxy types to be pickled and unpickled by pickling and unpickling + the underlying wrapped types. 
+ """ + # Need a local import to avoid circular import issues + from .module_accelerator import disable_module_accelerator + + with disable_module_accelerator(): + pickled_wrapped_obj = pickle.dumps(self._fsproxy_wrapped) + return (_PickleConstructor(type(self)), (), pickled_wrapped_obj) + + def __setstate__(self, state): + # Need a local import to avoid circular import issues + from .module_accelerator import disable_module_accelerator + + with disable_module_accelerator(): + unpickled_wrapped_obj = pickle.loads(state) + self._fsproxy_wrapped = unpickled_wrapped_obj + class _IntermediateProxy(_FastSlowProxy): """ @@ -772,6 +781,34 @@ def _fsproxy_fast_to_slow(self) -> Any: args, kwargs = _slow_arg(args), _slow_arg(kwargs) return func(*args, **kwargs) + def __reduce__(self): + """ + In conjunction with `__setstate__`, this effectively enables + proxy types to be pickled and unpickled by pickling and unpickling + the underlying wrapped types. + """ + # Need a local import to avoid circular import issues + from .module_accelerator import disable_module_accelerator + + with disable_module_accelerator(): + pickled_wrapped_obj = pickle.dumps(self._fsproxy_wrapped) + pickled_method_chain = pickle.dumps(self._method_chain) + return ( + _PickleConstructor(type(self)), + (), + (pickled_wrapped_obj, pickled_method_chain), + ) + + def __setstate__(self, state): + # Need a local import to avoid circular import issues + from .module_accelerator import disable_module_accelerator + + with disable_module_accelerator(): + unpickled_wrapped_obj = pickle.loads(state[0]) + unpickled_method_chain = pickle.loads(state[1]) + self._fsproxy_wrapped = unpickled_wrapped_obj + self._method_chain = unpickled_method_chain + class _CallableProxyMixin: """ @@ -788,7 +825,7 @@ def __call__(self, *args, **kwargs) -> Any: # _fast_slow_function_call) to avoid infinite recursion. 
# TODO: When Python 3.11 is the minimum supported Python version # this can use operator.call - lambda fn, args, kwargs: fn(*args, **kwargs), + call_operator, self, args, kwargs, diff --git a/python/cudf/cudf/pandas/module_accelerator.py b/python/cudf/cudf/pandas/module_accelerator.py index 180d75d96e8..e97d6e4af24 100644 --- a/python/cudf/cudf/pandas/module_accelerator.py +++ b/python/cudf/cudf/pandas/module_accelerator.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -551,12 +551,15 @@ def getattr_real_or_wrapped( # release the lock after reading this value) use_real = not loader._use_fast_lib if not use_real: + CUDF_PANDAS_PATH = __file__.rsplit("/", 1)[0] # Only need to check the denylist if we're not turned off. frame = sys._getframe() # We cannot possibly be at the top level. assert frame.f_back calling_module = pathlib.PurePath(frame.f_back.f_code.co_filename) - use_real = any( + use_real = not calling_module.is_relative_to( + CUDF_PANDAS_PATH + ) and any( calling_module.is_relative_to(path) for path in loader._denylist ) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 44f301819ed..738ff24f374 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1235,6 +1235,14 @@ def test_func_namespace(): assert xpd.concat is xpd.core.reshape.concat.concat +def test_pickle_groupby(dataframe): + pdf, df = dataframe + pgb = pdf.groupby("a") + gb = df.groupby("a") + gb = pickle.loads(pickle.dumps(gb)) + tm.assert_equal(pgb.sum(), gb.sum()) + + def test_isinstance_base_offset(): offset = xpd.tseries.frequencies.to_offset("1s") assert isinstance(offset, xpd.tseries.offsets.BaseOffset)