diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 169dd80e132..5f4cf2e6cc6 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -9,6 +9,7 @@ import operator import pickle import types +import warnings from collections.abc import Iterator from enum import IntEnum from typing import ( @@ -23,6 +24,10 @@ Type, ) +import numpy as np + +from ..options import _env_get_bool +from ..testing._utils import assert_eq from .annotation import nvtx @@ -808,7 +813,9 @@ def __get__(self, instance, owner) -> Any: else: # for anything else, use a fast-slow attribute: self._attr, _ = _fast_slow_function_call( - getattr, owner, self._name + getattr, + owner, + self._name, ) if isinstance( @@ -829,9 +836,11 @@ def __get__(self, instance, owner) -> Any: getattr(instance._fsproxy_slow, self._name), None, # type: ignore ) - return _fast_slow_function_call(getattr, instance, self._name)[ - 0 - ] + return _fast_slow_function_call( + getattr, + instance, + self._name, + )[0] return self._attr @@ -866,7 +875,17 @@ def __name__(self, value): setattr(self._fsproxy_slow, "__name__", value) -def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: +def _assert_fast_slow_eq(left, right): + if _is_final_type(type(left)) or type(left) in NUMPY_TYPES: + assert_eq(left, right) + + +def _fast_slow_function_call( + func: Callable, + /, + *args, + **kwargs, +) -> Any: """ Call `func` with all `args` and `kwargs` converted to their respective fast type. If that fails, call `func` with all @@ -890,6 +909,37 @@ def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: # try slow path raise Exception() fast = True + if _env_get_bool("CUDF_PANDAS_DEBUGGING", False): + try: + with nvtx.annotate( + "EXECUTE_SLOW_DEBUG", + color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], + domain="cudf_pandas", + ): + slow_args, slow_kwargs = ( + _slow_arg(args), + _slow_arg(kwargs), + ) + with disable_module_accelerator(): + slow_result = func(*slow_args, **slow_kwargs) + except Exception as e: + warnings.warn( + "The result from pandas could not be computed. " + f"The exception was {e}." + ) + else: + try: + _assert_fast_slow_eq(result, slow_result) + except AssertionError as e: + warnings.warn( + "The results from cudf and pandas were different. " + f"The exception was {e}." + ) + except Exception as e: + warnings.warn( + "Pandas debugging mode failed. " + f"The exception was {e}." + ) except Exception: with nvtx.annotate( "EXECUTE_SLOW", @@ -1135,6 +1185,9 @@ def _replace_closurevars( ) +NUMPY_TYPES: Set[str] = set(np.sctypeDict.values()) + + _SPECIAL_METHODS: Set[str] = { "__abs__", "__add__", diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index fef829b17fc..72e9ad5fca3 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -41,8 +41,9 @@ get_calendar, ) -# Accelerated pandas has the real pandas module as an attribute +# Accelerated pandas has the real pandas and cudf modules as attributes pd = xpd._fsproxy_slow +cudf = xpd._fsproxy_fast @pytest.fixture @@ -1424,5 +1425,66 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] +def test_cudf_pandas_debugging_different_results(monkeypatch): + cudf_mean = cudf.Series.mean + + def mock_mean_one(self, *args, **kwargs): + return np.float64(1.0) + + with monkeypatch.context() as monkeycontext: + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", mock_mean_one) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") + s = xpd.Series([1, 2]) + with pytest.warns( + UserWarning, + match="The results from cudf and pandas were different.", + ): + assert s.mean() == 1.0 + # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts. + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", cudf_mean) + + +def test_cudf_pandas_debugging_pandas_error(monkeypatch): + pd_mean = pd.Series.mean + + def mock_mean_exception(self, *args, **kwargs): + raise Exception() + + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr( + xpd.Series.mean, "_fsproxy_slow", mock_mean_exception + ) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") + s = xpd.Series([1, 2]) + with pytest.warns( + UserWarning, + match="The result from pandas could not be computed.", + ): + s = xpd.Series([1, 2]) + assert s.mean() == 1.5 + # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts. + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean) + + +def test_cudf_pandas_debugging_failed(monkeypatch): + pd_mean = pd.Series.mean + + def mock_mean_none(self, *args, **kwargs): + return None + + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr(xpd.Series.mean, "_fsproxy_slow", mock_mean_none) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") + s = xpd.Series([1, 2]) + with pytest.warns( + UserWarning, + match="Pandas debugging mode failed.", + ): + s = xpd.Series([1, 2]) + assert s.mean() == 1.5 + # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts. + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean) + + def test_excelwriter_pathlike(): assert isinstance(pd.ExcelWriter("foo.xlsx"), os.PathLike)