From 577cb93338ce4b0709e9ed3d8fe44dea1abdfe17 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 24 May 2024 10:25:05 -0700 Subject: [PATCH 01/24] Add a test --- python/cudf/cudf/options.py | 15 ++ python/cudf/cudf/pandas/fast_slow_proxy.py | 150 ++++++++++++++++++ .../cudf_pandas_tests/test_fast_slow_proxy.py | 3 + 3 files changed, 168 insertions(+) diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index efa8eabd8b8..b2453156f39 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -308,6 +308,21 @@ def _integer_and_none_validator(val): _make_contains_validator([False, True]), ) +_register_option( + "mode.pandas_debugging", + False, + textwrap.dedent( + """ + If set to `True`, enables cudf.pandas debugging mode. + When enabled, cudf code paths in cudf.pandas will + also run with pandas and raise a warning if the + results from cudf and pandas differ. + If set to `False`, cudf.pandas debugging is disabled. + """ + ), + _make_contains_validator([False, True]), +) + class option_context(ContextDecorator): """ diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 94caec1ce6c..72e9fb30355 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -9,6 +9,7 @@ import operator import pickle import types +import warnings from collections.abc import Iterator from enum import IntEnum from typing import ( @@ -23,6 +24,9 @@ Type, ) +from cudf.testing._utils import assert_eq + +from ..options import get_option from .annotation import nvtx @@ -898,6 +902,41 @@ def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: # try slow path raise Exception() fast = True + + if get_option("mode.pandas_debugging"): + try: + with nvtx.annotate( + "EXECUTE_SLOW", + color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], + domain="cudf_pandas", + ): + slow_args, slow_kwargs = ( + _slow_arg(args), + _slow_arg(kwargs), + ) + with disable_module_accelerator(): + slow_result = func(*slow_args, **slow_kwargs) + except Exception as e: + warnings.warn( + "The result from pandas couyld not be computed correctly. " + f"The exception was {e}." + ) + else: + try: + print("FAST ", result, type(result)) + print("SLOW ", slow_result, type(slow_result)) + if type(result).__name__ in _CUDF_OBJ_FINAL_TYPES: + assert_eq(result, slow_result) + except AssertionError as ae: + warnings.warn( + "The results from cudf and pandas were different. " + f"The exception was {ae}." + ) + except Exception as e: + warnings.warn( + "Pandas debugging mode failed. " + f"The exception was {e}." + ) except Exception: with nvtx.annotate( "EXECUTE_SLOW", @@ -1143,6 +1182,117 @@ def _replace_closurevars( ) +_CUDF_OBJ_FINAL_TYPES: Set[str] = { + "Timedelta", + "Timestamp", + "DataFrame", + "Series", + "Index", + "RangeIndex", + "SparseDtype", + "SparseArray", + "CategoricalIndex", + "Categorical", + "CategoricalDtype", + "DatetimeIndex", + "DatetimeArray", + "DatetimeTZDtype", + "TimedeltaIndex", + "NumpyExtensionArray", + "PandasArray", + "TimedeltaArray", + "PeriodIndex", + "PeriodArray", + "PeriodDtype", + "Period", + "MultiIndex", + "Grouper", + "StringArray", + "StringDtype", + "BooleanArray", + "BooleanDtype", + "IntegerArray", + "Int8Dtype", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "UInt8Dtype", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "IntervalIndex", + "IntervalArray", + "IntervalDtype", + "Interval", + "FloatingArray", + "Float32Dtype", + "Float64Dtype", + "FixedForwardWindowIndexer", + "VariableOffsetWindowIndexer", + "HDFStore", + "ExcelFile", + "ExcelWriter", + "Styler", + "USFederalHolidayCalendar", + "HolidayCalendarMetaClass", + "AbstractHolidayCalendar", + "Holiday", + "USThanksgivingDay", + "USColumbusDay", + "USLaborDay", + "USMemorialDay", + "USMartinLutherKingJr", + "USPresidentsDay", + "GoodFriday", + "EasterMonday", + "FY5253", + "BDay", + "BMonthBegin", + "BMonthEnd", + "BQuarterBegin", + "BQuarterEnd", + "BusinessDay", + "BusinessHour", + "BusinessMonthBegin", + "BusinessMonthEnd", + "BYearBegin", + "BYearEnd", + "CBMonthBegin", + "CBMonthEnd", + "CDay", + "CustomBusinessDay", + "CustomBusinessHour", + "CustomBusinessMonthBegin", + "CustomBusinessMonthEnd", + "DateOffset", + "BaseOffset", + "Day", + "Easter", + "FY5253Quarter", + "Hour", + "LastWeekOfMonth", + "Micro", + "Milli", + "Minute", + "MonthBegin", + "MonthEnd", + "Nano", + "QuarterBegin", + "QuarterEnd", + "Second", + "SemiMonthBegin", + "SemiMonthEnd", + "Tick", + "Week", + "WeekOfMonth", + "YearBegin", + "YearEnd", + "Flags", + "NamedAgg", + "ArrowExtensionArray", +} + + _SPECIAL_METHODS: Set[str] = { "__abs__", "__add__", diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py index 39bf07c49de..43c5d8e33c8 100644 --- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py +++ b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py @@ -545,3 +545,6 @@ def test_tuple_with_attrs_transform(): assert b == bprime and b is not bprime assert c == cprime and c is not cprime assert d == dprime and d is not dprime + +def test_fast_slow_function_call(): + pass From 925606ddf2462689009cfd2fe8ba115db769e072 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 28 May 2024 05:31:48 -0700 Subject: [PATCH 02/24] Add a test --- python/cudf/cudf/pandas/fast_slow_proxy.py | 10 +++++++--- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 15 +++++++++++++++ .../cudf_pandas_tests/test_fast_slow_proxy.py | 3 --- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 72e9fb30355..4d6756f68e7 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -918,14 +918,15 @@ def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: slow_result = func(*slow_args, **slow_kwargs) except Exception as e: warnings.warn( - "The result from pandas couyld not be computed correctly. " + "The result from pandas could not be computed correctly. " f"The exception was {e}." ) else: try: print("FAST ", result, type(result)) print("SLOW ", slow_result, type(slow_result)) - if type(result).__name__ in _CUDF_OBJ_FINAL_TYPES: + print(type(result).__name__) + if type(result).__name__ in _TYPES: assert_eq(result, slow_result) except AssertionError as ae: warnings.warn( @@ -1182,7 +1183,7 @@ def _replace_closurevars( ) -_CUDF_OBJ_FINAL_TYPES: Set[str] = { +_TYPES: Set[str] = { "Timedelta", "Timestamp", "DataFrame", @@ -1290,6 +1291,9 @@ def _replace_closurevars( "Flags", "NamedAgg", "ArrowExtensionArray", + "int", + "str", + "float", } diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 75bceea3034..6e24b46a700 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1421,3 +1421,18 @@ def test_holidays_within_dates(holiday, start, expected): utc.localize(xpd.Timestamp(start)), ) ) == [utc.localize(dt) for dt in expected] + + +def test_fast_slow_function_call(monkeypatch): + from cudf import Series, set_option + + set_option("mode.pandas_debugging", True) + + def mock_mean(self, *args, **kwargs): + return 1.0 + + monkeypatch.setattr(Series, "mean", mock_mean) + with pytest.warns(UserWarning): + s = pd.Series([1, 2]) + s.mean() + set_option("mode.pandas_debugging", False) diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py index 43c5d8e33c8..39bf07c49de 100644 --- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py +++ b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py @@ -545,6 +545,3 @@ def test_tuple_with_attrs_transform(): assert b == bprime and b is not bprime assert c == cprime and c is not cprime assert d == dprime and d is not dprime - -def test_fast_slow_function_call(): - pass From ffd8dede1cbfe4bebf383e29c5a00027f7484395 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 28 May 2024 05:57:35 -0700 Subject: [PATCH 03/24] Add debug mode as an argument --- python/cudf/cudf/pandas/fast_slow_proxy.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 4d6756f68e7..7ae9487ff47 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -878,7 +878,13 @@ def __name__(self, value): setattr(self._fsproxy_slow, "__name__", value) -def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: +def _fast_slow_function_call( + func: Callable, + /, + *args, + debug_mode: str = "mode.pandas_debugging", + **kwargs, +) -> Any: """ Call `func` with all `args` and `kwargs` converted to their respective fast type. If that fails, call `func` with all @@ -903,7 +909,7 @@ def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: raise Exception() fast = True - if get_option("mode.pandas_debugging"): + if get_option(debug_mode): try: with nvtx.annotate( "EXECUTE_SLOW", @@ -923,9 +929,6 @@ def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: ) else: try: - print("FAST ", result, type(result)) - print("SLOW ", slow_result, type(slow_result)) - print(type(result).__name__) if type(result).__name__ in _TYPES: assert_eq(result, slow_result) except AssertionError as ae: From d7286e94ecdce9d86d66157deb6e4e8e377b7360 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 28 May 2024 08:00:02 -0700 Subject: [PATCH 04/24] Add an env var --- python/cudf/cudf/options.py | 2 +- python/cudf/cudf/pandas/fast_slow_proxy.py | 5 ++++- .../cudf_pandas_tests/test_cudf_pandas.py | 19 +++++++++++++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index b2453156f39..00f6d9fcf84 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -310,7 +310,7 @@ def _integer_and_none_validator(val): _register_option( "mode.pandas_debugging", - False, + _env_get_bool("CUDF_PANDAS_DEBUG", False), textwrap.dedent( """ If set to `True`, enables cudf.pandas debugging mode. diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 7ae9487ff47..f937d5e1af7 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -24,6 +24,7 @@ Type, ) +from cudf.options import _env_get_bool from cudf.testing._utils import assert_eq from ..options import get_option @@ -909,7 +910,9 @@ def _fast_slow_function_call( raise Exception() fast = True - if get_option(debug_mode): + if get_option(debug_mode) | _env_get_bool( + "CUDF_PANDAS_DEBUG", False + ): try: with nvtx.annotate( "EXECUTE_SLOW", diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 6e24b46a700..a3ff6057840 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1423,7 +1423,7 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] -def test_fast_slow_function_call(monkeypatch): +def test_pandas_debugging_mode_option(monkeypatch): from cudf import Series, set_option set_option("mode.pandas_debugging", True) @@ -1434,5 +1434,20 @@ def mock_mean(self, *args, **kwargs): monkeypatch.setattr(Series, "mean", mock_mean) with pytest.warns(UserWarning): s = pd.Series([1, 2]) - s.mean() + assert s.mean() == 1.0 set_option("mode.pandas_debugging", False) + + +def test_pandas_debugging_mode_env_var(monkeypatch): + from cudf import Series + + monkeypatch.setenv("CUDF_PANDAS_DEBUG", "True") + + def mock_mean(self, *args, **kwargs): + return 1.0 + + monkeypatch.setattr(Series, "mean", mock_mean) + with pytest.warns(UserWarning): + s = pd.Series([1, 2]) + assert s.mean() == 1.0 + monkeypatch.setenv("CUDF_PANDAS_DEBUG", "False") From b33f52201e242eb88bd578033a01f7f14231fa63 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 28 May 2024 09:08:03 -0700 Subject: [PATCH 05/24] Change to xpd --- .../cudf/cudf_pandas_tests/test_cudf_pandas.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index a3ff6057840..1f5768b6488 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1433,21 +1433,6 @@ def mock_mean(self, *args, **kwargs): monkeypatch.setattr(Series, "mean", mock_mean) with pytest.warns(UserWarning): - s = pd.Series([1, 2]) + s = xpd.Series([1, 2]) assert s.mean() == 1.0 set_option("mode.pandas_debugging", False) - - -def test_pandas_debugging_mode_env_var(monkeypatch): - from cudf import Series - - monkeypatch.setenv("CUDF_PANDAS_DEBUG", "True") - - def mock_mean(self, *args, **kwargs): - return 1.0 - - monkeypatch.setattr(Series, "mean", mock_mean) - with pytest.warns(UserWarning): - s = pd.Series([1, 2]) - assert s.mean() == 1.0 - monkeypatch.setenv("CUDF_PANDAS_DEBUG", "False") From 9dc9ba9846f2ad827ba77086ddb1527fc36be25d Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 28 May 2024 10:35:29 -0700 Subject: [PATCH 06/24] Add test with env var --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 1f5768b6488..71b159627fc 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1436,3 +1436,18 @@ def mock_mean(self, *args, **kwargs): s = xpd.Series([1, 2]) assert s.mean() == 1.0 set_option("mode.pandas_debugging", False) + + +def test_pandas_debugging_mode_env_var(monkeypatch): + from cudf import Series + + monkeypatch.setenv("CUDF_PANDAS_DEBUG", "True") + + def mock_mean(self, *args, **kwargs): + return 1.0 + + monkeypatch.setattr(Series, "mean", mock_mean) + with pytest.warns(UserWarning): + s = xpd.Series([1, 2]) + assert s.mean() == 1.0 + monkeypatch.setenv("CUDF_PANDAS_DEBUG", "False") From 9699b31f55809c406e42be4f89726f1d75d73706 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 29 May 2024 05:17:20 -0700 Subject: [PATCH 07/24] Add argument for assert func, delete test with env var --- python/cudf/cudf/pandas/fast_slow_proxy.py | 3 ++- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 15 --------------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index f937d5e1af7..df96cd3d786 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -884,6 +884,7 @@ def _fast_slow_function_call( /, *args, debug_mode: str = "mode.pandas_debugging", + assert_func: Callable = assert_eq, **kwargs, ) -> Any: """ @@ -933,7 +934,7 @@ def _fast_slow_function_call( else: try: if type(result).__name__ in _TYPES: - assert_eq(result, slow_result) + assert_func(result, slow_result) except AssertionError as ae: warnings.warn( "The results from cudf and pandas were different. " diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 71b159627fc..1f5768b6488 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1436,18 +1436,3 @@ def mock_mean(self, *args, **kwargs): s = xpd.Series([1, 2]) assert s.mean() == 1.0 set_option("mode.pandas_debugging", False) - - -def test_pandas_debugging_mode_env_var(monkeypatch): - from cudf import Series - - monkeypatch.setenv("CUDF_PANDAS_DEBUG", "True") - - def mock_mean(self, *args, **kwargs): - return 1.0 - - monkeypatch.setattr(Series, "mean", mock_mean) - with pytest.warns(UserWarning): - s = xpd.Series([1, 2]) - assert s.mean() == 1.0 - monkeypatch.setenv("CUDF_PANDAS_DEBUG", "False") From e421361ed1e73f1d81861085f0fa9b149251c558 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 29 May 2024 09:46:52 -0700 Subject: [PATCH 08/24] Address comments --- python/cudf/cudf/options.py | 2 +- python/cudf/cudf/pandas/fast_slow_proxy.py | 56 ++++++++++++------- .../cudf_pandas_tests/test_cudf_pandas.py | 20 ++++--- 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index 00f6d9fcf84..7ffa6b9fc56 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -310,7 +310,7 @@ def _integer_and_none_validator(val): _register_option( "mode.pandas_debugging", - _env_get_bool("CUDF_PANDAS_DEBUG", False), + _env_get_bool("MODE_PANDAS_DEBUGGING", False), textwrap.dedent( """ If set to `True`, enables cudf.pandas debugging mode. diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index df96cd3d786..1824a4d3669 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -19,12 +19,12 @@ Literal, Mapping, Optional, + Sequence, Set, Tuple, Type, ) -from cudf.options import _env_get_bool from cudf.testing._utils import assert_eq from ..options import get_option @@ -35,6 +35,12 @@ def call_operator(fn, args, kwargs): return fn(*args, **kwargs) +DEBUG_MODE = ( + "mode.pandas_debugging" if get_option("mode.pandas_debugging") else None +) +ASSERT_FUNC = None + + _CUDF_PANDAS_NVTX_COLORS = { "COPY_SLOW_TO_FAST": 0xCA0020, "COPY_FAST_TO_SLOW": 0xF4A582, @@ -179,6 +185,8 @@ def __init__(self, *args, **kwargs): lambda cls, args, kwargs: setattr( self, "_fsproxy_wrapped", cls(*args, **kwargs) ), + DEBUG_MODE, + ASSERT_FUNC, type(self), args, kwargs, @@ -708,6 +716,8 @@ def __call__(self, *args, **kwargs) -> Any: # TODO: When Python 3.11 is the minimum supported Python version # this can use operator.call call_operator, + DEBUG_MODE, + ASSERT_FUNC, self, args, kwargs, @@ -821,7 +831,7 @@ def __get__(self, instance, owner) -> Any: else: # for anything else, use a fast-slow attribute: self._attr, _ = _fast_slow_function_call( - getattr, owner, self._name + getattr, DEBUG_MODE, ASSERT_FUNC, owner, self._name ) if isinstance( @@ -842,9 +852,9 @@ def __get__(self, instance, owner) -> Any: getattr(instance._fsproxy_slow, self._name), None, # type: ignore ) - return _fast_slow_function_call(getattr, instance, self._name)[ - 0 - ] + return _fast_slow_function_call( + getattr, DEBUG_MODE, ASSERT_FUNC, instance, self._name + )[0] return self._attr @@ -879,13 +889,22 @@ def __name__(self, value): setattr(self._fsproxy_slow, "__name__", value) +def _assert_fast_slow_eq(left, right, **kwargs): + assert_func = ( + assert_eq + if not kwargs.get("assert_func") + else kwargs.get("assert_func") + ) + if type(left).__name__ in _TYPES: + assert_func(left, right) + + def _fast_slow_function_call( func: Callable, - /, - *args, - debug_mode: str = "mode.pandas_debugging", - assert_func: Callable = assert_eq, - **kwargs, + debug_mode: str | None = None, + assert_func: Callable | None = None, + *args: Sequence[Any], + **kwargs: Mapping[Any, Any], ) -> Any: """ Call `func` with all `args` and `kwargs` converted to their @@ -911,12 +930,10 @@ def _fast_slow_function_call( raise Exception() fast = True - if get_option(debug_mode) | _env_get_bool( - "CUDF_PANDAS_DEBUG", False - ): + if get_option(debug_mode): try: with nvtx.annotate( - "EXECUTE_SLOW", + "EXECUTE_SLOW_DEBUG", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], domain="cudf_pandas", ): @@ -928,17 +945,18 @@ def _fast_slow_function_call( slow_result = func(*slow_args, **slow_kwargs) except Exception as e: warnings.warn( - "The result from pandas could not be computed correctly. " + "The result from pandas could not be computed. " f"The exception was {e}." ) else: try: - if type(result).__name__ in _TYPES: - assert_func(result, slow_result) - except AssertionError as ae: + _assert_fast_slow_eq( + result, slow_result, assert_func=assert_func + ) + except AssertionError as e: warnings.warn( "The results from cudf and pandas were different. " - f"The exception was {ae}." + f"The exception was {e}." ) except Exception as e: warnings.warn( diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 1f5768b6488..66d23ada8b9 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1424,15 +1424,21 @@ def test_holidays_within_dates(holiday, start, expected): def test_pandas_debugging_mode_option(monkeypatch): - from cudf import Series, set_option - - set_option("mode.pandas_debugging", True) + from cudf import Series, option_context def mock_mean(self, *args, **kwargs): return 1.0 - monkeypatch.setattr(Series, "mean", mock_mean) - with pytest.warns(UserWarning): + with option_context("mode.pandas_debugging", True): + import cudf.pandas + + cudf.pandas.install() + import pandas as xpd + + monkeypatch.setattr(Series, "mean", mock_mean) s = xpd.Series([1, 2]) - assert s.mean() == 1.0 - set_option("mode.pandas_debugging", False) + with pytest.warns( + UserWarning, + match="The results from cudf and pandas were different.", + ): + assert s.mean() == 1.0 From 5b313ced9349f674b094e8c4a6df973f6d260087 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 30 May 2024 06:01:37 -0700 Subject: [PATCH 09/24] Address comments, refactor test --- python/cudf/cudf/pandas/fast_slow_proxy.py | 160 +++--------------- .../cudf_pandas_tests/test_cudf_pandas.py | 16 +- 2 files changed, 28 insertions(+), 148 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 1824a4d3669..df46b59a8d1 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -19,12 +19,13 @@ Literal, Mapping, Optional, - Sequence, Set, Tuple, Type, ) +import numpy as np + from cudf.testing._utils import assert_eq from ..options import get_option @@ -35,12 +36,6 @@ def call_operator(fn, args, kwargs): return fn(*args, **kwargs) -DEBUG_MODE = ( - "mode.pandas_debugging" if get_option("mode.pandas_debugging") else None -) -ASSERT_FUNC = None - - _CUDF_PANDAS_NVTX_COLORS = { "COPY_SLOW_TO_FAST": 0xCA0020, "COPY_FAST_TO_SLOW": 0xF4A582, @@ -185,8 +180,7 @@ def __init__(self, *args, **kwargs): lambda cls, args, kwargs: setattr( self, "_fsproxy_wrapped", cls(*args, **kwargs) ), - DEBUG_MODE, - ASSERT_FUNC, + get_option("mode.pandas_debugging"), type(self), args, kwargs, @@ -716,8 +710,7 @@ def __call__(self, *args, **kwargs) -> Any: # TODO: When Python 3.11 is the minimum supported Python version # this can use operator.call call_operator, - DEBUG_MODE, - ASSERT_FUNC, + get_option("mode.pandas_debugging"), self, args, kwargs, @@ -831,7 +824,10 @@ def __get__(self, instance, owner) -> Any: else: # for anything else, use a fast-slow attribute: self._attr, _ = _fast_slow_function_call( - getattr, DEBUG_MODE, ASSERT_FUNC, owner, self._name + getattr, + get_option("mode.pandas_debugging"), + owner, + self._name, ) if isinstance( @@ -853,7 +849,10 @@ def __get__(self, instance, owner) -> Any: None, # type: ignore ) return _fast_slow_function_call( - getattr, DEBUG_MODE, ASSERT_FUNC, instance, self._name + getattr, + get_option("mode.pandas_debugging"), + instance, + self._name, )[0] return self._attr @@ -890,21 +889,16 @@ def __name__(self, value): def _assert_fast_slow_eq(left, right, **kwargs): - assert_func = ( - assert_eq - if not kwargs.get("assert_func") - else kwargs.get("assert_func") - ) - if type(left).__name__ in _TYPES: + assert_func = kwargs.get("assert_func", assert_eq) + if _is_final_type(type(left)) or (type(left) in NUMPY_TYPES): assert_func(left, right) def _fast_slow_function_call( func: Callable, - debug_mode: str | None = None, - assert_func: Callable | None = None, - *args: Sequence[Any], - **kwargs: Mapping[Any, Any], + mode_pandas_debugging: bool | None = None, + *args, + **kwargs, ) -> Any: """ Call `func` with all `args` and `kwargs` converted to their @@ -929,8 +923,7 @@ def _fast_slow_function_call( # try slow path raise Exception() fast = True - - if get_option(debug_mode): + if mode_pandas_debugging: try: with nvtx.annotate( "EXECUTE_SLOW_DEBUG", @@ -950,9 +943,7 @@ def _fast_slow_function_call( ) else: try: - _assert_fast_slow_eq( - result, slow_result, assert_func=assert_func - ) + _assert_fast_slow_eq(result, slow_result) except AssertionError as e: warnings.warn( "The results from cudf and pandas were different. " @@ -1208,118 +1199,7 @@ def _replace_closurevars( ) -_TYPES: Set[str] = { - "Timedelta", - "Timestamp", - "DataFrame", - "Series", - "Index", - "RangeIndex", - "SparseDtype", - "SparseArray", - "CategoricalIndex", - "Categorical", - "CategoricalDtype", - "DatetimeIndex", - "DatetimeArray", - "DatetimeTZDtype", - "TimedeltaIndex", - "NumpyExtensionArray", - "PandasArray", - "TimedeltaArray", - "PeriodIndex", - "PeriodArray", - "PeriodDtype", - "Period", - "MultiIndex", - "Grouper", - "StringArray", - "StringDtype", - "BooleanArray", - "BooleanDtype", - "IntegerArray", - "Int8Dtype", - "Int16Dtype", - "Int32Dtype", - "Int64Dtype", - "UInt8Dtype", - "UInt16Dtype", - "UInt32Dtype", - "UInt64Dtype", - "IntervalIndex", - "IntervalArray", - "IntervalDtype", - "Interval", - "FloatingArray", - "Float32Dtype", - "Float64Dtype", - "FixedForwardWindowIndexer", - "VariableOffsetWindowIndexer", - "HDFStore", - "ExcelFile", - "ExcelWriter", - "Styler", - "USFederalHolidayCalendar", - "HolidayCalendarMetaClass", - "AbstractHolidayCalendar", - "Holiday", - "USThanksgivingDay", - "USColumbusDay", - "USLaborDay", - "USMemorialDay", - "USMartinLutherKingJr", - "USPresidentsDay", - "GoodFriday", - "EasterMonday", - "FY5253", - "BDay", - "BMonthBegin", - "BMonthEnd", - "BQuarterBegin", - "BQuarterEnd", - "BusinessDay", - "BusinessHour", - "BusinessMonthBegin", - "BusinessMonthEnd", - "BYearBegin", - "BYearEnd", - "CBMonthBegin", - "CBMonthEnd", - "CDay", - "CustomBusinessDay", - "CustomBusinessHour", - "CustomBusinessMonthBegin", - "CustomBusinessMonthEnd", - "DateOffset", - "BaseOffset", - "Day", - "Easter", - "FY5253Quarter", - "Hour", - "LastWeekOfMonth", - "Micro", - "Milli", - "Minute", - "MonthBegin", - "MonthEnd", - "Nano", - "QuarterBegin", - "QuarterEnd", - "Second", - "SemiMonthBegin", - "SemiMonthEnd", - "Tick", - "Week", - "WeekOfMonth", - "YearBegin", - "YearEnd", - "Flags", - "NamedAgg", - "ArrowExtensionArray", - "int", - "str", - "float", -} +NUMPY_TYPES: Set[str] = set(np.sctypeDict.values()) _SPECIAL_METHODS: Set[str] = { diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 66d23ada8b9..c2f4a0f9a65 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1424,21 +1424,21 @@ def test_holidays_within_dates(holiday, start, expected): def test_pandas_debugging_mode_option(monkeypatch): + import cudf.pandas from cudf import Series, option_context - def mock_mean(self, *args, **kwargs): - return 1.0 + cudf.pandas.install() + import pandas as xpd - with option_context("mode.pandas_debugging", True): - import cudf.pandas + def mock_mean(self, *args, **kwargs): + return np.float64(1.0) - cudf.pandas.install() - import pandas as xpd + monkeypatch.setattr(Series, "mean", mock_mean) - monkeypatch.setattr(Series, "mean", mock_mean) + with option_context("mode.pandas_debugging", True): s = xpd.Series([1, 2]) with pytest.warns( UserWarning, match="The results from cudf and pandas were different.", ): - assert s.mean() == 1.0 + assert s.mean() == np.float64(1.0) From a25a3e6982182005dcb21670bbdb76bea10c2250 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 30 May 2024 06:08:15 -0700 Subject: [PATCH 10/24] add kwargs to assert func --- python/cudf/cudf/pandas/fast_slow_proxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index df46b59a8d1..fb554a626cd 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -943,7 +943,7 @@ def _fast_slow_function_call( ) else: try: - _assert_fast_slow_eq(result, slow_result) + _assert_fast_slow_eq(result, slow_result, **kwargs) except AssertionError as e: warnings.warn( "The results from cudf and pandas were different. " From ff3408d08362b54630dd687393a0735ca3521317 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 30 May 2024 06:38:18 -0700 Subject: [PATCH 11/24] Add pandas debug option in missing places --- python/cudf/cudf/pandas/_wrappers/pandas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 2e3880e14f6..72b76adf2bd 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -128,6 +128,7 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): # hence this method is needed. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), + cudf.get_option("mode.pandas_debugging"), cls, args, kwargs, @@ -251,6 +252,7 @@ def Index__new__(cls, *args, **kwargs): # make_final_proxy_type provides. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), + cudf.get_option("mode.pandas_debugging"), cls, args, kwargs, From 8c618d7854983c0841eb4f2b5c1c318328f3fedf Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 30 May 2024 07:42:30 -0700 Subject: [PATCH 12/24] Use env var instead option --- python/cudf/cudf/options.py | 15 --------------- python/cudf/cudf/pandas/_wrappers/pandas.py | 4 ++-- python/cudf/cudf/pandas/fast_slow_proxy.py | 10 +++++----- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 6 ++++-- 4 files changed, 11 insertions(+), 24 deletions(-) diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index 7ffa6b9fc56..efa8eabd8b8 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -308,21 +308,6 @@ def _integer_and_none_validator(val): _make_contains_validator([False, True]), ) -_register_option( - "mode.pandas_debugging", - _env_get_bool("MODE_PANDAS_DEBUGGING", False), - textwrap.dedent( - """ - If set to `True`, enables cudf.pandas debugging mode. - When enabled, cudf code paths in cudf.pandas will - also run with pandas and raise a warning if the - results from cudf and pandas differ. - If set to `False`, cudf.pandas debugging is disabled. - """ - ), - _make_contains_validator([False, True]), -) - class option_context(ContextDecorator): """ diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 72b76adf2bd..8a3a91b6f59 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -128,7 +128,7 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): # hence this method is needed. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), - cudf.get_option("mode.pandas_debugging"), + cudf.options._env_get_bool("MODE_PANDAS_DEBUGGING", False), cls, args, kwargs, @@ -252,7 +252,7 @@ def Index__new__(cls, *args, **kwargs): # make_final_proxy_type provides. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), - cudf.get_option("mode.pandas_debugging"), + cudf.options._env_get_bool("MODE_PANDAS_DEBUGGING", False), cls, args, kwargs, diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index fb554a626cd..c94320c39a2 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -28,7 +28,7 @@ from cudf.testing._utils import assert_eq -from ..options import get_option +from ..options import _env_get_bool from .annotation import nvtx @@ -180,7 +180,7 @@ def __init__(self, *args, **kwargs): lambda cls, args, kwargs: setattr( self, "_fsproxy_wrapped", cls(*args, **kwargs) ), - get_option("mode.pandas_debugging"), + _env_get_bool("MODE_PANDAS_DEBUGGING", False), type(self), args, kwargs, @@ -710,7 +710,7 @@ def __call__(self, *args, **kwargs) -> Any: # TODO: When Python 3.11 is the minimum supported Python version # this can use operator.call call_operator, - get_option("mode.pandas_debugging"), + _env_get_bool("MODE_PANDAS_DEBUGGING", False), self, args, kwargs, @@ -825,7 +825,7 @@ def __get__(self, instance, owner) -> Any: # for anything else, use a fast-slow attribute: self._attr, _ = _fast_slow_function_call( getattr, - get_option("mode.pandas_debugging"), + _env_get_bool("MODE_PANDAS_DEBUGGING", False), owner, self._name, ) @@ -850,7 +850,7 @@ def __get__(self, instance, owner) -> Any: ) return _fast_slow_function_call( getattr, - get_option("mode.pandas_debugging"), + _env_get_bool("MODE_PANDAS_DEBUGGING", False), instance, self._name, )[0] diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index c2f4a0f9a65..71eb291e28a 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -6,11 +6,13 @@ import copy import datetime import operator +import os import pathlib import pickle import tempfile import types from io import BytesIO, StringIO +from unittest import mock import numpy as np import pyarrow as pa @@ -1425,7 +1427,7 @@ def test_holidays_within_dates(holiday, start, expected): def test_pandas_debugging_mode_option(monkeypatch): import cudf.pandas - from cudf import Series, option_context + from cudf import Series cudf.pandas.install() import pandas as xpd @@ -1435,7 +1437,7 @@ def mock_mean(self, *args, **kwargs): monkeypatch.setattr(Series, "mean", mock_mean) - with option_context("mode.pandas_debugging", True): + with mock.patch.dict(os.environ, {"MODE_PANDAS_DEBUGGING": "True"}): s = xpd.Series([1, 2]) with pytest.warns( UserWarning, From 70b335ef7024779d820d103a70b5525be26e1d0d Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 30 May 2024 10:46:57 -0700 Subject: [PATCH 13/24] Add CUDF_ to env var, refactor test --- python/cudf/cudf/pandas/_wrappers/pandas.py | 4 ++-- python/cudf/cudf/pandas/fast_slow_proxy.py | 12 ++++++------ python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 8 +++----- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 8a3a91b6f59..c6e07780edb 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -128,7 +128,7 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): # hence this method is needed. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), - cudf.options._env_get_bool("MODE_PANDAS_DEBUGGING", False), + cudf.options._env_get_bool("CUDF_PANDAS_DEBUGGING", False), cls, args, kwargs, @@ -252,7 +252,7 @@ def Index__new__(cls, *args, **kwargs): # make_final_proxy_type provides. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), - cudf.options._env_get_bool("MODE_PANDAS_DEBUGGING", False), + cudf.options._env_get_bool("CUDF_PANDAS_DEBUGGING", False), cls, args, kwargs, diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index c94320c39a2..0c41438cbfa 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -180,7 +180,7 @@ def __init__(self, *args, **kwargs): lambda cls, args, kwargs: setattr( self, "_fsproxy_wrapped", cls(*args, **kwargs) ), - _env_get_bool("MODE_PANDAS_DEBUGGING", False), + _env_get_bool("CUDF_PANDAS_DEBUGGING", False), type(self), args, kwargs, @@ -710,7 +710,7 @@ def __call__(self, *args, **kwargs) -> Any: # TODO: When Python 3.11 is the minimum supported Python version # this can use operator.call call_operator, - _env_get_bool("MODE_PANDAS_DEBUGGING", False), + _env_get_bool("CUDF_PANDAS_DEBUGGING", False), self, args, kwargs, @@ -825,7 +825,7 @@ def __get__(self, instance, owner) -> Any: # for anything else, use a fast-slow attribute: self._attr, _ = _fast_slow_function_call( getattr, - _env_get_bool("MODE_PANDAS_DEBUGGING", False), + _env_get_bool("CUDF_PANDAS_DEBUGGING", False), owner, self._name, ) @@ -850,7 +850,7 @@ def __get__(self, instance, owner) -> Any: ) return _fast_slow_function_call( getattr, - _env_get_bool("MODE_PANDAS_DEBUGGING", False), + _env_get_bool("CUDF_PANDAS_DEBUGGING", False), instance, self._name, )[0] @@ -896,7 +896,7 @@ def _assert_fast_slow_eq(left, right, **kwargs): def _fast_slow_function_call( func: Callable, - mode_pandas_debugging: bool | None = None, + cudf_pandas_debugging: bool | None = None, *args, **kwargs, ) -> Any: @@ -923,7 +923,7 @@ def _fast_slow_function_call( # try slow path raise Exception() fast = True - if mode_pandas_debugging: + if cudf_pandas_debugging: try: with nvtx.annotate( "EXECUTE_SLOW_DEBUG", diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 71eb291e28a..1a5e332eee3 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -6,13 +6,11 @@ import copy import datetime import operator -import os import pathlib import pickle import tempfile import types from io import BytesIO, StringIO -from unittest import mock import numpy as np import pyarrow as pa @@ -1435,9 +1433,9 @@ def test_pandas_debugging_mode_option(monkeypatch): def mock_mean(self, *args, **kwargs): return np.float64(1.0) - monkeypatch.setattr(Series, "mean", mock_mean) - - with mock.patch.dict(os.environ, {"MODE_PANDAS_DEBUGGING": "True"}): + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr(Series, "mean", mock_mean) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( UserWarning, From dc4f5ae3a12bc187d246836d350554f030071f5d Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 31 May 2024 05:14:14 -0700 Subject: [PATCH 14/24] Add two more tests --- .../cudf_pandas_tests/test_cudf_pandas.py | 43 +++++++++++++++---- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 1a5e332eee3..3b370d26faa 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -40,8 +40,9 @@ get_calendar, ) -# Accelerated pandas has the real pandas module as an attribute +# Accelerated pandas has the real pandas and cudf modules as an attributes pd = xpd._fsproxy_slow +cudf = xpd._fsproxy_fast @pytest.fixture @@ -1423,18 +1424,12 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] -def test_pandas_debugging_mode_option(monkeypatch): - import cudf.pandas - from cudf import Series - - cudf.pandas.install() - import pandas as xpd - +def test_cudf_pandas_debugging_different_results(monkeypatch): def mock_mean(self, *args, **kwargs): return np.float64(1.0) with monkeypatch.context() as monkeycontext: - monkeycontext.setattr(Series, "mean", mock_mean) + monkeycontext.setattr(cudf.Series, "mean", mock_mean) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( @@ -1442,3 +1437,33 @@ def mock_mean(self, *args, **kwargs): match="The results from cudf and pandas were different.", ): assert s.mean() == np.float64(1.0) + + +def test_cudf_pandas_debugging_pandas_error(monkeypatch): + def mock_mean(self, *args, **kwargs): + raise Exception() + + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr(pd.Series, "mean", mock_mean) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") + s = xpd.Series([1, 2]) + with pytest.warns( + UserWarning, + match="The result from pandas could not be computed.", + ): + assert s.mean() == 1.5 + + +def test_cudf_pandas_debugging_failed(monkeypatch): + def mock_mean(self, *args, **kwargs): + return None + + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr(pd.Series, "mean", mock_mean) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") + s = xpd.Series([1, 2]) + with pytest.warns( + UserWarning, + match="Pandas debugging mode failed.", + ): + assert s.mean() == 1.5 From 737a893d0fb5831dfc793c40a0cd2caa824c441f Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 31 May 2024 05:50:36 -0700 Subject: [PATCH 15/24] Change mocked function names --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 3b370d26faa..67ca809cde3 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1425,11 +1425,11 @@ def test_holidays_within_dates(holiday, start, expected): def test_cudf_pandas_debugging_different_results(monkeypatch): - def mock_mean(self, *args, **kwargs): + def mock_mean_float(self, *args, **kwargs): return np.float64(1.0) with monkeypatch.context() as monkeycontext: - monkeycontext.setattr(cudf.Series, "mean", mock_mean) + monkeycontext.setattr(cudf.Series, "mean", mock_mean_float) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( @@ -1440,11 +1440,11 @@ def mock_mean(self, *args, **kwargs): def test_cudf_pandas_debugging_pandas_error(monkeypatch): - def mock_mean(self, *args, **kwargs): + def mock_mean_exception(self, *args, **kwargs): raise Exception() with monkeypatch.context() as monkeycontext: - monkeycontext.setattr(pd.Series, "mean", mock_mean) + monkeycontext.setattr(pd.Series, "mean", mock_mean_exception) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( @@ -1455,11 +1455,11 @@ def mock_mean(self, *args, **kwargs): def test_cudf_pandas_debugging_failed(monkeypatch): - def mock_mean(self, *args, **kwargs): + def mock_mean_none(self, *args, **kwargs): return None with monkeypatch.context() as monkeycontext: - monkeycontext.setattr(pd.Series, "mean", mock_mean) + monkeycontext.setattr(pd.Series, "mean", mock_mean_none) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( From a2638b9b7c6b6465da30e0bee395dbdc16c66d48 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 31 May 2024 07:30:24 -0700 Subject: [PATCH 16/24] Call undo --- .../cudf_pandas_tests/test_cudf_pandas.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 67ca809cde3..b3f2054d12e 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1424,11 +1424,17 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] -def test_cudf_pandas_debugging_different_results(monkeypatch): +@pytest.fixture +def undo_monkeypatch(monkeypatch): + yield monkeypatch + monkeypatch.undo() + + +def test_cudf_pandas_debugging_different_results(undo_monkeypatch): def mock_mean_float(self, *args, **kwargs): return np.float64(1.0) - with monkeypatch.context() as monkeycontext: + with undo_monkeypatch.context() as monkeycontext: monkeycontext.setattr(cudf.Series, "mean", mock_mean_float) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) @@ -1437,13 +1443,14 @@ def mock_mean_float(self, *args, **kwargs): match="The results from cudf and pandas were different.", ): assert s.mean() == np.float64(1.0) + assert s.mean() == 1.5 -def test_cudf_pandas_debugging_pandas_error(monkeypatch): +def test_cudf_pandas_debugging_pandas_error(undo_monkeypatch): def mock_mean_exception(self, *args, **kwargs): raise Exception() - with monkeypatch.context() as monkeycontext: + with undo_monkeypatch.context() as monkeycontext: monkeycontext.setattr(pd.Series, "mean", mock_mean_exception) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) @@ -1454,11 +1461,11 @@ def mock_mean_exception(self, *args, **kwargs): assert s.mean() == 1.5 -def test_cudf_pandas_debugging_failed(monkeypatch): +def test_cudf_pandas_debugging_failed(undo_monkeypatch): def mock_mean_none(self, *args, **kwargs): return None - with monkeypatch.context() as monkeycontext: + with undo_monkeypatch.context() as monkeycontext: monkeycontext.setattr(pd.Series, "mean", mock_mean_none) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) From 3555fd9083c72c1c576958deebac89fad89b3b78 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 31 May 2024 09:04:16 -0700 Subject: [PATCH 17/24] Combine tests --- .../cudf_pandas_tests/test_cudf_pandas.py | 77 ++++++++----------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index b3f2054d12e..5ad9cbdbf89 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1424,53 +1424,38 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] -@pytest.fixture -def undo_monkeypatch(monkeypatch): - yield monkeypatch - monkeypatch.undo() - - -def test_cudf_pandas_debugging_different_results(undo_monkeypatch): - def mock_mean_float(self, *args, **kwargs): - return np.float64(1.0) - - with undo_monkeypatch.context() as monkeycontext: - monkeycontext.setattr(cudf.Series, "mean", mock_mean_float) - monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") - s = xpd.Series([1, 2]) - with pytest.warns( - UserWarning, - match="The results from cudf and pandas were different.", - ): - assert s.mean() == np.float64(1.0) - assert s.mean() == 1.5 - - -def test_cudf_pandas_debugging_pandas_error(undo_monkeypatch): - def mock_mean_exception(self, *args, **kwargs): - raise Exception() - - with undo_monkeypatch.context() as monkeycontext: - monkeycontext.setattr(pd.Series, "mean", mock_mean_exception) - monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") - s = xpd.Series([1, 2]) - with pytest.warns( - UserWarning, - match="The result from pandas could not be computed.", - ): - assert s.mean() == 1.5 - - -def test_cudf_pandas_debugging_failed(undo_monkeypatch): - def mock_mean_none(self, *args, **kwargs): - return None - - with undo_monkeypatch.context() as monkeycontext: - monkeycontext.setattr(pd.Series, "mean", mock_mean_none) - monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") +@pytest.mark.parametrize( + "mock_mean_func, expected_mean, warning_message, patch_object", + [ + ( + lambda self, *args, **kwargs: np.float64(1.0), + np.float64(1.0), + "The results from cudf and pandas were different.", + cudf.Series, + ), + ( + lambda self, *args, **kwargs: Exception(), + 1.5, + "The result from pandas could not be computed.", + pd.Series, + ), + ( + lambda self, *args, **kwargs: None, + 1.5, + "Pandas debugging mode failed.", + pd.Series, + ), + ], +) +def test_cudf_pandas_debugging( + monkeypatch, mock_mean_func, expected_mean, warning_message, patch_object +): + with monkeypatch.context() as mp: + mp.setattr(patch_object, "mean", mock_mean_func) + mp.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( UserWarning, - match="Pandas debugging mode failed.", + match=warning_message, ): - assert s.mean() == 1.5 + assert s.mean() == expected_mean From f1afdea9f923a1efbcaff5ee5d59eef9c923689c Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 31 May 2024 10:11:07 -0700 Subject: [PATCH 18/24] Use unittest.mock instead of monkeypatch --- .../cudf_pandas_tests/test_cudf_pandas.py | 68 ++++++++++--------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 5ad9cbdbf89..82d02a64ca1 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -10,6 +10,7 @@ import pickle import tempfile import types +import unittest.mock as mock from io import BytesIO, StringIO import numpy as np @@ -1424,38 +1425,43 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] -@pytest.mark.parametrize( - "mock_mean_func, expected_mean, warning_message, patch_object", - [ - ( - lambda self, *args, **kwargs: np.float64(1.0), - np.float64(1.0), - "The results from cudf and pandas were different.", - cudf.Series, - ), - ( - lambda self, *args, **kwargs: Exception(), - 1.5, - "The result from pandas could not be computed.", - pd.Series, - ), - ( - lambda self, *args, **kwargs: None, - 1.5, - "Pandas debugging mode failed.", - pd.Series, - ), - ], -) -def test_cudf_pandas_debugging( - monkeypatch, mock_mean_func, expected_mean, warning_message, patch_object -): - with monkeypatch.context() as mp: - mp.setattr(patch_object, "mean", mock_mean_func) - mp.setenv("CUDF_PANDAS_DEBUGGING", "True") +def test_cudf_pandas_debugging_different_results(): + with mock.patch( + "cudf.Series.mean", return_value=np.float64(1.0) + ), mock.patch.dict("os.environ", {"CUDF_PANDAS_DEBUGGING": "True"}): s = xpd.Series([1, 2]) + + with pytest.warns( + UserWarning, + match="The results from cudf and pandas were different.", + ): + assert s.mean() == np.float64(1.0) + + +def test_cudf_pandas_debugging_pandas_error(): + def mock_mean_exception(self, *args, **kwargs): + raise Exception() + + with mock.patch( + "pandas.Series.mean", mock_mean_exception + ), mock.patch.dict("os.environ", {"CUDF_PANDAS_DEBUGGING": "True"}): + s = xpd.Series([1, 2]) + + with pytest.warns( + UserWarning, + match="The result from pandas could not be computed.", + ): + assert s.mean() == 1.5 + + +def test_cudf_pandas_debugging_failed(): + with mock.patch("pandas.Series.mean", return_value=None), mock.patch.dict( + "os.environ", {"CUDF_PANDAS_DEBUGGING": "True"} + ): + s = xpd.Series([1, 2]) + with pytest.warns( UserWarning, - match=warning_message, + match="Pandas debugging mode failed.", ): - assert s.mean() == expected_mean + assert s.mean() == 1.5 From 2bf0b7561122bf6a6e7dded4fd9c139ea1032e4b Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 31 May 2024 12:21:47 -0700 Subject: [PATCH 19/24] patch different funcs in each test --- .../cudf_pandas_tests/test_cudf_pandas.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 82d02a64ca1..f74a230ed74 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -10,7 +10,6 @@ import pickle import tempfile import types -import unittest.mock as mock from io import BytesIO, StringIO import numpy as np @@ -1425,12 +1424,14 @@ def test_holidays_within_dates(holiday, start, expected): ) == [utc.localize(dt) for dt in expected] -def test_cudf_pandas_debugging_different_results(): - with mock.patch( - "cudf.Series.mean", return_value=np.float64(1.0) - ), mock.patch.dict("os.environ", {"CUDF_PANDAS_DEBUGGING": "True"}): - s = xpd.Series([1, 2]) +def test_cudf_pandas_debugging_different_results(monkeypatch): + def mock_mean(self, *args, **kwargs): + return np.float64(1.0) + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr(cudf.Series, "mean", mock_mean) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") + s = xpd.Series([1, 2]) with pytest.warns( UserWarning, match="The results from cudf and pandas were different.", @@ -1438,15 +1439,14 @@ def test_cudf_pandas_debugging_different_results(): assert s.mean() == np.float64(1.0) -def test_cudf_pandas_debugging_pandas_error(): - def mock_mean_exception(self, *args, **kwargs): +def test_cudf_pandas_debugging_pandas_error(monkeypatch): + def mock_median(self, *args, **kwargs): raise Exception() - with mock.patch( - "pandas.Series.mean", mock_mean_exception - ), mock.patch.dict("os.environ", {"CUDF_PANDAS_DEBUGGING": "True"}): + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr(pd.Series, "median", mock_median) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) - with pytest.warns( UserWarning, match="The result from pandas could not be computed.", @@ -1454,14 +1454,16 @@ def mock_mean_exception(self, *args, **kwargs): assert s.mean() == 1.5 -def test_cudf_pandas_debugging_failed(): - with mock.patch("pandas.Series.mean", return_value=None), mock.patch.dict( - "os.environ", {"CUDF_PANDAS_DEBUGGING": "True"} - ): - s = xpd.Series([1, 2]) +def test_cudf_pandas_debugging_failed(monkeypatch): + def mock_std(self, *args, **kwargs): + return None + with monkeypatch.context() as monkeycontext: + monkeycontext.setattr(pd.Series, "std", mock_std) + monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") + s = xpd.Series([1, 2]) with pytest.warns( UserWarning, match="Pandas debugging mode failed.", ): - assert s.mean() == 1.5 + assert s.std() == 0.7071067811865476 From 11d8aba6291e5f6deb36853defc6e802aa6fd890 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 31 May 2024 12:55:51 -0700 Subject: [PATCH 20/24] type median in test 2 --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index f74a230ed74..f552da0494e 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1451,7 +1451,7 @@ def mock_median(self, *args, **kwargs): UserWarning, match="The result from pandas could not be computed.", ): - assert s.mean() == 1.5 + assert s.median() == 1.5 def test_cudf_pandas_debugging_failed(monkeypatch): From af0d9cedc3a9b4131bb05f134b61e33ac8d12369 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 3 Jun 2024 08:14:05 -0700 Subject: [PATCH 21/24] Use monkeypatch.setattr to undo the monkeypatches --- python/cudf/cudf/pandas/fast_slow_proxy.py | 5 ++- .../cudf_pandas_tests/test_cudf_pandas.py | 33 +++++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 0c41438cbfa..0a0ffc650ec 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -26,9 +26,8 @@ import numpy as np -from cudf.testing._utils import assert_eq - from ..options import _env_get_bool +from ..testing._utils import assert_eq from .annotation import nvtx @@ -890,7 +889,7 @@ def __name__(self, value): def _assert_fast_slow_eq(left, right, **kwargs): assert_func = kwargs.get("assert_func", assert_eq) - if _is_final_type(type(left)) or (type(left) in NUMPY_TYPES): + if _is_final_type(type(left)) or type(left) in NUMPY_TYPES: assert_func(left, right) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index f552da0494e..1a44182381d 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -40,7 +40,7 @@ get_calendar, ) -# Accelerated pandas has the real pandas and cudf modules as an attributes +# Accelerated pandas has the real pandas and cudf modules as attributes pd = xpd._fsproxy_slow cudf = xpd._fsproxy_fast @@ -1425,45 +1425,58 @@ def test_holidays_within_dates(holiday, start, expected): def test_cudf_pandas_debugging_different_results(monkeypatch): - def mock_mean(self, *args, **kwargs): + cudf_mean = cudf.Series.mean + + def mock_mean_one(self, *args, **kwargs): return np.float64(1.0) with monkeypatch.context() as monkeycontext: - monkeycontext.setattr(cudf.Series, "mean", mock_mean) + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", mock_mean_one) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( UserWarning, match="The results from cudf and pandas were different.", ): - assert s.mean() == np.float64(1.0) + assert s.mean() == 1.0 + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", cudf_mean) def test_cudf_pandas_debugging_pandas_error(monkeypatch): - def mock_median(self, *args, **kwargs): + pd_mean = pd.Series.mean + + def mock_mean_exception(self, *args, **kwargs): raise Exception() with monkeypatch.context() as monkeycontext: - monkeycontext.setattr(pd.Series, "median", mock_median) + monkeycontext.setattr( + xpd.Series.mean, "_fsproxy_slow", mock_mean_exception + ) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( UserWarning, match="The result from pandas could not be computed.", ): - assert s.median() == 1.5 + s = xpd.Series([1, 2]) + assert s.mean() == 1.5 + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean) def test_cudf_pandas_debugging_failed(monkeypatch): - def mock_std(self, *args, **kwargs): + pd_mean = pd.Series.mean + + def mock_mean_none(self, *args, **kwargs): return None with monkeypatch.context() as monkeycontext: - monkeycontext.setattr(pd.Series, "std", mock_std) + monkeycontext.setattr(xpd.Series.mean, "_fsproxy_slow", mock_mean_none) monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True") s = xpd.Series([1, 2]) with pytest.warns( UserWarning, match="Pandas debugging mode failed.", ): - assert s.std() == 0.7071067811865476 + s = xpd.Series([1, 2]) + assert s.mean() == 1.5 + monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean) From 4a6186814095ae49223bbaa61cd965c31b443703 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 7 Jun 2024 08:34:24 -0700 Subject: [PATCH 22/24] Address comments --- python/cudf/cudf/pandas/_wrappers/pandas.py | 2 -- python/cudf/cudf/pandas/fast_slow_proxy.py | 15 +++++---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index ebd8d0d428b..698dd946022 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -130,7 +130,6 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): # hence this method is needed. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), - cudf.options._env_get_bool("CUDF_PANDAS_DEBUGGING", False), cls, args, kwargs, @@ -254,7 +253,6 @@ def Index__new__(cls, *args, **kwargs): # make_final_proxy_type provides. self, _ = _fast_slow_function_call( lambda cls, args, kwargs: cls(*args, **kwargs), - cudf.options._env_get_bool("CUDF_PANDAS_DEBUGGING", False), cls, args, kwargs, diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index f7706098023..5f4cf2e6cc6 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -168,7 +168,6 @@ def __init__(self, *args, **kwargs): lambda cls, args, kwargs: setattr( self, "_fsproxy_wrapped", cls(*args, **kwargs) ), - _env_get_bool("CUDF_PANDAS_DEBUGGING", False), type(self), args, kwargs, @@ -701,7 +700,6 @@ def __call__(self, *args, **kwargs) -> Any: # TODO: When Python 3.11 is the minimum supported Python version # this can use operator.call call_operator, - _env_get_bool("CUDF_PANDAS_DEBUGGING", False), self, args, kwargs, @@ -816,7 +814,6 @@ def __get__(self, instance, owner) -> Any: # for anything else, use a fast-slow attribute: self._attr, _ = _fast_slow_function_call( getattr, - _env_get_bool("CUDF_PANDAS_DEBUGGING", False), owner, self._name, ) @@ -841,7 +838,6 @@ def __get__(self, instance, owner) -> Any: ) return _fast_slow_function_call( getattr, - _env_get_bool("CUDF_PANDAS_DEBUGGING", False), instance, self._name, )[0] @@ -879,15 +875,14 @@ def __name__(self, value): setattr(self._fsproxy_slow, "__name__", value) -def _assert_fast_slow_eq(left, right, **kwargs): - assert_func = kwargs.get("assert_func", assert_eq) +def _assert_fast_slow_eq(left, right): if _is_final_type(type(left)) or type(left) in NUMPY_TYPES: - assert_func(left, right) + assert_eq(left, right) def _fast_slow_function_call( func: Callable, - cudf_pandas_debugging: bool | None = None, + /, *args, **kwargs, ) -> Any: @@ -914,7 +909,7 @@ def _fast_slow_function_call( # try slow path raise Exception() fast = True - if cudf_pandas_debugging: + if _env_get_bool("CUDF_PANDAS_DEBUGGING", False): try: with nvtx.annotate( "EXECUTE_SLOW_DEBUG", @@ -934,7 +929,7 @@ def _fast_slow_function_call( ) else: try: - _assert_fast_slow_eq(result, slow_result, **kwargs) + _assert_fast_slow_eq(result, slow_result) except AssertionError as e: warnings.warn( "The results from cudf and pandas were different. " From 688034788c0b3a8cdcd413b695fd334775df2fba Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 7 Jun 2024 16:00:37 -0700 Subject: [PATCH 23/24] add comment to test --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 99d462dea9a..d4bc639357e 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1440,6 +1440,7 @@ def mock_mean_one(self, *args, **kwargs): match="The results from cudf and pandas were different.", ): assert s.mean() == 1.0 + # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts. monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", cudf_mean) From 7a9a9e61e894b58f09fd316e59cfbee37ebf9e69 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 7 Jun 2024 16:05:06 -0700 Subject: [PATCH 24/24] add comments to tests --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index d4bc639357e..72e9ad5fca3 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1462,6 +1462,7 @@ def mock_mean_exception(self, *args, **kwargs): ): s = xpd.Series([1, 2]) assert s.mean() == 1.5 + # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts. monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean) @@ -1481,6 +1482,7 @@ def mock_mean_none(self, *args, **kwargs): ): s = xpd.Series([1, 2]) assert s.mean() == 1.5 + # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts. monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean)