From 8ac09978945bdb4fa4e699b5db02f5eae4b0cb31 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 23 May 2024 15:32:19 -0700 Subject: [PATCH] Address comments, delete test --- python/cudf/cudf/options.py | 10 +- python/cudf/cudf/pandas/fast_slow_proxy.py | 131 +++++++++++++++++- .../cudf_pandas_tests/test_fast_slow_proxy.py | 4 - 3 files changed, 132 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py index 673acb2c131..b2453156f39 100644 --- a/python/cudf/cudf/options.py +++ b/python/cudf/cudf/options.py @@ -313,11 +313,11 @@ def _integer_and_none_validator(val): False, textwrap.dedent( """ - If set to `False`, retains `cudf` specific behavior. - If set to `True`, enables pandas debugging mode, - which will raise a warning if the results from cudf - and pandas differ. - \tValid values are True or False. Default is False. + If set to `True`, enables cudf.pandas debugging mode. + When enabled, cudf code paths in cudf.pandas will + also run with pandas and raise a warning if the + results from cudf and pandas differ. + If set to `False`, cudf.pandas debugging is disabled. """ ), _make_contains_validator([False, True]), diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 60a398482ea..ea198a0198c 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -9,6 +9,7 @@ import operator import pickle import types +import warnings from collections.abc import Iterator from enum import IntEnum from typing import ( @@ -23,9 +24,13 @@ Type, ) +from cudf.testing._utils import assert_eq + from ..options import get_option from .annotation import nvtx +# from cudf.pandas._wrappers.pandas import Timedelta, Timestamp, DataFrame, Series, Index, RangeIndex, SparseDtype, SparseArray, CategoricalIndex, Categorical, CategoricalDtype, DatetimeIndex, DatetimeArray, DatetimeTZDtype, TimedeltaIndex, NumpyExtensionArray, PandasArray, TimedeltaArray, PeriodIndex, PeriodArray, PeriodDtype, Period, MultiIndex, Grouper, StringArray, StringDtype, BooleanArray, BooleanDtype, IntegerArray, Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype, IntervalIndex, IntervalArray, IntervalDtype, Interval, FloatingArray, Float32Dtype, Float64Dtype, FixedForwardWindowIndexer, VariableOffsetWindowIndexer, HDFStore, ExcelFile, ExcelWriter, Styler, USFederalHolidayCalendar, HolidayCalendarMetaClass, AbstractHolidayCalendar, Holiday, USThanksgivingDay, USColumbusDay, USLaborDay, USMemorialDay, USMartinLutherKingJr, USPresidentsDay, GoodFriday, EasterMonday, FY5253, BDay, BMonthBegin, BMonthEnd, BQuarterBegin, BQuarterEnd, BusinessDay, BusinessHour, BusinessMonthBegin, BusinessMonthEnd, BYearBegin, BYearEnd, CBMonthBegin, CBMonthEnd, CDay, CustomBusinessDay, CustomBusinessHour, CustomBusinessMonthBegin, CustomBusinessMonthEnd, DateOffset, BaseOffset, Day, Easter, FY5253Quarter, Hour, LastWeekOfMonth, Micro, Milli, Minute, MonthBegin, MonthEnd, Nano, QuarterBegin, QuarterEnd, Second, SemiMonthBegin, SemiMonthEnd, Tick, Week, WeekOfMonth, YearBegin, YearEnd, Flags, NamedAgg, ArrowExtensionArray + def call_operator(fn, args, kwargs): return fn(*args, **kwargs) @@ -902,15 +907,23 @@ def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any: if get_option("mode.pandas_debugging"): with nvtx.annotate( - "EXECUTE_SLOW", + "EXECUTE_SLOW_DEBUGGING", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], domain="cudf_pandas", ): slow_args, slow_kwargs = _slow_arg(args), _slow_arg(kwargs) with disable_module_accelerator(): - result_slow = func(*slow_args, **slow_kwargs) - print(result_slow) - # TODO: Compare result and result slow and return a warning + slow_result = func(*slow_args, **slow_kwargs) + print("FAST ", result, type(result)) + print("SLOW ", slow_result, type(slow_result)) + if type(result).__name__ in _CUDF_OBJ_FINAL_TYPES: + assert_eq(result, slow_result) + except AssertionError: + warnings.warn( + "The results from cudf and pandas were different. " + f"The types were {type(result)} and {type(slow_result)} for cudf and pandas, respectively." + ) + return _maybe_wrap_result(result, func, *args, **kwargs), fast except Exception: with nvtx.annotate( "EXECUTE_SLOW", @@ -1156,6 +1169,116 @@ def _replace_closurevars( ) +_CUDF_OBJ_FINAL_TYPES: Set[str] = { + "Timedelta", + "Timestamp", + "DataFrame", + "Series", + "Index", + "RangeIndex", + "SparseDtype", + "SparseArray", + "CategoricalIndex", + "Categorical", + "CategoricalDtype", + "DatetimeIndex", + "DatetimeArray", + "DatetimeTZDtype", + "TimedeltaIndex", + "NumpyExtensionArray", + "PandasArray", + "TimedeltaArray", + "PeriodIndex", + "PeriodArray", + "PeriodDtype", + "Period", + "MultiIndex", + "Grouper", + "StringArray", + "StringDtype", + "BooleanArray", + "BooleanDtype", + "IntegerArray", + "Int8Dtype", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "UInt8Dtype", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "IntervalIndex", + "IntervalArray", + "IntervalDtype", + "Interval", + "FloatingArray", + "Float32Dtype", + "Float64Dtype", + "FixedForwardWindowIndexer", + "VariableOffsetWindowIndexer", + "HDFStore", + "ExcelFile", + "ExcelWriter", + "Styler", + "USFederalHolidayCalendar", + "HolidayCalendarMetaClass", + "AbstractHolidayCalendar", + "Holiday", + "USThanksgivingDay", + "USColumbusDay", + "USLaborDay", + "USMemorialDay", + "USMartinLutherKingJr", + "USPresidentsDay", + "GoodFriday", + "EasterMonday", + "FY5253", + "BDay", + "BMonthBegin", + "BMonthEnd", + "BQuarterBegin", + "BQuarterEnd", + "BusinessDay", + "BusinessHour", + "BusinessMonthBegin", + "BusinessMonthEnd", + "BYearBegin", + "BYearEnd", + "CBMonthBegin", + "CBMonthEnd", + "CDay", + "CustomBusinessDay", + "CustomBusinessHour", + "CustomBusinessMonthBegin", + "CustomBusinessMonthEnd", + "DateOffset", + "BaseOffset", + "Day", + "Easter", + "FY5253Quarter", + "Hour", + "LastWeekOfMonth", + "Micro", + "Milli", + "Minute", + "MonthBegin", + "MonthEnd", + "Nano", + "QuarterBegin", + "QuarterEnd", + "Second", + "SemiMonthBegin", + "SemiMonthEnd", + "Tick", + "Week", + "WeekOfMonth", + "YearBegin", + "YearEnd", + "Flags", + "NamedAgg", + "ArrowExtensionArray", +} + _SPECIAL_METHODS: Set[str] = { "__abs__", "__add__", diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py index 3143a8d2223..8bc95f92214 100644 --- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py +++ b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py @@ -545,7 +545,3 @@ def test_tuple_with_attrs_transform(): assert b == bprime and b is not bprime assert c == cprime and c is not cprime assert d == dprime and d is not dprime - - -def test_fast_slow_function_call(): - assert True