Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an Environment Variable for debugging the fast path in cudf.pandas #15837

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
577cb93
Add a test
Matt711 May 24, 2024
925606d
Add a test
Matt711 May 28, 2024
ffd8ded
Add debug mode as an argument
Matt711 May 28, 2024
d7286e9
Add an env var
Matt711 May 28, 2024
b33f522
Change to xpd
Matt711 May 28, 2024
9dc9ba9
Add test with env var
Matt711 May 28, 2024
2e5c0da
Merge branch 'branch-24.08' into feature/combine-fast-and-slow-paths
Matt711 May 29, 2024
9699b31
Add argument for assert func, delete test with env var
Matt711 May 29, 2024
e421361
Address comments
Matt711 May 29, 2024
5b313ce
Address comments, refactor test
Matt711 May 30, 2024
a25a3e6
add kwargs to assert func
Matt711 May 30, 2024
ff3408d
Add pandas debug option in missing places
Matt711 May 30, 2024
8c618d7
Use env var instead option
Matt711 May 30, 2024
299fb8d
Merge branch 'branch-24.08' into feature/combine-fast-and-slow-paths
Matt711 May 30, 2024
70b335e
Add CUDF_ to env var, refactor test
Matt711 May 30, 2024
9350955
Merge branch 'branch-24.08' into feature/combine-fast-and-slow-paths
Matt711 May 30, 2024
dc4f5ae
Add two more tests
Matt711 May 31, 2024
7f8844d
Merge branch 'feature/combine-fast-and-slow-paths' of github.com:Matt…
Matt711 May 31, 2024
737a893
Change mocked function names
Matt711 May 31, 2024
a2638b9
Call undo
Matt711 May 31, 2024
3555fd9
Combine tests
Matt711 May 31, 2024
f1afdea
Use unittest.mock instead of monkeypatch
Matt711 May 31, 2024
2bf0b75
patch different funcs in each test
Matt711 May 31, 2024
11d8aba
type median in test 2
Matt711 May 31, 2024
af0d9ce
Use monkeypatch.setattr to undo the monkeypatches
Matt711 Jun 3, 2024
276c095
Fix merge conflicts
Matt711 Jun 4, 2024
4a61868
Address comments
Matt711 Jun 7, 2024
6880347
add comment to test
Matt711 Jun 7, 2024
7a9a9e6
add comments to tests
Matt711 Jun 7, 2024
3ad0967
Merge branch 'branch-24.08' into feature/combine-fast-and-slow-paths
galipremsagar Jun 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 58 additions & 5 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import operator
import pickle
import types
import warnings
from collections.abc import Iterator
from enum import IntEnum
from typing import (
Expand All @@ -23,6 +24,10 @@
Type,
)

import numpy as np

from ..options import _env_get_bool
from ..testing._utils import assert_eq
from .annotation import nvtx


Expand Down Expand Up @@ -808,7 +813,9 @@ def __get__(self, instance, owner) -> Any:
else:
# for anything else, use a fast-slow attribute:
self._attr, _ = _fast_slow_function_call(
getattr, owner, self._name
getattr,
owner,
self._name,
)

if isinstance(
Expand All @@ -829,9 +836,11 @@ def __get__(self, instance, owner) -> Any:
getattr(instance._fsproxy_slow, self._name),
None, # type: ignore
)
return _fast_slow_function_call(getattr, instance, self._name)[
0
]
return _fast_slow_function_call(
getattr,
instance,
self._name,
)[0]
return self._attr


Expand Down Expand Up @@ -866,7 +875,17 @@ def __name__(self, value):
setattr(self._fsproxy_slow, "__name__", value)


def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any:
def _assert_fast_slow_eq(left, right):
    """
    Assert that `left` and `right` are equal, but only for
    comparable result types.

    The comparison (delegated to `assert_eq`) is performed when the
    type of `left` is either a final type according to
    `_is_final_type` or a member of `NUMPY_TYPES`. For any other
    type of `left` this function does nothing.
    """
    left_type = type(left)
    comparable = _is_final_type(left_type) or left_type in NUMPY_TYPES
    if not comparable:
        # Nothing is checked for results outside the comparable types.
        return
    assert_eq(left, right)


def _fast_slow_function_call(
func: Callable,
/,
*args,
**kwargs,
) -> Any:
"""
Call `func` with all `args` and `kwargs` converted to their
respective fast type. If that fails, call `func` with all
Expand All @@ -890,6 +909,37 @@ def _fast_slow_function_call(func: Callable, /, *args, **kwargs) -> Any:
# try slow path
raise Exception()
fast = True
if _env_get_bool("CUDF_PANDAS_DEBUGGING", False):
try:
with nvtx.annotate(
"EXECUTE_SLOW_DEBUG",
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
domain="cudf_pandas",
):
slow_args, slow_kwargs = (
_slow_arg(args),
_slow_arg(kwargs),
)
with disable_module_accelerator():
slow_result = func(*slow_args, **slow_kwargs)
except Exception as e:
warnings.warn(
"The result from pandas could not be computed. "
f"The exception was {e}."
)
Matt711 marked this conversation as resolved.
Show resolved Hide resolved
else:
try:
_assert_fast_slow_eq(result, slow_result)
except AssertionError as e:
warnings.warn(
"The results from cudf and pandas were different. "
f"The exception was {e}."
)
except Exception as e:
warnings.warn(
"Pandas debugging mode failed. "
f"The exception was {e}."
)
except Exception:
with nvtx.annotate(
"EXECUTE_SLOW",
Expand Down Expand Up @@ -1135,6 +1185,9 @@ def _replace_closurevars(
)


# The NumPy scalar type classes registered in ``np.sctypeDict``
# (e.g. ``np.float64``). The members are type objects, not strings,
# so membership is tested with ``type(obj) in NUMPY_TYPES``.
NUMPY_TYPES: Set[type] = set(np.sctypeDict.values())


_SPECIAL_METHODS: Set[str] = {
"__abs__",
"__add__",
Expand Down
64 changes: 63 additions & 1 deletion python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@
get_calendar,
)

# Accelerated pandas has the real pandas module as an attribute
# Accelerated pandas has the real pandas and cudf modules as attributes
pd = xpd._fsproxy_slow
cudf = xpd._fsproxy_fast
Matt711 marked this conversation as resolved.
Show resolved Hide resolved


@pytest.fixture
Expand Down Expand Up @@ -1424,5 +1425,66 @@ def test_holidays_within_dates(holiday, start, expected):
) == [utc.localize(dt) for dt in expected]


def test_cudf_pandas_debugging_different_results(monkeypatch):
    """Debug mode warns when the fast and slow results disagree.

    The ``_fsproxy_fast`` side of ``xpd.Series.mean`` is swapped for a
    stub that always returns ``1.0``. With ``CUDF_PANDAS_DEBUGGING``
    set, calling ``mean`` is expected to emit the "results ... were
    different" warning while still returning the stubbed value.
    """
    original_fast_mean = cudf.Series.mean

    def _always_one(self, *args, **kwargs):
        return np.float64(1.0)

    with monkeypatch.context() as ctx:
        # NOTE(review): this patch goes through the outer `monkeypatch`
        # fixture rather than the context object — confirm that this is
        # intentional before changing it.
        monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", _always_one)
        ctx.setenv("CUDF_PANDAS_DEBUGGING", "True")
        series = xpd.Series([1, 2])
        with pytest.warns(
            UserWarning,
            match="The results from cudf and pandas were different.",
        ):
            assert series.mean() == 1.0
    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", original_fast_mean)
Matt711 marked this conversation as resolved.
Show resolved Hide resolved


def test_cudf_pandas_debugging_pandas_error(monkeypatch):
    """Debug mode warns when the slow (pandas) computation raises.

    The ``_fsproxy_slow`` side of ``xpd.Series.mean`` is replaced with a
    stub that raises. With ``CUDF_PANDAS_DEBUGGING`` set, calling
    ``mean`` is expected to emit the "could not be computed" warning
    and still return the real mean of ``[1, 2]``.
    """
    pd_mean = pd.Series.mean

    def mock_mean_exception(self, *args, **kwargs):
        raise Exception()

    with monkeypatch.context() as monkeycontext:
        monkeycontext.setattr(
            xpd.Series.mean, "_fsproxy_slow", mock_mean_exception
        )
        monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True")
        # Build the series once, outside the warning check, so that only
        # the `mean` call is evaluated under `pytest.warns`.
        s = xpd.Series([1, 2])
        with pytest.warns(
            UserWarning,
            match="The result from pandas could not be computed.",
        ):
            assert s.mean() == 1.5
    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean)


def test_cudf_pandas_debugging_failed(monkeypatch):
    """Debug mode warns when comparing the two results itself fails.

    The ``_fsproxy_slow`` side of ``xpd.Series.mean`` is replaced with a
    stub that returns ``None``. With ``CUDF_PANDAS_DEBUGGING`` set,
    calling ``mean`` is expected to emit the "Pandas debugging mode
    failed" warning and still return the real mean of ``[1, 2]``.
    """
    pd_mean = pd.Series.mean

    def mock_mean_none(self, *args, **kwargs):
        return None

    with monkeypatch.context() as monkeycontext:
        monkeycontext.setattr(xpd.Series.mean, "_fsproxy_slow", mock_mean_none)
        monkeycontext.setenv("CUDF_PANDAS_DEBUGGING", "True")
        # Build the series once, outside the warning check, so that only
        # the `mean` call is evaluated under `pytest.warns`.
        s = xpd.Series([1, 2])
        with pytest.warns(
            UserWarning,
            match="Pandas debugging mode failed.",
        ):
            assert s.mean() == 1.5
    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_slow", pd_mean)


def test_excelwriter_pathlike():
    """An ``ExcelWriter`` created from ``pd`` passes the ``os.PathLike`` check."""
    writer = pd.ExcelWriter("foo.xlsx")
    assert isinstance(writer, os.PathLike)
Loading