Skip to content

Commit

Permalink
Add environment variable to log cudf.pandas fallback calls (#16161)
Browse files Browse the repository at this point in the history
Introduces a new environment variable `LOG_FAST_FALLBACK` which will create a structured log of the call that failed. 

An example of the log is

```
INFO:root:{"debug_type": "LOG_FAST_FALLBACK", "failed_call": "pandas._libs.interval.Interval(0,1)", "exception": "Exception", "exception_message": "Cannot transform _Unusable", "pandas_object": "pandas._libs.interval.Interval", "passed_args": "0,1,", "passed_kwargs": {}}
```

I could turn this into a warning instead, but I imagine we would want to first utilize this to parse the failures and see generalized failures in aggregate

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #16161
  • Loading branch information
mroeschke authored Jul 31, 2024
1 parent 9336c17 commit 0f3b380
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 1 deletion.
80 changes: 80 additions & 0 deletions python/cudf/cudf/pandas/_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import json
import logging

logging.basicConfig(
filename="cudf_pandas_unit_tests_debug.log", level=logging.INFO
)
logger = logging.getLogger()


class StructuredMessage:
# https://docs.python.org/3/howto/logging-cookbook.html#implementing-structured-logging
def __init__(self, debug_type: str, /, **kwargs) -> None:
self.debug_type = debug_type
self.kwargs = kwargs

def __str__(self) -> str:
log = {"debug_type": self.debug_type}
return json.dumps({**log, **self.kwargs})


def reprify(arg) -> str:
"""Attempt to return arg's repr for logging."""
try:
return repr(arg)
except Exception:
return "<REPR FAILED>"


def log_fallback(
slow_args: tuple, slow_kwargs: dict, exception: Exception
) -> None:
"""Log when a fast call falls back to the slow path."""
caller = slow_args[0]
module = getattr(caller, "__module__", "")
obj_name = getattr(caller, "__qualname__", type(caller).__qualname__)
if module:
slow_object = f"{module}.{obj_name}"
else:
slow_object = obj_name
# TODO: Maybe use inspect.signature to map called args and kwargs
# to their keyword names, but a user calling an API incorrectly would
# break this.
caller_args = slow_args[1]
args_passed = ", ".join((reprify(arg) for arg in caller_args))
args_types_passed = ", ".join((type(arg).__name__ for arg in caller_args))
kwargs_passed = {}
kwargs_types_passed = ""
if len(slow_args) == 3:
caller_kwargs = slow_args[2]
if caller_kwargs:
fmt_kwargs = ", ".join(
f"{kwarg}={reprify(value)}"
for kwarg, value in caller_kwargs.items()
)
kwargs_types_passed = ", ".join(
f"{kwarg}={type(value).__name__}"
for kwarg, value in caller_kwargs.items()
)
args_passed = f"{args_passed}, {fmt_kwargs}"
kwargs_passed = {
kwarg: reprify(value) for kwarg, value in caller_kwargs.items()
}
message = StructuredMessage(
"LOG_FAST_FALLBACK",
failed_call=f"{slow_object}({args_passed})",
exception=type(exception).__name__,
exception_message=str(exception),
slow_object=slow_object,
args_passed=args_passed,
kwargs_passed=kwargs_passed,
args_types_passed=args_types_passed,
kwargs_types_passed=kwargs_types_passed,
)
logger.info(message)
6 changes: 5 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,13 +930,17 @@ def _fast_slow_function_call(
"Pandas debugging mode failed. "
f"The exception was {e}."
)
except Exception:
except Exception as err:
with nvtx.annotate(
"EXECUTE_SLOW",
color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
domain="cudf_pandas",
):
slow_args, slow_kwargs = _slow_arg(args), _slow_arg(kwargs)
if _env_get_bool("LOG_FAST_FALLBACK", False):
from ._logger import log_fallback

log_fallback(slow_args, slow_kwargs, err)
with disable_module_accelerator():
result = func(*slow_args, **slow_kwargs)
return _maybe_wrap_result(result, func, *args, **kwargs), fast
Expand Down

0 comments on commit 0f3b380

Please sign in to comment.