diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 53ad948b61c..4acdc372817 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 ##############################################
 # cuDF GPU build and test script for CI #
 ##############################################
@@ -249,15 +249,15 @@ fi
 
 cd "$WORKSPACE/python/cudf"
 gpuci_logger "Python py.test for cuDF"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope
+py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope cudf
 
 cd "$WORKSPACE/python/dask_cudf"
 gpuci_logger "Python py.test for dask-cudf"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term
+py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term dask_cudf
 
 cd "$WORKSPACE/python/custreamz"
 gpuci_logger "Python py.test for cuStreamz"
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term
+py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz
 
 gpuci_logger "Test notebooks"
 "$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py
index 041bd055f0a..4d5b5926d6e 100644
--- a/python/cudf/cudf/tests/conftest.py
+++ b/python/cudf/cudf/tests/conftest.py
@@ -1,10 +1,40 @@
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+
+import os
 import pathlib
 
 import pytest
 
 import rmm  # noqa: F401
 
+_CURRENT_DIRECTORY = str(pathlib.Path(__file__).resolve().parent)
+
 
 @pytest.fixture(scope="session")
 def datadir():
     return pathlib.Path(__file__).parent / "data"
+
+
+# To set and remove the NO_EXTERNAL_ONLY_APIS environment variable we must use
+# the sessionstart and sessionfinish hooks rather than a simple autouse,
+# session-scope fixture because we need to set these variables before
+# collection occurs because the environment variable will be checked as soon
+# as cudf is imported anywhere.
+def pytest_sessionstart(session):
+    """
+    Called after the Session object has been created and
+    before performing collection and entering the run test loop.
+    """
+    os.environ["NO_EXTERNAL_ONLY_APIS"] = "1"
+    os.environ["_CUDF_TEST_ROOT"] = _CURRENT_DIRECTORY
+
+
+def pytest_sessionfinish(session, exitstatus):
+    """
+    Called after whole test run finished, right before
+    returning the exit status to the system.
+    """
+    # Remove each variable independently: with both deletions in one
+    # try/except, a missing first key would leave the second one set.
+    os.environ.pop("NO_EXTERNAL_ONLY_APIS", None)
+    os.environ.pop("_CUDF_TEST_ROOT", None)
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 4143cbd1d66..b283f89873d 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -2,6 +2,8 @@
 
 import decimal
 import functools
+import os
+import traceback
 from collections.abc import Sequence
 from typing import FrozenSet, Set, Union
 
@@ -37,6 +39,60 @@
 }
 
 
+# When set (cudf's test conftest does this), functions decorated with
+# _external_only_api raise if they are called from inside cudf itself.
+NO_EXTERNAL_ONLY_APIS = os.getenv("NO_EXTERNAL_ONLY_APIS")
+
+_cudf_root = os.path.dirname(cudf.__file__)
+# pytest sets the test root so that tests run from a source tree against a
+# built cudf are recognized; otherwise fall back to <cudf root>/tests.
+_tests_root = os.getenv("_CUDF_TEST_ROOT") or os.path.join(_cudf_root, "tests")
+
+
+def _external_only_api(func, alternative=""):
+    """Decorator to indicate that a function should not be used internally.
+
+    cudf contains many APIs that exist for pandas compatibility but are
+    intrinsically inefficient. For some of these cudf has internal
+    equivalents that are much faster. Usage of the slow public APIs inside
+    our implementation can lead to unnecessary performance bottlenecks.
+    Applying this decorator to such functions and setting the environment
+    variable NO_EXTERNAL_ONLY_APIS will cause such functions to raise
+    exceptions if they are called from anywhere inside cudf, making it easy
+    to identify and excise such usage.
+
+    The `alternative` should be a complete phrase or sentence since it will
+    be used verbatim in error messages.
+    """
+
+    # If the first arg is a string then an alternative function to use in
+    # place of this API was provided, so we pass that to a subsequent call.
+    # It would be cleaner to implement this pattern by using a class
+    # decorator with a factory method, but there is no way to generically
+    # wrap docstrings on a class (we would need the docstring to be on the
+    # class itself, not instances, because that's what `help` looks at) and
+    # there is also no way to make mypy happy with that approach.
+    if isinstance(func, str):
+        return lambda actual_func: _external_only_api(actual_func, func)
+
+    if not NO_EXTERNAL_ONLY_APIS:
+        return func
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        # Check the immediately preceding frame to see if it's in cudf.
+        frame, lineno = next(traceback.walk_stack(None))
+        fn = frame.f_code.co_filename
+        if _cudf_root in fn and _tests_root not in fn:
+            raise RuntimeError(
+                f"External-only API called in {fn} at line {lineno}. "
+                f"{alternative}"
+            )
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
 def scalar_broadcast_to(scalar, size, dtype=None):
 
     if isinstance(size, (tuple, list)):