From 19097b12b27886f106d24e91b4ba9b799aba4f1d Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 24 Sep 2021 15:33:57 +0200 Subject: [PATCH] Prevent CUDA context errors when testing on single-GPU (#737) Add a `DASK_CUDA_TEST_SINGLE_GPU` environment variable that indicates that a single-GPU system is used for testing (such as gpuCI). This prevents errors from being thrown when attempting to mock-create a CUDA context on devices that are specified via `CUDA_VISIBLE_DEVICES` but are unavailable in the system. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/dask-cuda/pull/737 --- ci/gpu/build.sh | 2 +- dask_cuda/initialize.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 3ab70a306..6626629d6 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -106,7 +106,7 @@ else gpuci_logger "Python pytest for dask-cuda" cd "$WORKSPACE" ls dask_cuda/tests/ - UCXPY_IFNAME=eth0 UCX_WARN_UNUSED_ENV_VARS=n UCX_MEMTYPE_CACHE=n pytest -vs -Werror::DeprecationWarning -Werror::FutureWarning --cache-clear --basetemp="$WORKSPACE/dask-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cuda.xml" --cov-config=.coveragerc --cov=dask_cuda --cov-report=xml:"$WORKSPACE/dask-cuda-coverage.xml" --cov-report term dask_cuda/tests/ + DASK_CUDA_TEST_SINGLE_GPU=1 UCXPY_IFNAME=eth0 UCX_WARN_UNUSED_ENV_VARS=n UCX_MEMTYPE_CACHE=n pytest -vs -Werror::DeprecationWarning -Werror::FutureWarning --cache-clear --basetemp="$WORKSPACE/dask-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cuda.xml" --cov-config=.coveragerc --cov=dask_cuda --cov-report=xml:"$WORKSPACE/dask-cuda-coverage.xml" --cov-report term dask_cuda/tests/ logger "Run local benchmark..." 
python dask_cuda/benchmarks/local_cudf_shuffle.py --partition-size="1 KiB" -d 0 --runs 1 --backend dask diff --git a/dask_cuda/initialize.py b/dask_cuda/initialize.py index cbfd6098a..1cb58c757 100644 --- a/dask_cuda/initialize.py +++ b/dask_cuda/initialize.py @@ -14,6 +14,16 @@ logger = logging.getLogger(__name__) +def _create_cuda_context_handler(): + if int(os.environ.get("DASK_CUDA_TEST_SINGLE_GPU", "0")) != 0: + try: + numba.cuda.current_context() + except numba.cuda.cudadrv.error.CudaSupportError: + pass + else: + numba.cuda.current_context() + + def _create_cuda_context(): try: # Added here to ensure the parent `LocalCUDACluster` process creates the CUDA @@ -39,7 +49,7 @@ def _create_cuda_context(): "import time or in the global scope of a program." ) - numba.cuda.current_context() + _create_cuda_context_handler() if distributed.comm.ucx.cuda_context_created is False: ctx = has_cuda_context()