From 05d53cf8849b65d916fc669c5daf1fc73b7bcbee Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 26 Jan 2022 14:07:31 -0600 Subject: [PATCH] Update gpu_utils.py to reflect current CUDA support. (#10113) This PR resolves #10076. It improves `gpu_utils.py` by removing code for handling CUDA < 11.0, which we no longer support. This is marked as "breaking" because of minor Python API changes. I changed the name of an error class from `UnSupportedCUDAError` to `UnsupportedCUDAError` and removed an unused error class named `UnSupportedGPUError`. It appears the `UnSupportedGPUError` class was introduced in #4692 but has never been used in the code. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ashwin Srinath (https://github.com/shwina) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/10113 --- python/cudf/cudf/errors.py | 6 +-- python/cudf/cudf/utils/gpu_utils.py | 83 +++++++++++------------------ 2 files changed, 32 insertions(+), 57 deletions(-) diff --git a/python/cudf/cudf/errors.py b/python/cudf/cudf/errors.py index 8a31afab9cf..5d6f52c0307 100644 --- a/python/cudf/cudf/errors.py +++ b/python/cudf/cudf/errors.py @@ -1,9 +1,5 @@ # Copyright (c) 2020, NVIDIA CORPORATION. -class UnSupportedGPUError(Exception): - pass - - -class UnSupportedCUDAError(Exception): +class UnsupportedCUDAError(Exception): pass diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py index 8947760e052..bd3da4ea2ba 100644 --- a/python/cudf/cudf/utils/gpu_utils.py +++ b/python/cudf/cudf/utils/gpu_utils.py @@ -26,16 +26,7 @@ def validate_setup(): runtimeGetVersion, ) - def _try_get_old_or_new_symbols(): - try: - # CUDA 10.2+ symbols - return [ - cudaError_t.cudaErrorDeviceUninitialized, - cudaError_t.cudaErrorTimeout, - ] - except AttributeError: - # CUDA 10.1 symbols - return [cudaError_t.cudaErrorDeviceUninitilialized] + from cudf.errors import UnsupportedCUDAError notify_caller_errors = { cudaError_t.cudaErrorInitializationError, @@ -51,7 +42,8 @@ def _try_get_old_or_new_symbols(): cudaError_t.cudaErrorSystemNotReady, cudaError_t.cudaErrorSystemDriverMismatch, cudaError_t.cudaErrorCompatNotSupportedOnDevice, - *_try_get_old_or_new_symbols(), + cudaError_t.cudaErrorDeviceUninitialized, + cudaError_t.cudaErrorTimeout, cudaError_t.cudaErrorUnknown, cudaError_t.cudaErrorApiFailureBase, } @@ -72,43 +64,38 @@ def _try_get_old_or_new_symbols(): cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0 ) - if major_version >= 6: - # You have a GPU with NVIDIA Pascal™ architecture or better + if major_version < 6: + # A GPU with NVIDIA Pascal™ architecture or newer is required. + # Reference: https://developer.nvidia.com/cuda-gpus # Hardware Generation Compute Capability + # Ampere 8.x # Turing 7.5 - # Volta 7.x + # Volta 7.0, 7.2 # Pascal 6.x - # Maxwell 5.x + # Maxwell 5.x # Kepler 3.x # Fermi 2.x - pass - else: device_name = deviceGetName(0) minor_version = getDeviceAttribute( cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0 ) warnings.warn( - f"You will need a GPU with NVIDIA Pascal™ or " - f"newer architecture" - f"\nDetected GPU 0: {device_name} \n" - f"Detected Compute Capability: " - f"{major_version}.{minor_version}" + "A GPU with NVIDIA Pascal™ (Compute Capability 6.0) " + "or newer architecture is required.\n" + f"Detected GPU 0: {device_name}\n" + f"Detected Compute Capability: {major_version}.{minor_version}" ) cuda_runtime_version = runtimeGetVersion() - if cuda_runtime_version >= 10000: - # CUDA Runtime Version Check: Runtime version is greater than 10000 - pass - else: - from cudf.errors import UnSupportedCUDAError - - minor_version = cuda_runtime_version % 100 - major_version = (cuda_runtime_version - minor_version) // 1000 - raise UnSupportedCUDAError( - f"Detected CUDA Runtime version is " - f"{major_version}.{str(minor_version)[0]}" - f"Please update your CUDA Runtime to 10.0 or above" + if cuda_runtime_version < 11000: + # Require CUDA Runtime version 11.0 or greater. + major_version = cuda_runtime_version // 1000 + minor_version = (cuda_runtime_version % 1000) // 10 + raise UnsupportedCUDAError( + "Detected CUDA Runtime version is " + f"{major_version}.{minor_version}. " + "Please update your CUDA Runtime to 11.0 or above." ) cuda_driver_supported_rt_version = driverGetVersion() @@ -124,15 +111,12 @@ def _try_get_old_or_new_symbols(): # https://docs.nvidia.com/deploy/cuda-compatibility/index.html if cuda_driver_supported_rt_version == 0: - from cudf.errors import UnSupportedCUDAError - - raise UnSupportedCUDAError( - "We couldn't detect the GPU driver " - "properly. Please follow the linux installation guide to " - "ensure your driver is properly installed " - ": https://docs.nvidia.com/cuda/cuda-installation-guide-linux/" + raise UnsupportedCUDAError( + "We couldn't detect the GPU driver properly. Please follow " + "the installation guide to ensure your driver is properly " + "installed: " + "https://docs.nvidia.com/cuda/cuda-installation-guide-linux/" ) - elif cuda_driver_supported_rt_version >= cuda_runtime_version: # CUDA Driver Version Check: # Driver Runtime version is >= Runtime version @@ -147,17 +131,12 @@ def _try_get_old_or_new_symbols(): # version 450.80.02 supports. pass else: - from cudf.errors import UnSupportedCUDAError - - raise UnSupportedCUDAError( - f"Please update your NVIDIA GPU Driver to support CUDA " - f"Runtime.\n" - f"Detected CUDA Runtime version : {cuda_runtime_version}" - f"\n" - f"Latest version of CUDA supported by current " + raise UnsupportedCUDAError( + "Please update your NVIDIA GPU Driver to support CUDA " + "Runtime.\n" + f"Detected CUDA Runtime version : {cuda_runtime_version}\n" + "Latest version of CUDA supported by current " f"NVIDIA GPU Driver : {cuda_driver_supported_rt_version}" ) - else: - warnings.warn("No NVIDIA GPU detected")