Skip to content

Commit

Permalink
Update gpu_utils.py to reflect current CUDA support. (#10113)
Browse files: browse the repository at this point in the history
This PR resolves #10076. It improves `gpu_utils.py` by removing code for handling CUDA < 11.0, which we no longer support.

This is marked as "breaking" because of minor Python API changes. I changed the name of an error class from `UnSupportedCUDAError` to `UnsupportedCUDAError` and removed an unused error class named `UnSupportedGPUError`. It appears the `UnSupportedGPUError` class was introduced in #4692 but has never been used in the code.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #10113
  • Loading branch information
bdice authored Jan 26, 2022
1 parent 3265531 commit 05d53cf
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 57 deletions.
6 changes: 1 addition & 5 deletions python/cudf/cudf/errors.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
# Copyright (c) 2020, NVIDIA CORPORATION.


class UnSupportedGPUError(Exception):
pass


class UnSupportedCUDAError(Exception):
class UnsupportedCUDAError(Exception):
pass
83 changes: 31 additions & 52 deletions python/cudf/cudf/utils/gpu_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,7 @@ def validate_setup():
runtimeGetVersion,
)

def _try_get_old_or_new_symbols():
try:
# CUDA 10.2+ symbols
return [
cudaError_t.cudaErrorDeviceUninitialized,
cudaError_t.cudaErrorTimeout,
]
except AttributeError:
# CUDA 10.1 symbols
return [cudaError_t.cudaErrorDeviceUninitilialized]
from cudf.errors import UnsupportedCUDAError

notify_caller_errors = {
cudaError_t.cudaErrorInitializationError,
Expand All @@ -51,7 +42,8 @@ def _try_get_old_or_new_symbols():
cudaError_t.cudaErrorSystemNotReady,
cudaError_t.cudaErrorSystemDriverMismatch,
cudaError_t.cudaErrorCompatNotSupportedOnDevice,
*_try_get_old_or_new_symbols(),
cudaError_t.cudaErrorDeviceUninitialized,
cudaError_t.cudaErrorTimeout,
cudaError_t.cudaErrorUnknown,
cudaError_t.cudaErrorApiFailureBase,
}
Expand All @@ -72,43 +64,38 @@ def _try_get_old_or_new_symbols():
cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, 0
)

if major_version >= 6:
# You have a GPU with NVIDIA Pascal™ architecture or better
if major_version < 6:
# A GPU with NVIDIA Pascal™ architecture or newer is required.
# Reference: https://developer.nvidia.com/cuda-gpus
# Hardware Generation Compute Capability
# Ampere 8.x
# Turing 7.5
# Volta 7.x
# Volta 7.0, 7.2
# Pascal 6.x
# Maxwell 5.x
# Maxwell 5.x
# Kepler 3.x
# Fermi 2.x
pass
else:
device_name = deviceGetName(0)
minor_version = getDeviceAttribute(
cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, 0
)
warnings.warn(
f"You will need a GPU with NVIDIA Pascal™ or "
f"newer architecture"
f"\nDetected GPU 0: {device_name} \n"
f"Detected Compute Capability: "
f"{major_version}.{minor_version}"
"A GPU with NVIDIA Pascal™ (Compute Capability 6.0) "
"or newer architecture is required.\n"
f"Detected GPU 0: {device_name}\n"
f"Detected Compute Capability: {major_version}.{minor_version}"
)

cuda_runtime_version = runtimeGetVersion()

if cuda_runtime_version >= 10000:
# CUDA Runtime Version Check: Runtime version is greater than 10000
pass
else:
from cudf.errors import UnSupportedCUDAError

minor_version = cuda_runtime_version % 100
major_version = (cuda_runtime_version - minor_version) // 1000
raise UnSupportedCUDAError(
f"Detected CUDA Runtime version is "
f"{major_version}.{str(minor_version)[0]}"
f"Please update your CUDA Runtime to 10.0 or above"
if cuda_runtime_version < 11000:
# Require CUDA Runtime version 11.0 or greater.
major_version = cuda_runtime_version // 1000
minor_version = (cuda_runtime_version % 1000) // 10
raise UnsupportedCUDAError(
"Detected CUDA Runtime version is "
f"{major_version}.{minor_version}. "
"Please update your CUDA Runtime to 11.0 or above."
)

cuda_driver_supported_rt_version = driverGetVersion()
Expand All @@ -124,15 +111,12 @@ def _try_get_old_or_new_symbols():
# https://docs.nvidia.com/deploy/cuda-compatibility/index.html

if cuda_driver_supported_rt_version == 0:
from cudf.errors import UnSupportedCUDAError

raise UnSupportedCUDAError(
"We couldn't detect the GPU driver "
"properly. Please follow the linux installation guide to "
"ensure your driver is properly installed "
": https://docs.nvidia.com/cuda/cuda-installation-guide-linux/"
raise UnsupportedCUDAError(
"We couldn't detect the GPU driver properly. Please follow "
"the installation guide to ensure your driver is properly "
"installed: "
"https://docs.nvidia.com/cuda/cuda-installation-guide-linux/"
)

elif cuda_driver_supported_rt_version >= cuda_runtime_version:
# CUDA Driver Version Check:
# Driver Runtime version is >= Runtime version
Expand All @@ -147,17 +131,12 @@ def _try_get_old_or_new_symbols():
# version 450.80.02 supports.
pass
else:
from cudf.errors import UnSupportedCUDAError

raise UnSupportedCUDAError(
f"Please update your NVIDIA GPU Driver to support CUDA "
f"Runtime.\n"
f"Detected CUDA Runtime version : {cuda_runtime_version}"
f"\n"
f"Latest version of CUDA supported by current "
raise UnsupportedCUDAError(
"Please update your NVIDIA GPU Driver to support CUDA "
"Runtime.\n"
f"Detected CUDA Runtime version : {cuda_runtime_version}\n"
"Latest version of CUDA supported by current "
f"NVIDIA GPU Driver : {cuda_driver_supported_rt_version}"
)

else:

warnings.warn("No NVIDIA GPU detected")

0 comments on commit 05d53cf

Please sign in to comment.