def check_and_load_cuda_libs(root_directory, cuda_libs_):
    """Recursively search ``root_directory`` and preload the named CUDA libraries.

    File names are matched case-insensitively. Each matching file is loaded
    with ``ctypes.CDLL``. A library only counts as satisfied once the load
    actually succeeded, so a broken copy in one directory does not stop the
    search from trying another copy further down the tree.

    Args:
        root_directory: Directory whose tree is walked with ``os.walk``.
            A missing directory simply yields no matches.
        cuda_libs_: Iterable of library file names to load. ``None`` or an
            empty iterable makes the call a no-op.

    Returns:
        None. Progress and failures are reported through ``logging.info``;
        load errors (``OSError``) are never propagated to the caller.
    """
    # Imported here so the function does not depend on module-level imports
    # that are only executed for CUDA-enabled packages.
    import ctypes
    import logging
    import os

    if not cuda_libs_:
        logging.info("No CUDA libraries provided for loading.")
        return

    # Convert the target library names to lowercase for case-insensitive comparison
    wanted = {lib.lower() for lib in cuda_libs_}
    loaded = {}  # lowercase library name -> full path that was loaded successfully

    for dirpath, _, filenames in os.walk(root_directory):
        # Map lowercase file names back to their on-disk spelling.
        on_disk = {name.lower(): name for name in filenames}
        # Only consider libraries that are still missing; sort for a deterministic load order.
        for lib in sorted(name for name in wanted if name in on_disk and name not in loaded):
            full_path = os.path.join(dirpath, on_disk[lib])
            try:
                ctypes.CDLL(full_path)
            except OSError as e:
                logging.info("Failed to load %s: %s", full_path, e)
                continue
            loaded[lib] = full_path
            logging.info("Successfully loaded: %s", full_path)

        # If all required libraries are loaded, stop the search
        if wanted.issubset(loaded):
            logging.info("All required CUDA libraries found and loaded.")
            return

    # Report everything that was either not found or found but not loadable.
    logging.info(
        "Failed to load CUDA libraries from site-packages/nvidia directory: %s. "
        "They might be loaded later from standard search paths for shared libraries.",
        wanted.difference(loaded),
    )
    return
# Load nvidia libraries from site-packages/nvidia if the package is onnxruntime-gpu.
# Preloading is best-effort: nothing in this section may make ``import onnxruntime`` fail.
if cuda_version is not None and cuda_version != "":
    import ctypes  # noqa: F401 -- kept at module scope because check_and_load_cuda_libs uses it
    import logging
    import os
    import platform
    import site

    # Library file names to preload, keyed by (platform.system(), CUDA major version).
    _cuda_libs_by_target = {
        # nvrtc, curand and nvJitLink are not included for Windows
        ("Windows", 11): (
            "cublaslt64_11.dll",
            "cublas64_11.dll",
            "cufft64_10.dll",
            "cudart64_11.dll",
            "cudnn64_8.dll",
        ),
        ("Windows", 12): (
            "cublaslt64_12.dll",
            "cublas64_12.dll",
            "cufft64_11.dll",
            "cudart64_12.dll",
            "cudnn64_9.dll",
        ),
        ("Linux", 11): (
            "libcublaslt.so.11",
            "libcublas.so.11",
            "libcurand.so.10",
            "libcufft.so.10",
            "libcudart.so.11",
            "libcudnn.so.8",
            # This is not a mistake, it links to more specific version like libnvrtc.so.11.8.89 etc.
            "libnvrtc.so.11.2",
        ),
        ("Linux", 12): (
            "libcublaslt.so.12",
            "libcublas.so.12",
            "libcurand.so.10",
            "libcufft.so.11",
            "libcudart.so.12",
            "libcudnn.so.9",
            "libnvrtc.so.12",
        ),
    }

    try:
        cuda_version_ = tuple(map(int, cuda_version.split(".")))
    except ValueError:
        # A malformed version string must not break the import; just skip preloading.
        logging.info("Cannot parse CUDA version %r; CUDA libraries will not be preloaded.", cuda_version)
        cuda_version_ = ()

    # Get the site-packages path where nvidia packages are installed. The last
    # system entry is not necessarily the one pip installed into (there can be
    # several system directories, or a per-user install), so prefer the first
    # candidate that really contains an "nvidia" folder and only fall back to
    # the last system directory when none does.
    # site.getsitepackages may be absent when an environment ships its own site module.
    _system_site_dirs = list(site.getsitepackages()) if hasattr(site, "getsitepackages") else []
    _user_site_dirs = [site.getusersitepackages()] if hasattr(site, "getusersitepackages") else []
    site_packages_path = next(
        (d for d in _system_site_dirs + _user_site_dirs if os.path.isdir(os.path.join(d, "nvidia"))),
        _system_site_dirs[-1] if _system_site_dirs else None,
    )

    _system = platform.system()
    if _system not in ("Windows", "Linux"):
        logging.info(f"Unsupported platform: {_system}")

    # Select by major version only, so "12", "12.4" and "12.4.1" all resolve to the CUDA 12 list.
    # Unsupported platforms or CUDA versions leave the tuple empty.
    cuda_libs = _cuda_libs_by_target.get((_system, cuda_version_[0]), ()) if cuda_version_ else ()

    if site_packages_path is None:
        # Never fall back to a relative "nvidia" path: that would search the current directory.
        nvidia_path = None
        logging.info("No site-packages directory found; CUDA libraries will not be preloaded.")
    else:
        nvidia_path = os.path.join(site_packages_path, "nvidia")
        # Traverse the directory and subdirectories
        check_and_load_cuda_libs(nvidia_path, cuda_libs)
the library info may not be available - # when the build environment has none or multiple libraries installed - try: - from .build_and_package_info import cudart_version - except Exception: - warnings.warn("WARNING: failed to get cudart_version from onnxruntime build info.") - cudart_version = None - - def print_build_package_info(): - warnings.warn(f"onnxruntime training package info: package_name: {package_name}") - warnings.warn(f"onnxruntime training package info: __version__: {version}") - warnings.warn(f"onnxruntime training package info: cuda_version: {cuda_version}") - warnings.warn(f"onnxruntime build info: cudart_version: {cudart_version}") + warnings.warn("WARNING: failed to get cudart_version from onnxruntime build info.") + cudart_version = None + + def print_build_package_info(): + warnings.warn(f"onnxruntime training package info: package_name: {package_name}") + warnings.warn(f"onnxruntime training package info: __version__: {version}") + warnings.warn(f"onnxruntime training package info: cuda_version: {cuda_version}") + warnings.warn(f"onnxruntime build info: cudart_version: {cudart_version}") + # Cudart only available on Linux + if platform.system().lower() == "linux": # collection cuda library info from current environment. 
from onnxruntime.capi.onnxruntime_collect_build_info import find_cudart_versions @@ -133,13 +134,13 @@ def print_build_package_info(): print_build_package_info() warnings.warn("WARNING: failed to find cudart version that matches onnxruntime build info") warnings.warn(f"WARNING: found cudart versions: {local_cudart_versions}") - else: - # TODO: rcom - pass + else: + # TODO: rcom + pass - except Exception as e: - warnings.warn("WARNING: failed to collect onnxruntime version and build info") - print(e) + except Exception as e: + warnings.warn("WARNING: failed to collect onnxruntime version and build info") + print(e) if import_ortmodule_exception: raise import_ortmodule_exception diff --git a/setup.py b/setup.py index 1ca31cb0019f0..2c9a8a5600401 100644 --- a/setup.py +++ b/setup.py @@ -7,6 +7,7 @@ import datetime import logging import platform +import re import shlex import subprocess import sys @@ -54,6 +55,7 @@ def parse_arg_remove_string(argv, arg_name_equal): wheel_name_suffix = parse_arg_remove_string(sys.argv, "--wheel_name_suffix=") cuda_version = None +cuda_version_major = None rocm_version = None is_migraphx = False is_rocm = False @@ -63,6 +65,11 @@ def parse_arg_remove_string(argv, arg_name_equal): if wheel_name_suffix == "gpu": # TODO: how to support multiple CUDA versions? 
cuda_version = parse_arg_remove_string(sys.argv, "--cuda_version=") + if cuda_version is not None: + if not bool(re.match(r"^\d+\.\d+(\.\d+)?$", cuda_version)): + logger.error("CUDA version must be in format 'x.y' or 'x.y.z'") + sys.exit(1) + cuda_version_major = cuda_version.split(".")[0] elif parse_arg_remove_boolean(sys.argv, "--use_rocm"): is_rocm = True rocm_version = parse_arg_remove_string(sys.argv, "--rocm_version=") @@ -705,11 +712,22 @@ def reformat_run_count(count_str): version_number = version_number + local_version if is_rocm and enable_rocm_profiling: version_number = version_number + ".profiling" - +extras_require = {} if wheel_name_suffix: if not (enable_training and wheel_name_suffix == "gpu"): # for training packages, local version is used to indicate device types package_name = f"{package_name}-{wheel_name_suffix}" + if (wheel_name_suffix == "gpu" or wheel_name_suffix == "cuda") and cuda_version_major is not None: + extras_require = { + # Optional 'cuda_dlls' dependencies + "cuda_dlls": [ + f"nvidia-cuda-nvrtc-cu{cuda_version_major}", + f"nvidia-cuda-runtime-cu{cuda_version_major}", + f"nvidia-cudnn-cu{cuda_version_major}", + f"nvidia-cufft-cu{cuda_version_major}", + f"nvidia-curand-cu{cuda_version_major}", + ] + } cmd_classes = {} if bdist_wheel is not None: @@ -727,21 +745,20 @@ def reformat_run_count(count_str): install_requires = f.read().splitlines() -if enable_training: - - def save_build_and_package_info(package_name, version_number, cuda_version, rocm_version): - sys.path.append(path.join(path.dirname(__file__), "onnxruntime", "python")) - from onnxruntime_collect_build_info import find_cudart_versions +def save_build_and_package_info(package_name, version_number, cuda_version, rocm_version): + sys.path.append(path.join(path.dirname(__file__), "onnxruntime", "python")) + from onnxruntime_collect_build_info import find_cudart_versions - version_path = path.join("onnxruntime", "capi", "build_and_package_info.py") - with open(version_path, 
"w") as f: - f.write(f"package_name = '{package_name}'\n") - f.write(f"__version__ = '{version_number}'\n") + version_path = path.join("onnxruntime", "capi", "build_and_package_info.py") + with open(version_path, "w") as f: + f.write(f"package_name = '{package_name}'\n") + f.write(f"__version__ = '{version_number}'\n") - if cuda_version: - f.write(f"cuda_version = '{cuda_version}'\n") + if cuda_version: + f.write(f"cuda_version = '{cuda_version}'\n") - # cudart_versions are integers + # cudart_versions are integers + if platform.system().lower() == "linux": cudart_versions = find_cudart_versions(build_env=True) if cudart_versions and len(cudart_versions) == 1: f.write(f"cudart_version = {cudart_versions[0]}\n") @@ -754,12 +771,15 @@ def save_build_and_package_info(package_name, version_number, cuda_version, rocm else "found multiple cudart libraries" ), ) - elif rocm_version: - f.write(f"rocm_version = '{rocm_version}'\n") + elif rocm_version: + f.write(f"rocm_version = '{rocm_version}'\n") + +if enable_training: save_build_and_package_info(package_name, version_number, cuda_version, rocm_version) +else: + save_build_and_package_info(package_name, version_number, cuda_version, None) -# Setup setup( name=package_name, version=version_number, @@ -783,4 +803,5 @@ def save_build_and_package_info(package_name, version_number, cuda_version, rocm ] }, classifiers=classifiers, + extras_require=extras_require, )