From 64cbc4c2a600cca5c9eba166fb21091a4c1f0d46 Mon Sep 17 00:00:00 2001 From: Keenan Simpson Date: Tue, 10 Dec 2024 14:02:07 -0800 Subject: [PATCH] Lazy load code modules (#269) Lazy load module in ObjectCode --- cuda_core/cuda/core/experimental/_module.py | 40 ++++++++++----- cuda_core/tests/conftest.py | 10 +++- cuda_core/tests/test_module.py | 55 +++++++-------------- cuda_core/tests/test_program.py | 10 +--- 4 files changed, 53 insertions(+), 62 deletions(-) diff --git a/cuda_core/cuda/core/experimental/_module.py b/cuda_core/cuda/core/experimental/_module.py index 69dbcd37..7a4fc0e2 100644 --- a/cuda_core/cuda/core/experimental/_module.py +++ b/cuda_core/cuda/core/experimental/_module.py @@ -5,7 +5,7 @@ import importlib.metadata from cuda import cuda -from cuda.core.experimental._utils import handle_return +from cuda.core.experimental._utils import handle_return, precondition _backend = { "old": { @@ -106,30 +106,43 @@ class ObjectCode: """ - __slots__ = ("_handle", "_code_type", "_module", "_loader", "_sym_map") + __slots__ = ("_handle", "_backend_version", "_jit_options", "_code_type", "_module", "_loader", "_sym_map") _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin") def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None): if code_type not in self._supported_code_type: raise ValueError _lazy_init() + + # handle is assigned during _lazy_load self._handle = None + self._jit_options = jit_options + + self._backend_version = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old" + self._loader = _backend[self._backend_version] + + self._code_type = code_type + self._module = module + self._sym_map = {} if symbol_mapping is None else symbol_mapping - backend = "new" if (_py_major_ver >= 12 and _driver_ver >= 12000) else "old" - self._loader = _backend[backend] + # TODO: do we want to unload in a finalizer? Probably not.. + def _lazy_load_module(self, *args, **kwargs): + if self._handle is not None: + return + jit_options = self._jit_options + module = self._module if isinstance(module, str): # TODO: this option is only taken by the new library APIs, but we have # a bug that we can't easily support it just yet (NVIDIA/cuda-python#73). if jit_options is not None: raise ValueError - module = module.encode() self._handle = handle_return(self._loader["file"](module)) else: assert isinstance(module, bytes) if jit_options is None: jit_options = {} - if backend == "new": + if self._backend_version == "new": args = ( module, list(jit_options.keys()), @@ -141,15 +154,15 @@ def __init__(self, module, code_type, jit_options=None, *, symbol_mapping=None): 0, ) else: # "old" backend - args = (module, len(jit_options), list(jit_options.keys()), list(jit_options.values())) + args = ( + module, + len(jit_options), + list(jit_options.keys()), + list(jit_options.values()), + ) self._handle = handle_return(self._loader["data"](*args)) - self._code_type = code_type - self._module = module - self._sym_map = {} if symbol_mapping is None else symbol_mapping - - # TODO: do we want to unload in a finalizer? Probably not.. - + @precondition(_lazy_load_module) def get_kernel(self, name): """Return the :obj:`Kernel` of a specified name from this object code. @@ -168,6 +181,7 @@ def get_kernel(self, name): name = self._sym_map[name] except KeyError: name = name.encode() + data = handle_return(self._loader["kernel"](self._handle, name)) return Kernel._from_obj(data, self) diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py index fe755738..30d80f6f 100644 --- a/cuda_core/tests/conftest.py +++ b/cuda_core/tests/conftest.py @@ -11,10 +11,10 @@ import sys try: - from cuda.bindings import driver + from cuda.bindings import driver, nvrtc except ImportError: from cuda import cuda as driver - + from cuda import nvrtc import pytest from cuda.core.experimental import Device, _device @@ -65,3 +65,9 @@ def clean_up_cffi_files(): os.remove(f) except FileNotFoundError: pass # noqa: SIM105 + + +def can_load_generated_ptx(): + _, driver_ver = driver.cuDriverGetVersion() + _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion() + return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index a976726f..b2519c85 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -6,43 +6,22 @@ # this software and related documentation outside the terms of the EULA # is strictly prohibited. -import importlib import pytest - -from cuda.core.experimental._module import ObjectCode - - -@pytest.mark.skipif( - int(importlib.metadata.version("cuda-python").split(".")[0]) < 12, - reason="Module loading for older drivers validate require valid module code.", -) -def test_object_code_initialization(): - # Test with supported code types - for code_type in ["cubin", "ptx", "fatbin"]: - module_data = b"dummy_data" - obj_code = ObjectCode(module_data, code_type) - assert obj_code._code_type == code_type - assert obj_code._module == module_data - assert obj_code._handle is not None - - # Test with unsupported code type - with pytest.raises(ValueError): - ObjectCode(b"dummy_data", "unsupported_code_type") - - -# TODO add ObjectCode tests which provide the appropriate data for cuLibraryLoadFromFile -def test_object_code_initialization_with_str(): - assert True - - -def test_object_code_initialization_with_jit_options(): - assert True - - -def test_object_code_get_kernel(): - assert True - - -def test_kernel_from_obj(): - assert True +from conftest import can_load_generated_ptx + +from cuda.core.experimental import Program + + +@pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new") +def test_get_kernel(): + kernel = """ +extern __device__ int B(); +extern __device__ int C(int a, int b); +__global__ void A() { int result = C(B(), 1);} +""" + object_code = Program(kernel, "c++").compile("ptx", options=("-rdc=true",)) + assert object_code._handle is None + kernel = object_code.get_kernel("A") + assert object_code._handle is not None + assert kernel._handle is not None diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index f1c24b3e..cca01af5 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -7,20 +7,12 @@ # is strictly prohibited. import pytest +from conftest import can_load_generated_ptx -from cuda import cuda, nvrtc from cuda.core.experimental import Device, Program from cuda.core.experimental._module import Kernel, ObjectCode -def can_load_generated_ptx(): - _, driver_ver = cuda.cuDriverGetVersion() - _, nvrtc_major, nvrtc_minor = nvrtc.nvrtcVersion() - if nvrtc_major * 1000 + nvrtc_minor * 10 > driver_ver: - return False - return True - - def test_program_init_valid_code_type(): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++")