From a1ca8c1e408ac1791c4f4bae563e775bbddb5a29 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Mon, 29 Nov 2021 11:32:43 -0500
Subject: [PATCH] Use ptxcompiler to patch Numba at runtime to support CUDA
 enhanced compatibility. (#9687)

---
 conda/environments/cudf_dev_cuda11.0.yml      |  3 +-
 conda/environments/cudf_dev_cuda11.2.yml      |  3 +-
 conda/environments/cudf_dev_cuda11.5.yml      |  3 +-
 conda/recipes/cudf/meta.yaml                  |  5 +-
 python/cudf/cudf/__init__.py                  | 11 ++++
 .../cudf/tests/test_extension_compilation.py  | 57 ++++++++++++-------
 6 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
index e2ead779861..7c22b4d35e3 100644
--- a/conda/environments/cudf_dev_cuda11.0.yml
+++ b/conda/environments/cudf_dev_cuda11.0.yml
@@ -14,7 +14,7 @@ dependencies:
   - cmake>=3.20.1
   - cmake_setuptools>=0.1.3
   - python>=3.7,<3.9
-  - numba>=0.53.1
+  - numba>=0.54
   - numpy
   - pandas>=1.0,<1.4.0dev0
   - pyarrow=5.0.0=*cuda
@@ -66,3 +66,4 @@ dependencies:
       - git+https://github.com/dask/distributed.git@main
       - git+https://github.com/python-streamz/streamz.git@master
       - pyorc
+      - ptxcompiler  # [linux64]
diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
index 6146d84835a..0978ae7c8f9 100644
--- a/conda/environments/cudf_dev_cuda11.2.yml
+++ b/conda/environments/cudf_dev_cuda11.2.yml
@@ -14,7 +14,7 @@ dependencies:
   - cmake>=3.20.1
   - cmake_setuptools>=0.1.3
   - python>=3.7,<3.9
-  - numba>=0.53.1
+  - numba>=0.54
   - numpy
   - pandas>=1.0,<1.4.0dev0
   - pyarrow=5.0.0=*cuda
@@ -66,3 +66,4 @@ dependencies:
       - git+https://github.com/dask/distributed.git@main
       - git+https://github.com/python-streamz/streamz.git@master
       - pyorc
+      - ptxcompiler  # [linux64]
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index 043c81c9e01..d2d0a38c44e 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -14,7 +14,7 @@ dependencies:
   - cmake>=3.20.1
   - cmake_setuptools>=0.1.3
   - python>=3.7,<3.9
-  - numba>=0.53.1
+  - numba>=0.54
   - numpy
   - pandas>=1.0,<1.4.0dev0
   - pyarrow=5.0.0=*cuda
@@ -66,3 +66,4 @@ dependencies:
       - git+https://github.com/dask/distributed.git@main
       - git+https://github.com/python-streamz/streamz.git@master
       - pyorc
+      - ptxcompiler  # [linux64]
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 6d56b0c0c94..46eefbc825f 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -29,7 +29,7 @@ requirements:
     - python
    - cython >=0.29,<0.30
     - setuptools
-    - numba >=0.53.1
+    - numba >=0.54
     - dlpack>=0.5,<0.6.0a0
     - pyarrow 5.0.0 *cuda
     - libcudf {{ version }}
@@ -41,7 +41,7 @@ requirements:
     - typing_extensions
     - pandas >=1.0,<1.4.0dev0
     - cupy >=9.5.0,<10.0.0a0
-    - numba >=0.53.1
+    - numba >=0.54
     - numpy
     - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda
     - fastavro >=0.22.0
@@ -51,6 +51,7 @@ requirements:
     - nvtx >=0.2.1
     - packaging
     - cachetools
+    - ptxcompiler  # [linux64]  # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler
 
 test:                      # [linux64]
   requires:                # [linux64]
diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index bc35551b5bd..b24e71e7785 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -102,6 +102,17 @@
 from cudf.utils.dtypes import _NA_REP
 from cudf.utils.utils import set_allocator
 
+try:
+    from ptxcompiler.patch import patch_numba_codegen_if_needed
+except ImportError:
+    pass
+else:
+    # Patch Numba to support CUDA enhanced compatibility.
+    # See https://github.com/rapidsai/ptxcompiler for
+    # details.
+    patch_numba_codegen_if_needed()
+    del patch_numba_codegen_if_needed
+
 cuda.set_memory_manager(rmm.RMMNumbaManager)
 cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
 
diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py
index 39fa7b11ce2..47c9448cf63 100644
--- a/python/cudf/cudf/tests/test_extension_compilation.py
+++ b/python/cudf/cudf/tests/test_extension_compilation.py
@@ -1,5 +1,6 @@
 import operator
 
+import cupy as cp
 import pytest
 from numba import cuda, types
 from numba.cuda import compile_ptx
@@ -71,8 +72,8 @@ def test_execute_masked_binary(op, ty):
     def func(x, y):
         return op(x, y)
 
-    @cuda.jit(debug=True)
-    def test_kernel(x, y):
+    @cuda.jit
+    def test_kernel(x, y, err):
         # Reference result with unmasked value
         u = func(x, y)
 
@@ -87,14 +88,22 @@ def test_kernel(x, y):
         # Check masks are as expected, and unmasked result matches masked
         # result
         if r0.valid:
-            raise RuntimeError("Expected r0 to be invalid")
+            # TODO: ideally, we would raise an exception here rather
+            # than return an "error code", and that is what the
+            # previous version of this (and below) tests did. But,
+            # Numba kernels cannot currently use `debug=True` with
+            # CUDA enhanced compatibility. Once a solution to that is
+            # reached, we should switch back to raising exceptions
+            # here.
+            err[0] = 1
         if not r1.valid:
-            raise RuntimeError("Expected r1 to be valid")
+            err[0] = 2
         if u != r1.value:
-            print("Values: ", u, r1.value)
-            raise RuntimeError("u != r1.value")
+            err[0] = 3
 
-    test_kernel[1, 1](1, 2)
+    err = cp.asarray([0], dtype="int8")
+    test_kernel[1, 1](1, 2, err)
+    assert err[0] == 0
 
 
 @pytest.mark.parametrize("op", ops)
@@ -187,18 +196,20 @@ def test_is_na(fn):
 
     device_fn = cuda.jit(device=True)(fn)
 
-    @cuda.jit(debug=True)
-    def test_kernel():
+    @cuda.jit
+    def test_kernel(err):
         valid_is_na = device_fn(valid)
         invalid_is_na = device_fn(invalid)
 
         if valid_is_na:
-            raise RuntimeError("Valid masked value is NA and should not be")
+            err[0] = 1
 
         if not invalid_is_na:
-            raise RuntimeError("Invalid masked value is not NA and should be")
+            err[0] = 2
 
-    test_kernel[1, 1]()
+    err = cp.asarray([0], dtype="int8")
+    test_kernel[1, 1](err)
+    assert err[0] == 0
 
 
 def func_lt_na(x):
@@ -271,8 +282,8 @@ def test_na_masked_comparisons(fn, ty):
 
     device_fn = cuda.jit(device=True)(fn)
 
-    @cuda.jit(debug=True)
-    def test_kernel():
+    @cuda.jit
+    def test_kernel(err):
         unmasked = ty(1)
         valid_masked = Masked(unmasked, True)
         invalid_masked = Masked(unmasked, False)
@@ -281,12 +292,14 @@ def test_kernel():
         invalid_cmp_na = device_fn(invalid_masked)
 
         if valid_cmp_na:
-            raise RuntimeError("Valid masked value compared True with NA")
+            err[0] = 1
 
         if invalid_cmp_na:
-            raise RuntimeError("Invalid masked value compared True with NA")
+            err[0] = 2
 
-    test_kernel[1, 1]()
+    err = cp.asarray([0], dtype="int8")
+    test_kernel[1, 1](err)
+    assert err[0] == 0
 
 
 # xfail because scalars do not yet cast for a comparison to NA
@@ -297,13 +310,15 @@ def test_na_scalar_comparisons(fn, ty):
 
     device_fn = cuda.jit(device=True)(fn)
 
-    @cuda.jit(debug=True)
-    def test_kernel():
+    @cuda.jit
+    def test_kernel(err):
         unmasked = ty(1)
 
         unmasked_cmp_na = device_fn(unmasked)
 
         if unmasked_cmp_na:
-            raise RuntimeError("Unmasked value compared True with NA")
+            err[0] = 1
 
-    test_kernel[1, 1]()
+    err = cp.asarray([0], dtype="int8")
+    test_kernel[1, 1](err)
+    assert err[0] == 0