diff --git a/.github/actions/compute-matrix/action.yaml b/.github/actions/compute-matrix/action.yaml index 3dd73bc..4bb7dee 100644 --- a/.github/actions/compute-matrix/action.yaml +++ b/.github/actions/compute-matrix/action.yaml @@ -20,14 +20,14 @@ runs: " export TEST_MATRIX=" - - { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } - - { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } + - { CUDA_VER: '11.4.3', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } + - { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } - { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } - - { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } - - { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } - - { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } + - { CUDA_VER: '12.5.1', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } + - { CUDA_VER: '11.4.3', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } + - { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } - { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } - - { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } + - { CUDA_VER: '12.5.1', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } " echo "BUILD_MATRIX=$( diff --git a/.github/workflows/conda-python-tests.yaml 
b/.github/workflows/conda-python-tests.yaml index ba9390e..1e6863e 100644 --- a/.github/workflows/conda-python-tests.yaml +++ b/.github/workflows/conda-python-tests.yaml @@ -134,6 +134,7 @@ jobs: run: ${{ inputs.script }} env: GH_TOKEN: ${{ github.token }} + CUDA_VER: ${{ matrix.CUDA_VER }} - name: Generate test report uses: test-summary/action@v2.3 with: diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 7498eb1..70763f4 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -68,12 +68,10 @@ jobs: test-wheels: needs: - build-wheels - - compute-matrix uses: ./.github/workflows/wheels-test.yaml with: build_type: pull-request script: "ci/test_wheel.sh" - matrix_filter: ${{ needs.compute-matrix.outputs.TEST_MATRIX }} build-docs: needs: - build-conda diff --git a/.github/workflows/wheels-test.yaml b/.github/workflows/wheels-test.yaml index 10acf54..e25c61b 100644 --- a/.github/workflows/wheels-test.yaml +++ b/.github/workflows/wheels-test.yaml @@ -79,23 +79,14 @@ jobs: # export MATRICES=" pull-request: - # amd64 - - { ARCH: 'amd64', PY_VER: '3.9', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'v100', driver: 'latest' } - # arm64 - - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } - nightly: - # amd64 - - { ARCH: 'amd64', PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.9', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'v100', driver: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu22.04', gpu: 'v100', driver: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.10', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'v100', 
driver: 'latest' } - # arm64 - - { ARCH: 'arm64', PY_VER: '3.9', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.0.1', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.10', CUDA_VER: '12.5.1', LINUX_VER: 'ubuntu22.04', gpu: 'a100', driver: 'latest' } - - { ARCH: 'arm64', PY_VER: '3.11', CUDA_VER: '11.8.0', LINUX_VER: 'ubuntu22.04', gpu: 'a100', driver: 'latest' } + - { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } + - { CUDA_VER: '11.8.0', ARCH: 'amd64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } + - { CUDA_VER: '12.0.1', ARCH: 'amd64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } + - { CUDA_VER: '12.5.1', ARCH: 'amd64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'v100', driver: 'latest' } + - { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.9', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } + - { CUDA_VER: '11.8.0', ARCH: 'arm64', PY_VER: '3.10', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } + - { CUDA_VER: '12.0.1', ARCH: 'arm64', PY_VER: '3.11', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } + - { CUDA_VER: '12.5.1', ARCH: 'arm64', PY_VER: '3.12', LINUX_VER: 'ubuntu20.04', gpu: 'a100', driver: 'latest' } " TEST_MATRIX=$(yq -n 'env(MATRICES) | .[strenv(BUILD_TYPE)]') diff --git a/ci/test_conda.sh b/ci/test_conda.sh index 5861128..d6e8bc0 100755 --- a/ci/test_conda.sh +++ b/ci/test_conda.sh @@ -5,15 +5,20 @@ set -euo pipefail . 
/opt/conda/etc/profile.d/conda.sh +if [ "${CUDA_VER%.*.*}" = "11" ]; then + CTK_PACKAGES="cudatoolkit" +else + CTK_PACKAGES="cuda-nvcc-impl cuda-nvrtc" +fi + rapids-logger "Install testing dependencies" # TODO: Replace with rapids-dependency-file-generator rapids-mamba-retry create -n test \ c-compiler \ cxx-compiler \ - cuda-nvcc-impl \ - cuda-nvrtc \ + ${CTK_PACKAGES} \ cuda-python \ - cuda-version=${RAPIDS_CUDA_VERSION%.*} \ + cuda-version=${CUDA_VER%.*} \ make \ psutil \ pytest \ diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py index 6e56076..309169b 100644 --- a/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +++ b/numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py @@ -87,6 +87,8 @@ def test_used_list(self): self.assertIn('section "llvm.metadata"', used_line) def test_nvvm_ir_verify_fail(self): + if runtime.get_version() >= (12, 5): + self.skipTest("Bad triple doesn't fail verify on CUDA >= 12.5") m = ir.Module("test_bad_ir") m.triple = "unknown-unknown-unknown" m.data_layout = NVVM().data_layout diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py b/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py index dd3da96..86dbb22 100644 --- a/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +++ b/numba_cuda/numba/cuda/tests/cudapy/test_atomics.py @@ -558,18 +558,12 @@ def assertCorrectFloat64Atomics(self, kernel, shared=True): # Use the first (and only) definition asm = next(iter(kernel.inspect_asm().values())) if cc_X_or_above(6, 0): - if cuda.runtime.get_version() > (12, 1): - # CUDA 12.2 and above generate a more optimized reduction - # instruction, because the result does not need to be - # placed in a register. 
- inst = 'red' - else: - inst = 'atom' + inst = "(red|atom)" if shared: - inst = f'{inst}.shared' + inst = f'{inst}\\.shared' - self.assertIn(f'{inst}.add.f64', asm) + self.assertRegex(asm, f'{inst}\\.add\\.f64') else: if shared: self.assertIn('atom.shared.cas.b64', asm)