diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml
index e00cf27f..084f4c2f 100644
--- a/.github/actions/setup/action.yml
+++ b/.github/actions/setup/action.yml
@@ -47,7 +47,22 @@ runs:
       run: |
         env
 
+    - name: Set up CTK cache variable
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: |
+        echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}" >> $GITHUB_ENV
+        echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}.tar.gz" >> $GITHUB_ENV
+
+    - name: Download CTK cache
+      id: ctk-get-cache
+      uses: actions/cache/restore@v4
+      continue-on-error: true
+      with:
+        key: ${{ env.CTK_CACHE_KEY }}
+        path: ./${{ env.CTK_CACHE_FILENAME }}
+
     - name: Get CUDA components
+      if: ${{ steps.ctk-get-cache.outputs.cache-hit != 'true' }}
       shell: bash --noprofile --norc -xeuo pipefail {0}
       run: |
         CUDA_PATH="./cuda_toolkit"
@@ -90,15 +105,43 @@ runs:
         }
 
         # Get headers and shared libraries in place
+        # Note: the existing cache entry would need to be manually deleted (e.g. through the
+        # web UI) if this list is changed, as the cache key does not depend on the component
+        # list and the cache actions do not offer any option for us to invalidate an entry.
         populate_cuda_path cuda_nvcc
         populate_cuda_path cuda_cudart
         populate_cuda_path cuda_nvrtc
         populate_cuda_path cuda_profiler_api
+        populate_cuda_path libnvjitlink
         ls -l $CUDA_PATH
 
+        # Prepare the cache
+        # Note: try to escape | and > ...
+        tar -czvf ${CTK_CACHE_FILENAME} ${CUDA_PATH}
+
         # Note: the headers will be copied into the cibuildwheel manylinux container,
         # so setting the CUDA_PATH env var here is meaningless.
 
+    - name: Upload CTK cache
+      # Save only when every earlier step succeeded, so a missing/partial tarball is never cached.
+      if: ${{ success() && steps.ctk-get-cache.outputs.cache-hit != 'true' }}
+      uses: actions/cache/save@v4
+      with:
+        key: ${{ env.CTK_CACHE_KEY }}
+        path: ./${{ env.CTK_CACHE_FILENAME }}
+
+    - name: Restore CTK cache
+      if: ${{ steps.ctk-get-cache.outputs.cache-hit == 'true' }}
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: |
+        ls -l
+        CUDA_PATH="./cuda_toolkit"
+        tar -xzvf $CTK_CACHE_FILENAME
+        ls -l $CUDA_PATH
+        if [ ! -d "$CUDA_PATH/include" ]; then
+          exit 1
+        fi
+
     - name: Set environment variables
       shell: bash --noprofile --norc -xeuo pipefail {0}
       run: |
diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml
new file mode 100644
index 00000000..66468bd1
--- /dev/null
+++ b/.github/actions/test/action.yml
@@ -0,0 +1,93 @@
+name: test
+
+description: Run tests in specified project
+
+inputs:
+  test-options:
+    required: true
+    type: string
+
+runs:
+  using: composite
+  steps:
+    - name: Run nvidia-smi to make sure GPU is working
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: nvidia-smi
+
+    - name: Download bindings build artifacts
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
+        path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
+
+    - name: Display structure of downloaded bindings artifacts
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: |
+        pwd
+        ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
+
+    - name: Download core build artifacts
+      uses: actions/download-artifact@v4
+      with:
+        name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
+        path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
+
+    - name: Display structure of downloaded core build artifacts
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: |
+        pwd
+        ls -lahR $CUDA_CORE_ARTIFACTS_DIR
+
+    - name: Set up Python ${{ env.PYTHON_VERSION }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+
+    - name: Set up CTK cache variable
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: |
+        echo "CTK_CACHE_KEY=mini-ctk-${CTK_BUILD_VER}-${HOST_PLATFORM}" >> $GITHUB_ENV
+        echo "CTK_CACHE_FILENAME=mini-ctk-${CTK_BUILD_VER}-${HOST_PLATFORM}.tar.gz" >> $GITHUB_ENV
+
+    - name: Download CTK cache
+      id: ctk-get-cache
+      uses: actions/cache/restore@v4
+      # No continue-on-error: this action never rebuilds the mini CTK, so a miss must fail here.
+      with:
+        key: ${{ env.CTK_CACHE_KEY }}
+        path: ./${{ env.CTK_CACHE_FILENAME }}
+        fail-on-cache-miss: true
+
+    - name: Restore CTK cache
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: |
+        ls -l
+        CUDA_PATH="$(pwd)/cuda_toolkit"
+        tar -xzvf "$CTK_CACHE_FILENAME"
+        ls -l "$CUDA_PATH"
+        # Sanity check: the unpacked toolkit must contain the headers directory.
+        [ -d "$CUDA_PATH/include" ] || exit 1
+
+        echo "CUDA_PATH=$CUDA_PATH" >> "$GITHUB_ENV"
+        echo "PATH=$PATH:$CUDA_PATH/bin" >> "$GITHUB_ENV"
+        # LD_LIBRARY_PATH may be unset; a bare expansion would abort under `set -u`.
+        echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$CUDA_PATH/lib" >> "$GITHUB_ENV"
+
+    - name: Run test / analysis
+      shell: bash --noprofile --norc -xeuo pipefail {0}
+      run: |
+        REPO_DIR=$(pwd)
+
+        cd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
+        pip install *.whl
+
+        cd "${CUDA_CORE_ARTIFACTS_DIR}"
+        pip install *.whl
+
+        cd "${REPO_DIR}/cuda_bindings"
+        pip install -r requirements.txt
+        pytest tests/
+        #pytest tests/cython
+
+        cd "${REPO_DIR}/cuda_core"
+        pytest -rxXs tests/
diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml
index 1975c3b5..cb27c879 100644
--- a/.github/workflows/ci-gh.yml
+++ b/.github/workflows/ci-gh.yml
@@ -1,4 +1,4 @@
-name: Build and test
+name: "CI"
 
 concurrency:
   group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }}
@@ -11,8 +11,7 @@ on:
       - "main"
 
 jobs:
-  build-and-test:
-    name: Build and test (${{ matrix.host-platform }}, ${{ matrix.target-device }}, ${{ matrix.build-mode }})
+  ci:
     strategy:
       fail-fast: false
       matrix:
@@ -35,6 +34,7 @@ jobs:
           # Note: this is for build-time only; the test-time matrix needs to be
           # defined separately.
           - "12.6.2"
+    name: "CI"
     uses:
       ./.github/workflows/gh-build-and-test.yml
     with:
diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml
index a9a711d4..06f6a168 100644
--- a/.github/workflows/gh-build-and-test.yml
+++ b/.github/workflows/gh-build-and-test.yml
@@ -25,21 +25,100 @@ on:
 
 jobs:
   build:
+    name: Build (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}")
     if: ${{ github.repository_owner == 'nvidia' }}
-    uses:
-      ./.github/workflows/gh-build.yml
-    with:
-      client-repo: ${{ github.event.repository.name }}
-      target-device: ${{ inputs.target-device }}
-      runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') ||
-                   (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
-                   (inputs.host-platform == 'win-x64' && 'windows-2019') }}
-                 #  (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }}
-      build-type: ${{ inputs.build-type }}
-      host-platform: ${{ inputs.host-platform }}
-      build-mode: ${{ inputs.build-mode }}
-      upload-enabled: ${{ inputs.upload-enabled }}
-      python-version: ${{ inputs.python-version }}
-      cuda-version: ${{ inputs.cuda-version }}
-      dependencies-file: ""
-    secrets: inherit
+    permissions:
+      id-token: write # This is required for configure-aws-credentials
+      contents: read  # This is required for actions/checkout
+    runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-cpu8') ||
+                 (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
+                 (inputs.host-platform == 'win-x64' && 'windows-2019') }}
+               #  (inputs.host-platform == 'win-x64' && 'windows-amd64-cpu8') }}
+    outputs:
+      CUDA_CORE_ARTIFACT_NAME: ${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACT_NAME }}
+      CUDA_CORE_ARTIFACTS_DIR: ${{ steps.pass_env.outputs.CUDA_CORE_ARTIFACTS_DIR }}
+      CUDA_BINDINGS_ARTIFACT_NAME: ${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}
+      CUDA_BINDINGS_ARTIFACTS_DIR: ${{ steps.pass_env.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}
+    steps:
+      - name: Checkout ${{ github.event.repository.name }}
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up build environment
+        uses: ./.github/actions/setup
+        with:
+          client-repo: ${{ github.event.repository.name }}
+          build-type: ${{ inputs.build-type }}
+          target-device: "${{ inputs.target-device }}"
+          host-platform: ${{ inputs.host-platform }}
+          build-mode: ${{ inputs.build-mode }}
+          upload-enabled: ${{ inputs.upload-enabled }}
+          python-version: ${{ inputs.python-version }}
+          cuda-version: ${{ inputs.cuda-version }}
+
+      - name: Call build action
+        uses: ./.github/actions/build
+        with:
+          build-type: ${{ inputs.build-type }}
+          target-device: "${{ inputs.target-device }}"
+          host-platform: ${{ inputs.host-platform }}
+          upload-enabled: ${{ inputs.upload-enabled }}
+
+      - name: Pass environment variables
+        id: pass_env
+        shell: bash --noprofile --norc -xeuo pipefail {0}  # Windows runners default to pwsh
+        run: |
+          for v in CUDA_CORE_ARTIFACT_NAME CUDA_CORE_ARTIFACTS_DIR CUDA_BINDINGS_ARTIFACT_NAME CUDA_BINDINGS_ARTIFACTS_DIR; do
+            echo "${v}=${!v}" >> "$GITHUB_OUTPUT"
+          done
+
+  test:
+    # TODO: improve the name once a separate test matrix is defined
+    name: Test (CUDA ${{ inputs.cuda-version }})
+    # TODO: enable testing once linux-aarch64 & win-64 GPU runners are up
+    if: ${{ (github.repository_owner == 'nvidia') &&
+             startsWith(inputs.host-platform, 'linux-x64') }}
+    permissions:
+      id-token: write # This is required for configure-aws-credentials
+      contents: read  # This is required for actions/checkout
+    runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') }}
+    # TODO: use a different (nvidia?) container, or just run on bare image
+    container:
+      options: -u root --security-opt seccomp=unconfined --privileged --shm-size 16g
+      image: condaforge/miniforge3:latest
+      env:
+        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+    needs:
+      - build
+    steps:
+      - name: Checkout ${{ github.event.repository.name }}
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # TODO: we probably don't need this?
+      # - name: Setup
+      #   if: ${{ inputs.has-built != 'true' }}
+      #   uses: ./.github/actions/setup
+      #   with:
+      #     client-repo: ${{ github.event.repository.name }}
+      #     build-type: ${{ inputs.build-type }}
+      #     target-device: "${{ inputs.target-device }}"
+      #     host-platform: ${{ inputs.host-platform }}
+      #     build-mode: ${{ inputs.build-mode }}
+      #     upload-enabled: ${{ inputs.upload-enabled }}
+      #     python-version: ${{ inputs.python-version }}
+
+      - name: Call test action
+        uses: ./.github/actions/test
+        with:
+          test-options: ${{ inputs.build-type }}
+        env:
+          CUDA_CORE_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_CORE_ARTIFACT_NAME }}
+          CUDA_CORE_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }}
+          CUDA_BINDINGS_ARTIFACT_NAME: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}
+          CUDA_BINDINGS_ARTIFACTS_DIR: ${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}
+          PYTHON_VERSION: ${{ inputs.python-version }}
+          CTK_BUILD_VER: ${{ inputs.cuda-version }}
+          HOST_PLATFORM: ${{ inputs.host-platform }}
diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml
deleted file mode 100644
index 7a9f03ce..00000000
--- a/.github/workflows/gh-build.yml
+++ /dev/null
@@ -1,73 +0,0 @@
-name: Build
-
-on:
-  workflow_call:
-    inputs:
-      client-repo:
-        required: true
-        type: string
-      target-device:
-        required: true
-        type: string
-      runs-on:
-        required: true
-        type: string
-      build-type:
-        required: true
-        type: string
-        description: One of ci / release
-      host-platform:
-        required: true
-        type: string
-      dependencies-file:
-        required: true
-        type: string
-        description: path to versions.json relative to the target repo dir
-      build-mode:
-        required: true
-        type: string
-      upload-enabled:
-        required: true
-        type: boolean
-      python-version:
-        required: true
-        type: string
-      cuda-version:
-        required: true
-        type: string
-
-jobs:
-  build:
-    name: Build (${{ inputs.host-platform }}, ${{ inputs.build-type }}, ${{ inputs.build-mode }}, Python "${{ inputs.python-version }}")
-
-    permissions:
-      id-token: write # This is required for configure-aws-credentials
-      contents: read  # This is required for actions/checkout
-
-    runs-on: ${{ inputs.runs-on }}
-
-    steps:
-      - name: Checkout ${{ inputs.client-repo }}
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up build environment
-        uses: ./.github/actions/setup
-        with:
-          client-repo: ${{ inputs.client-repo }}
-          build-type: ${{ inputs.build-type }}
-          target-device: "${{ inputs.target-device }}"
-          host-platform: ${{ inputs.host-platform }}
-          build-mode: ${{ inputs.build-mode }}
-          upload-enabled: ${{ inputs.upload-enabled }}
-          python-version: ${{ inputs.python-version }}
-          cuda-version: ${{ inputs.cuda-version }}
-
-      - name: Call build action
-        uses: ./.github/actions/build
-        with:
-          build-type: ${{ inputs.build-type }}
-          target-device: "${{ inputs.target-device }}"
-          host-platform: ${{ inputs.host-platform }}
-          upload-enabled: ${{ inputs.upload-enabled }}
diff --git a/continuous_integration/environment.yml b/continuous_integration/environment.yml
deleted file mode 100644
index 6d922d43..00000000
--- a/continuous_integration/environment.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: cuda_python
-channels:
-  - defaults
-dependencies:
-  - python>=3.10
-  - cython>=3.0.0
-  - pytest>=6.2.4
-  - numpy>=1.21.1
-  - setuptools
-  - wheel
-  - pip
-  - cuda-version=12.6
-  - cuda-cudart-static
-  - cuda-driver-dev
-  - cuda-cudart-dev
-  - cuda-profiler-api
-  - cuda-nvrtc-dev
-  - cuda-nvcc
-  - pip:
-    - pytest-benchmark>=3.4.1
-    - pyclibrary>=0.1.7
-    - versioneer==0.29
-    - tomli; python_version < "3.11"
-    - pywin32; sys_platform == 'win32'
diff --git a/continuous_integration/scripts/build b/continuous_integration/scripts/build
deleted file mode 100755
index 618edd5f..00000000
--- a/continuous_integration/scripts/build
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env bash
-
-build_ci() {
-    set -xeou pipefail
-
-    export CUDA_HOME="${CONDA_PREFIX}/targets/x86_64-linux"
-    export PARALLEL_LEVEL=$(nproc --ignore 1)
-
-    cd "${REPO_DIR}/cuda_bindings"
-    python setup.py bdist_wheel
-    
-    cd "${REPO_DIR}/cuda_core"
-    python setup.py bdist_wheel
-}
-
-build_project() {
-    set -xeou pipefail
-
-    export PYTHONUNBUFFERED=1
-
-    . setup-utils;
-    init_build_env "$@";
-
-    git config --global --add safe.directory "$REPO_DIR/.git"
-
-    case "${BUILD_TYPE}" in
-        ci) build_ci;;
-        *) return 1;;
-    esac
-}
-
-(build_project "$@");
diff --git a/continuous_integration/scripts/conda-utils b/continuous_integration/scripts/conda-utils
deleted file mode 100755
index e0dd32ca..00000000
--- a/continuous_integration/scripts/conda-utils
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env bash
-
-activate_conda_env() {
-    set +xu
-    eval "$(conda shell.bash hook)"
-    conda activate "${CONDA_ENV}";
-    set -xu
-    : ${PYTHON_VERSION:=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")}
-    export PYTHON_VERSION
-}
-
-conda_info() {
-    set +x
-    conda info
-    set -x
-}
diff --git a/continuous_integration/scripts/entrypoint b/continuous_integration/scripts/entrypoint
deleted file mode 100755
index fe4f5cea..00000000
--- a/continuous_integration/scripts/entrypoint
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-set_initial_env() {
-    set -xeuo pipefail
-
-    export PATH="${PATH}:${REPO_DIR}/continuous_integration/scripts"
-}
-
-entrypoint() {
-    set -xeuo pipefail
-    set_initial_env;
-
-    git config --global --add safe.directory "$REPO_DIR/.git"
-
-    cd "${REPO_DIR}"
-
-    exec "$@";
-}
-
-entrypoint "$@";
diff --git a/continuous_integration/scripts/generate-environment b/continuous_integration/scripts/generate-environment
deleted file mode 100755
index 8bf2c38d..00000000
--- a/continuous_integration/scripts/generate-environment
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-
-# Function to generate environment.yml
-generate_environment_yml() {
-    local python_version=$1
-    local cuda_version=$2
-    local output_path=$3
-
-    cat <<EOF > "${output_path}/environment.yml"
-name: cuda_python
-channels:
-  - defaults
-  - conda-forge
-dependencies:
-  - python=${python_version}
-  - cython
-  - pytest
-  - numpy
-  - setuptools
-  - wheel
-  - pip
-  - cuda-version=${cuda_version}
-  - cuda-cudart-static
-  - cuda-driver-dev
-  - cuda-cudart-dev
-  - cuda-profiler-api
-  - cuda-nvrtc-dev
-  - cuda-nvcc
-  - pip:
-    - pytest-benchmark
-    - pyclibrary
-    - versioneer==0.29
-    - tomli; python_version < "3.11"
-    - pywin32; sys_platform == 'win32'
-EOF
-}
\ No newline at end of file
diff --git a/continuous_integration/scripts/make-conda-env b/continuous_integration/scripts/make-conda-env
deleted file mode 100755
index 37539b37..00000000
--- a/continuous_integration/scripts/make-conda-env
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-
-set -x
-
-make_ci_env() {
-    #TODO wire cuda version as a top level matrix argument
-    generate_environment_yml "${PYTHON_VERSION}" 12.6 .
-    mamba env create -n "${CONDA_ENV}" -f ./environment.yml
-}
-
-make_conda_env() {
-    set -xeuo pipefail
-
-    . setup-utils;
-    . generate-environment
-    set_base_defs;
-
-    case "$1" in
-        ci) make_ci_env;;
-        test) make_test_env;;
-        *) return 1;;
-    esac
-
-    return 0;
-}
-
-(make_conda_env "$@");
diff --git a/continuous_integration/scripts/setup-utils b/continuous_integration/scripts/setup-utils
deleted file mode 100755
index 62579e63..00000000
--- a/continuous_integration/scripts/setup-utils
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/env bash
-
-install_from_apt() {
-    set -xeuo pipefail
-
-    export DEBIAN_FRONTEND=non-interactive
-
-    # Run package updates and install packages
-    apt-get -q update
-    apt-get -q install -y wget curl jq sudo ninja-build vim numactl rsync
-}
-
-install_cmake() {
-    set -xeuo pipefail
-
-    wget -q https://github.com/Kitware/CMake/releases/download/v3.26.5/cmake-3.26.5-linux-x86_64.tar.gz
-
-    tar -xzf cmake-3.26.5-linux-x86_64.tar.gz
-}
-
-setup_linux_build_env() {
-    set -xeuo pipefail
-    export OS_SHORT_NAME=linux
-    export PATH="${PATH}:${PREBUILD_DIR}/cmake-3.26.5-linux-x86_64/bin"
-
-    mkdir -p /tmp/out /tmp/env_yaml
-}
-
-install_linux_tools() {
-    set -xeuo pipefail
-
-    export SED=sed
-    export READLINK=readlink
-
-    install_from_apt;
-    install_cmake;
-
-    mkdir -p /tmp/out /tmp/env_yaml
-}
-
-install_linux_test_tools() {
-    set -xeuo pipefail
-
-    export SED=sed
-    export READLINK=readlink
-
-    # Run package updates and install packages
-    apt-get -q update
-    apt-get -q install -y numactl
-}
-
-set_base_defs() {
-    set -xeuo pipefail
-
-    export CONDA_ENV=cuda_python
-
-    CONDA_PLATFORM=$(conda info | grep 'platform' | awk -F ' : ' '{print $2}')
-    export CONDA_PLATFORM
-
-    export PREBUILD_DIR=/tmp/prebuild
-    mkdir -p "$PREBUILD_DIR"
-
-    export BUILD_DIR="$REPO_DIR/build"
-
-    # Get the machine architecture
-    ARCH=$(uname -m)
-
-    if [ "$ARCH" == "aarch64" ]; then
-        # Use the gcc march value used by aarch64 Ubuntu.
-        BUILD_MARCH=armv8-a
-    else
-        # Use uname -m otherwise
-        BUILD_MARCH=$(uname -m | tr '_' '-')
-    fi
-
-    export BUILD_MARCH
-
-    export CUDA_VERSION=12.2.2
-
-    export MAX_LIBSANITIZER_VERSION=11.4
-
-    export USE_OPENMP=ON
-}
-
-# -----------------------------------------------------------------------------
-
-prep_git() {
-    # Temporarily disable exit on error
-    set +e
-    git config --global user.email > /dev/null
-    local email_exit_status=$?
-    git config --global user.name > /dev/null
-    local name_exit_status=$?
-    # Re-enable exit on error
-    set -e
-
-    if [ $email_exit_status -ne 0 ]; then
-        git config --global --add user.email "users.noreply.github.com"
-        echo "git user.email was not set. It's now set to users.noreply.github.com"
-    else
-        echo "Note: git user.email is already set."
-    fi
-
-    if [ $name_exit_status -ne 0 ]; then
-        git config --global --add user.name "anon"
-        echo "git user.name was not set. It's now set to anon"
-    else
-        echo "Note: git user.name is already set."
-    fi
-
-    # Fix "fatal: detected dubious ownership in repository at '/tmp/legate.core'"
-    # during local builds.
-    git config --global --add safe.directory "$REPO_DIR"
-}
-
-
-setup_build_env() {
-    set -xeuo pipefail
-
-    install_linux_tools;
-
-    setup_linux_build_env;
-
-    rm -rf "$PREBUILD_DIR"
-    mkdir -p "$PREBUILD_DIR"
-    cd $PREBUILD_DIR
-
-    prep_git;
-}
-
-init_build_env() {
-    set -x;
-
-    . conda-utils;
-
-    export BUILD_TYPE=$1
-
-    set -xeuo pipefail;
-
-    set_base_defs;
-
-    cd "$PREBUILD_DIR"
-
-    setup_build_env;
-
-    cd "$REPO_DIR";
-
-    if [[ -d "${BUILD_DIR}" ]]; then
-        rm -rf "${BUILD_DIR}"
-    fi
-
-    make-conda-env "$BUILD_TYPE";
-
-    activate_conda_env;
-    conda_info;
-}
\ No newline at end of file
diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index b67eeec2..fe755738 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -21,8 +21,14 @@
 from cuda.core.experimental._utils import handle_return
 
 
+@pytest.fixture(scope="session", autouse=True)
+def always_init_cuda():
+    handle_return(driver.cuInit(0))
+
+
 @pytest.fixture(scope="function")
 def init_cuda():
+    # TODO: rename this to e.g. init_context
     device = Device()
     device.set_current()
     yield
@@ -41,6 +47,7 @@ def _device_unset_current():
 
 @pytest.fixture(scope="function")
 def deinit_cuda():
+    # TODO: rename this to e.g. deinit_context
     yield
     _device_unset_current()
 
diff --git a/cuda_core/tests/example_tests/utils.py b/cuda_core/tests/example_tests/utils.py
index 731adedb..43982fee 100644
--- a/cuda_core/tests/example_tests/utils.py
+++ b/cuda_core/tests/example_tests/utils.py
@@ -10,7 +10,6 @@
 import os
 import sys
 
-import cupy as cp
 import pytest
 
 
@@ -56,4 +55,3 @@ def run_example(samples_path, filename, env=None):
         sys.argv = old_argv
         # further reduce the memory watermark
         gc.collect()
-        cp.get_default_memory_pool().free_all_blocks()
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 95c4d377..f1c24b3e 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -8,10 +8,19 @@
 
 import pytest
 
-from cuda.core.experimental import Program
+from cuda import cuda, nvrtc
+from cuda.core.experimental import Device, Program
 from cuda.core.experimental._module import Kernel, ObjectCode
 
 
+def can_load_generated_ptx():
+    """Report whether the driver version is at least the NVRTC version used to generate PTX."""
+    _, driver_version = cuda.cuDriverGetVersion()
+    _, major, minor = nvrtc.nvrtcVersion()
+    # Put the NVRTC version into the major * 1000 + minor * 10 form before comparing.
+    return major * 1000 + minor * 10 <= driver_version
+
+
 def test_program_init_valid_code_type():
     code = 'extern "C" __global__ void my_kernel() {}'
     program = Program(code, "c++")
@@ -31,10 +40,14 @@ def test_program_init_invalid_code_format():
         Program(code, "c++")
 
 
+# TODO: incorporate this check in Program
+@pytest.mark.xfail(not can_load_generated_ptx(), reason="PTX version too new")
 def test_program_compile_valid_target_type():
     code = 'extern "C" __global__ void my_kernel() {}'
     program = Program(code, "c++")
-    object_code = program.compile("ptx")
+    arch = "".join(str(i) for i in Device().compute_capability)
+    object_code = program.compile("ptx", options=(f"-arch=compute_{arch}",))
+    print(object_code._module.decode())
     kernel = object_code.get_kernel("my_kernel")
     assert isinstance(object_code, ObjectCode)
     assert isinstance(kernel, Kernel)