From f5d1c24760d90003c1a577c696ac5de23a289e64 Mon Sep 17 00:00:00 2001
From: Ray Douglass <ray@raydouglass.com>
Date: Mon, 20 May 2024 17:38:30 -0400
Subject: [PATCH 01/15] DOC v24.08 Updates [skip ci]

---
 .../cuda11.8-conda/devcontainer.json          |  6 +--
 .devcontainer/cuda11.8-pip/devcontainer.json  |  6 +--
 .../cuda12.2-conda/devcontainer.json          |  6 +--
 .devcontainer/cuda12.2-pip/devcontainer.json  |  6 +--
 .github/workflows/build.yaml                  | 16 ++++----
 .github/workflows/pandas-tests.yaml           |  2 +-
 .github/workflows/pr.yaml                     | 40 +++++++++----------
 .github/workflows/test.yaml                   | 22 +++++-----
 README.md                                     |  2 +-
 VERSION                                       |  2 +-
 .../all_cuda-118_arch-x86_64.yaml             | 10 ++---
 .../all_cuda-122_arch-x86_64.yaml             | 10 ++---
 cpp/examples/versions.cmake                   |  2 +-
 dependencies.yaml                             | 32 +++++++--------
 java/ci/README.md                             |  4 +-
 java/pom.xml                                  |  2 +-
 python/cudf/pyproject.toml                    |  4 +-
 python/cudf_kafka/pyproject.toml              |  2 +-
 python/cudf_polars/pyproject.toml             |  2 +-
 python/custreamz/pyproject.toml               |  4 +-
 python/dask_cudf/pyproject.toml               |  6 +--
 21 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json
index 944a73ecc98..c62e18512a0 100644
--- a/.devcontainer/cuda11.8-conda/devcontainer.json
+++ b/.devcontainer/cuda11.8-conda/devcontainer.json
@@ -5,17 +5,17 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "conda",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-mambaforge-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda11.8-mambaforge-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-conda"
+    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json
index 8b802333bda..4ab4bd75643 100644
--- a/.devcontainer/cuda11.8-pip/devcontainer.json
+++ b/.devcontainer/cuda11.8-pip/devcontainer.json
@@ -5,17 +5,17 @@
     "args": {
       "CUDA": "11.8",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda11.8-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda11.8-pip"
+    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json
index 886b07025cc..2b50454410f 100644
--- a/.devcontainer/cuda12.2-conda/devcontainer.json
+++ b/.devcontainer/cuda12.2-conda/devcontainer.json
@@ -5,17 +5,17 @@
     "args": {
       "CUDA": "12.2",
       "PYTHON_PACKAGE_MANAGER": "conda",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-mambaforge-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.08-cpp-mambaforge-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-conda"
+    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-conda"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json
index 86df56ada19..fc5abc56094 100644
--- a/.devcontainer/cuda12.2-pip/devcontainer.json
+++ b/.devcontainer/cuda12.2-pip/devcontainer.json
@@ -5,17 +5,17 @@
     "args": {
       "CUDA": "12.2",
       "PYTHON_PACKAGE_MANAGER": "pip",
-      "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04"
+      "BASE": "rapidsai/devcontainers:24.08-cpp-cuda12.2-ubuntu22.04"
     }
   },
   "runArgs": [
     "--rm",
     "--name",
-    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.06-cuda12.2-pip"
+    "${localEnv:USER}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.2-pip"
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
-    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {}
+    "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {}
   },
   "overrideFeatureInstallOrder": [
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 6942ef0009d..c5679cc5141 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   cpp-build:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.08
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
   python-build:
     needs: [cpp-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.08
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -46,7 +46,7 @@ jobs:
   upload-conda:
     needs: [cpp-build, python-build]
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.08
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -57,7 +57,7 @@ jobs:
     if: github.ref_type == 'branch'
     needs: python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       arch: "amd64"
       branch: ${{ inputs.branch }}
@@ -69,7 +69,7 @@ jobs:
       sha: ${{ inputs.sha }}
   wheel-build-cudf:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -79,7 +79,7 @@ jobs:
   wheel-publish-cudf:
     needs: wheel-build-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.08
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -89,7 +89,7 @@ jobs:
   wheel-build-dask-cudf:
     needs: wheel-publish-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -101,7 +101,7 @@ jobs:
   wheel-publish-dask-cudf:
     needs: wheel-build-dask-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.08
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml
index 60544294809..a8643923a4d 100644
--- a/.github/workflows/pandas-tests.yaml
+++ b/.github/workflows/pandas-tests.yaml
@@ -17,7 +17,7 @@ jobs:
   pandas-tests:
       # run the Pandas unit tests
       secrets: inherit
-      uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+      uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
       with:
         matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and .CUDA_VER == "12.2.2" ))
         build_type: nightly
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index f9d5976f1fe..cb582df21e0 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -32,41 +32,41 @@ jobs:
       - pandas-tests
       - pandas-tests-diff
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.08
   checks:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.08
     with:
       enable_check_generated_files: false
   conda-cpp-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.08
     with:
       build_type: pull-request
   conda-cpp-checks:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.08
     with:
       build_type: pull-request
       enable_check_symbols: true
   conda-cpp-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.08
     with:
       build_type: pull-request
   conda-python-build:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.08
     with:
       build_type: pull-request
   conda-python-cudf-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08
     with:
       build_type: pull-request
       script: "ci/test_python_cudf.sh"
@@ -74,14 +74,14 @@ jobs:
     # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08
     with:
       build_type: pull-request
       script: "ci/test_python_other.sh"
   conda-java-tests:
     needs: conda-cpp-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -91,7 +91,7 @@ jobs:
   static-configure:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: pull-request
       # Use the wheel container so we can skip conda solves and since our
@@ -101,7 +101,7 @@ jobs:
   conda-notebook-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -111,7 +111,7 @@ jobs:
   docs-build:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: pull-request
       node_type: "gpu-v100-latest-1"
@@ -121,21 +121,21 @@ jobs:
   wheel-build-cudf:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08
     with:
       build_type: pull-request
       script: "ci/build_wheel_cudf.sh"
   wheel-tests-cudf:
     needs: wheel-build-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
     with:
       build_type: pull-request
       script: ci/test_wheel_cudf.sh
   wheel-build-dask-cudf:
     needs: wheel-build-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -144,7 +144,7 @@ jobs:
   wheel-tests-dask-cudf:
     needs: wheel-build-dask-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -152,7 +152,7 @@ jobs:
       script: ci/test_wheel_dask_cudf.sh
   devcontainer:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.08
     with:
       arch: '["amd64"]'
       cuda: '["12.2"]'
@@ -163,7 +163,7 @@ jobs:
   unit-tests-cudf-pandas:
     needs: wheel-build-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
     with:
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
       build_type: pull-request
@@ -172,7 +172,7 @@ jobs:
     # run the Pandas unit tests using PR branch
     needs: wheel-build-cudf
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
     with:
       matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and .CUDA_VER == "12.2.2" ))
       build_type: pull-request
@@ -182,7 +182,7 @@ jobs:
   pandas-tests-diff:
     # diff the results of running the Pandas unit tests and publish a job summary
     needs: pandas-tests
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
         node_type: cpu4
         build_type: pull-request
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 170f45e23fd..36c9088d93c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-cpp-checks:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -25,7 +25,7 @@ jobs:
       enable_check_symbols: true
   conda-cpp-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -33,7 +33,7 @@ jobs:
       sha: ${{ inputs.sha }}
   conda-cpp-memcheck-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -45,7 +45,7 @@ jobs:
       run_script: "ci/test_cpp_memcheck.sh"
   static-configure:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: pull-request
       # Use the wheel container so we can skip conda solves and since our
@@ -54,7 +54,7 @@ jobs:
       run_script: "ci/configure_cpp_static.sh"
   conda-python-cudf-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -64,7 +64,7 @@ jobs:
   conda-python-other-tests:
     # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -73,7 +73,7 @@ jobs:
       script: "ci/test_python_other.sh"
   conda-java-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -85,7 +85,7 @@ jobs:
       run_script: "ci/test_java.sh"
   conda-notebook-tests:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -97,7 +97,7 @@ jobs:
       run_script: "ci/test_notebooks.sh"
   wheel-tests-cudf:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
@@ -106,7 +106,7 @@ jobs:
       script: ci/test_wheel_cudf.sh
   wheel-tests-dask-cudf:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
     with:
       # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -117,7 +117,7 @@ jobs:
       script: ci/test_wheel_dask_cudf.sh
   unit-tests-cudf-pandas:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
diff --git a/README.md b/README.md
index 205e16ea0e5..377998cd991 100644
--- a/README.md
+++ b/README.md
@@ -93,7 +93,7 @@ cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects
 
 ```bash
 conda install -c rapidsai -c conda-forge -c nvidia \
-    cudf=24.06 python=3.11 cuda-version=12.2
+    cudf=24.08 python=3.11 cuda-version=12.2
 ```
 
 We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
diff --git a/VERSION b/VERSION
index 0bff6981a3d..ec8489fda92 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-24.06.00
+24.08.00
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 48699b81eed..2ce1d9597e8 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -26,7 +26,7 @@ dependencies:
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.3
-- dask-cuda==24.6.*
+- dask-cuda==24.8.*
 - dlpack>=0.8,<1.0
 - doxygen=1.9.1
 - fastavro>=0.22.9
@@ -43,10 +43,10 @@ dependencies:
 - libcufile=1.4.0.31
 - libcurand-dev=10.3.0.86
 - libcurand=10.3.0.86
-- libkvikio==24.6.*
+- libkvikio==24.8.*
 - libparquet==16.0.0.*
 - librdkafka>=1.9.0,<1.10.0a0
-- librmm==24.6.*
+- librmm==24.8.*
 - make
 - moto>=4.0.8
 - msgpack-python
@@ -76,9 +76,9 @@ dependencies:
 - python-confluent-kafka>=1.9.0,<1.10.0a0
 - python>=3.9,<3.12
 - pytorch>=2.1.0
-- rapids-dask-dependency==24.6.*
+- rapids-dask-dependency==24.8.*
 - rich
-- rmm==24.6.*
+- rmm==24.8.*
 - s3fs>=2022.3.0
 - scikit-build-core>=0.7.0
 - scipy
diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml
index d06a727f331..64d97dd742e 100644
--- a/conda/environments/all_cuda-122_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -27,7 +27,7 @@ dependencies:
 - cupy>=12.0.0
 - cxx-compiler
 - cython>=3.0.3
-- dask-cuda==24.6.*
+- dask-cuda==24.8.*
 - dlpack>=0.8,<1.0
 - doxygen=1.9.1
 - fastavro>=0.22.9
@@ -42,10 +42,10 @@ dependencies:
 - libarrow==16.0.0.*
 - libcufile-dev
 - libcurand-dev
-- libkvikio==24.6.*
+- libkvikio==24.8.*
 - libparquet==16.0.0.*
 - librdkafka>=1.9.0,<1.10.0a0
-- librmm==24.6.*
+- librmm==24.8.*
 - make
 - moto>=4.0.8
 - msgpack-python
@@ -74,9 +74,9 @@ dependencies:
 - python-confluent-kafka>=1.9.0,<1.10.0a0
 - python>=3.9,<3.12
 - pytorch>=2.1.0
-- rapids-dask-dependency==24.6.*
+- rapids-dask-dependency==24.8.*
 - rich
-- rmm==24.6.*
+- rmm==24.8.*
 - s3fs>=2022.3.0
 - scikit-build-core>=0.7.0
 - scipy
diff --git a/cpp/examples/versions.cmake b/cpp/examples/versions.cmake
index dff66b4d7d8..144b3d3721b 100644
--- a/cpp/examples/versions.cmake
+++ b/cpp/examples/versions.cmake
@@ -12,4 +12,4 @@
 # the License.
 # =============================================================================
 
-set(CUDF_TAG branch-24.06)
+set(CUDF_TAG branch-24.08)
diff --git a/dependencies.yaml b/dependencies.yaml
index f20c1591e73..39290fd2b93 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -270,8 +270,8 @@ dependencies:
       - output_types: conda
         packages:
           - fmt>=10.1.1,<11
-          - librmm==24.6.*
-          - libkvikio==24.6.*
+          - librmm==24.8.*
+          - libkvikio==24.8.*
           - librdkafka>=1.9.0,<1.10.0a0
           # Align nvcomp version with rapids-cmake
           - nvcomp==3.0.6
@@ -305,7 +305,7 @@ dependencies:
     common:
       - output_types: conda
         packages:
-          - &rmm_conda rmm==24.6.*
+          - &rmm_conda rmm==24.8.*
           - pip
           - pip:
               - git+https://github.com/python-streamz/streamz.git@master
@@ -321,10 +321,10 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages: &build_python_packages_cu12
-              - &rmm_cu12 rmm-cu12==24.6.*
+              - &rmm_cu12 rmm-cu12==24.8.*
           - matrix: {cuda: "11.*"}
             packages: &build_python_packages_cu11
-              - &rmm_cu11 rmm-cu11==24.6.*
+              - &rmm_cu11 rmm-cu11==24.8.*
           - {matrix: null, packages: [*rmm_conda] }
   libarrow_build:
     common:
@@ -477,7 +477,7 @@ dependencies:
       - output_types: [conda]
         packages:
           - breathe>=4.35.0
-          - dask-cuda==24.6.*
+          - dask-cuda==24.8.*
           - *doxygen
           - make
           - myst-nb
@@ -568,11 +568,11 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - rmm-cu12==24.6.*
+              - rmm-cu12==24.8.*
               - pynvjitlink-cu12
           - matrix: {cuda: "11.*"}
             packages:
-              - rmm-cu11==24.6.*
+              - rmm-cu11==24.8.*
               - cubinlinker-cu11
               - ptxcompiler-cu11
           - {matrix: null, packages: [cubinlinker, ptxcompiler, *rmm_conda]}
@@ -585,7 +585,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - rapids-dask-dependency==24.6.*
+          - rapids-dask-dependency==24.8.*
   run_custreamz:
     common:
       - output_types: conda
@@ -671,13 +671,13 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - dask-cuda==24.6.*
+          - dask-cuda==24.8.*
           - *numba
   depends_on_cudf:
     common:
       - output_types: conda
         packages:
-          - &cudf_conda cudf==24.6.*
+          - &cudf_conda cudf==24.8.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -689,16 +689,16 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - cudf-cu12==24.6.*
+              - cudf-cu12==24.8.*
           - matrix: {cuda: "11.*"}
             packages:
-              - cudf-cu11==24.6.*
+              - cudf-cu11==24.8.*
           - {matrix: null, packages: [*cudf_conda]}
   depends_on_cudf_kafka:
     common:
       - output_types: conda
         packages:
-          - &cudf_kafka_conda cudf_kafka==24.6.*
+          - &cudf_kafka_conda cudf_kafka==24.8.*
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -710,10 +710,10 @@ dependencies:
         matrices:
           - matrix: {cuda: "12.*"}
             packages:
-              - cudf_kafka-cu12==24.6.*
+              - cudf_kafka-cu12==24.8.*
           - matrix: {cuda: "11.*"}
             packages:
-              - cudf_kafka-cu11==24.6.*
+              - cudf_kafka-cu11==24.8.*
           - {matrix: null, packages: [*cudf_kafka_conda]}
   depends_on_cupy:
     common:
diff --git a/java/ci/README.md b/java/ci/README.md
index 18ad3cc4d0d..49481efab6b 100644
--- a/java/ci/README.md
+++ b/java/ci/README.md
@@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.8.0-devel-rocky8 bash
 You can download the cuDF repo in the docker container or you can mount it into the container.
 Here I choose to download again in the container.
 ```bash
-git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.06
+git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.08
 ```
 
 ### Build cuDF jar with devtoolset
@@ -47,4 +47,4 @@ scl enable gcc-toolset-11 "java/ci/build-in-docker.sh"
 
 ### The output
 
-You can find the cuDF jar in java/target/ like cudf-24.06.0-SNAPSHOT-cuda11.jar.
+You can find the cuDF jar in java/target/ like cudf-24.08.0-SNAPSHOT-cuda11.jar.
diff --git a/java/pom.xml b/java/pom.xml
index 46b5ce4c083..70230e6bc71 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -21,7 +21,7 @@
 
     <groupId>ai.rapids</groupId>
     <artifactId>cudf</artifactId>
-    <version>24.06.0-SNAPSHOT</version>
+    <version>24.08.0-SNAPSHOT</version>
 
     <name>cudfjni</name>
     <description>
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 826362f0632..1b7bb106d49 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -8,7 +8,7 @@ requires = [
     "ninja",
     "numpy==1.23.*",
     "pyarrow==16.0.0.*",
-    "rmm==24.6.*",
+    "rmm==24.8.*",
     "scikit-build-core[pyproject]>=0.7.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
@@ -36,7 +36,7 @@ dependencies = [
     "ptxcompiler",
     "pyarrow>=16.0.0,<16.1.0a0",
     "rich",
-    "rmm==24.6.*",
+    "rmm==24.8.*",
     "typing_extensions>=4.0.0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index 787dd8a97d7..b1bb4c5bd24 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -22,7 +22,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.9"
 dependencies = [
-    "cudf==24.6.*",
+    "cudf==24.8.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.optional-dependencies]
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index de26a3eb51c..00fde6c0e05 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -18,7 +18,7 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.9"
 dependencies = [
-    "cudf==24.6.*",
+    "cudf==24.8.*",
     "polars>=0.20.24",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index 7786bf98bef..f7e5698900a 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -19,8 +19,8 @@ license = { text = "Apache 2.0" }
 requires-python = ">=3.9"
 dependencies = [
     "confluent-kafka>=1.9.0,<1.10.0a0",
-    "cudf==24.6.*",
-    "cudf_kafka==24.6.*",
+    "cudf==24.8.*",
+    "cudf_kafka==24.8.*",
     "streamz",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml
index 5fbdd98225e..e353eac06b9 100644
--- a/python/dask_cudf/pyproject.toml
+++ b/python/dask_cudf/pyproject.toml
@@ -18,12 +18,12 @@ authors = [
 license = { text = "Apache 2.0" }
 requires-python = ">=3.9"
 dependencies = [
-    "cudf==24.6.*",
+    "cudf==24.8.*",
     "cupy-cuda11x>=12.0.0",
     "fsspec>=0.6.0",
     "numpy>=1.23,<2.0a0",
     "pandas>=2.0,<2.2.3dev0",
-    "rapids-dask-dependency==24.6.*",
+    "rapids-dask-dependency==24.8.*",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
     "Intended Audience :: Developers",
@@ -44,7 +44,7 @@ cudf = "dask_cudf.backends:CudfDXBackendEntrypoint"
 
 [project.optional-dependencies]
 test = [
-    "dask-cuda==24.6.*",
+    "dask-cuda==24.8.*",
     "numba>=0.57",
     "pytest-cov",
     "pytest-xdist",

From 333718ac90b8d98e026aa57cfa0084af4c68a0f3 Mon Sep 17 00:00:00 2001
From: Paul Mattione <156858817+pmattione-nvidia@users.noreply.github.com>
Date: Tue, 21 May 2024 14:31:55 -0400
Subject: [PATCH 02/15] For powers of 10, replace ipow with switch (#15353)

This adds a new runtime calculation of the power-of-10 needed for applying decimal scale factors with a switch statement.  This provides the fastest way of applying the scale.  Note that the multiply and divide operations are performed within the switch itself, so that the compiler sees the full instruction to optimize assembly code gen.  See code comments for details.

This cannot be used within fixed_point (e.g. for comparison operators and rescaling) as it introduced too much register pressure to unrelated benchmarks.  It will only be used for the decimal <--> floating conversion, so it has been moved there to be in a new header file where that code will reside (in an upcoming PR).  This is part of a larger change to change the algorithm for decimal <--> floating conversion to a more accurate one that is forthcoming soon.

Authors:
  - Paul Mattione (https://github.com/pmattione-nvidia)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/15353
---
 .../cudf/fixed_point/floating_conversion.hpp  | 374 ++++++++++++++++++
 1 file changed, 374 insertions(+)
 create mode 100644 cpp/include/cudf/fixed_point/floating_conversion.hpp

diff --git a/cpp/include/cudf/fixed_point/floating_conversion.hpp b/cpp/include/cudf/fixed_point/floating_conversion.hpp
new file mode 100644
index 00000000000..492f7e75219
--- /dev/null
+++ b/cpp/include/cudf/fixed_point/floating_conversion.hpp
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cuda/std/type_traits>
+
+namespace numeric {
+
+/**
+ * @addtogroup floating_conversion
+ * @{
+ * @file
+ * @brief fixed_point <--> floating-point conversion functions.
+ */
+
+namespace detail {
+
+/**
+ * @brief Recursively calculate a large power of 10 (>= 10^19), returned as an unsigned 128-bit
+ * integer
+ *
+ * @note Intended to be run at compile time.
+ *
+ * @tparam Exp10 The power of 10 to calculate
+ * @return Returns 10^Exp10
+ */
+template <int Exp10>
+constexpr __uint128_t large_power_of_10()
+{
+  // Stop at 10^19 to speed up compilation; literals can be used for smaller powers of 10.
+  static_assert(Exp10 >= 19);
+  if constexpr (Exp10 == 19)
+    return __uint128_t(10000000000000000000ULL);
+  else
+    return large_power_of_10<Exp10 - 1>() * __uint128_t(10);
+}
+
+/**
+ * @brief Divide by a power of 10 that fits within a 32bit integer.
+ *
+ * @tparam T Type of value to be divided-from.
+ * @param value The number to be divided-from.
+ * @param exp10 The power-of-10 of the denominator, from 0 to 9 inclusive.
+ * @return Returns value / 10^exp10, or 0 if exp10 is outside [0, 9]
+ */
+template <typename T, typename cuda::std::enable_if_t<cuda::std::is_unsigned_v<T>>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T divide_power10_32bit(T value, int exp10)
+{
+  // Computing division this way is much faster than the alternatives.
+  // Division is not implemented in GPU hardware, and the compiler will often implement it as a
+  // multiplication of the reciprocal of the denominator, requiring a conversion to floating point.
+  // This is especially slow for larger divides that have to use the FP64 pipeline, where threads
+  // bottleneck.
+
+  // Instead, if the compiler can see exactly what number it is dividing by, it can
+  // produce much more optimal assembly, doing bit shifting, multiplies by a constant, etc.
+  // For the compiler to see the value though, array lookup (with exp10 as the index)
+  // is not sufficient: We have to use a switch statement. Although this introduces a branch,
+  // it is still much faster than doing the divide any other way.
+  // Perhaps an array can be used in C++23 with the assume attribute?
+
+  // Since we're optimizing division this way, we have to do this for multiplication as well.
+  // That's because doing them in different ways (switch, array, runtime-computation, etc.)
+  // increases the register pressure on all kernels that use fixed_point types, specifically slowing
+  // down some of the PYMOD and join benchmarks.
+
+  // This is split up into separate functions for 32-, 64-, and 128-bit denominators.
+  // That way we limit the templated, inlined code generation to the exponents that are
+  // capable of being represented. Combining them together into a single function again
+  // introduces too much pressure on the kernels that use this code, slowing down their benchmarks.
+  // It also dramatically slows down the compile time.
+
+  switch (exp10) {
+    case 0: return value;
+    case 1: return value / 10U;
+    case 2: return value / 100U;
+    case 3: return value / 1000U;
+    case 4: return value / 10000U;
+    case 5: return value / 100000U;
+    case 6: return value / 1000000U;
+    case 7: return value / 10000000U;
+    case 8: return value / 100000000U;
+    case 9: return value / 1000000000U;
+    default: return 0;
+  }
+}
+
+/**
+ * @brief Divide by a power of 10 that fits within a 64bit integer.
+ *
+ * @tparam T Type of value to be divided-from.
+ * @param value The number to be divided-from.
+ * @param exp10 The power-of-10 of the denominator, from 0 to 19 inclusive.
+ * @return Returns value / 10^exp10, or 0 if exp10 is outside [0, 19]
+ */
+template <typename T, typename cuda::std::enable_if_t<cuda::std::is_unsigned_v<T>>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T divide_power10_64bit(T value, int exp10)
+{
+  // See comments in divide_power10_32bit() for discussion.
+  switch (exp10) {
+    case 0: return value;
+    case 1: return value / 10U;
+    case 2: return value / 100U;
+    case 3: return value / 1000U;
+    case 4: return value / 10000U;
+    case 5: return value / 100000U;
+    case 6: return value / 1000000U;
+    case 7: return value / 10000000U;
+    case 8: return value / 100000000U;
+    case 9: return value / 1000000000U;
+    case 10: return value / 10000000000ULL;
+    case 11: return value / 100000000000ULL;
+    case 12: return value / 1000000000000ULL;
+    case 13: return value / 10000000000000ULL;
+    case 14: return value / 100000000000000ULL;
+    case 15: return value / 1000000000000000ULL;
+    case 16: return value / 10000000000000000ULL;
+    case 17: return value / 100000000000000000ULL;
+    case 18: return value / 1000000000000000000ULL;
+    case 19: return value / 10000000000000000000ULL;
+    default: return 0;
+  }
+}
+
+/**
+ * @brief Divide by a power of 10 that fits within a 128bit integer.
+ *
+ * @tparam T Type of value to be divided-from.
+ * @param value The number to be divided-from.
+ * @param exp10 The power-of-10 of the denominator, from 0 to 38 inclusive.
+ * @return Returns value / 10^exp10.
+ */
+template <typename T, typename cuda::std::enable_if_t<cuda::std::is_unsigned_v<T>>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int exp10)
+{
+  // See comments in divide_power10_32bit() for an introduction.
+  switch (exp10) {
+    case 0: return value;
+    case 1: return value / 10U;
+    case 2: return value / 100U;
+    case 3: return value / 1000U;
+    case 4: return value / 10000U;
+    case 5: return value / 100000U;
+    case 6: return value / 1000000U;
+    case 7: return value / 10000000U;
+    case 8: return value / 100000000U;
+    case 9: return value / 1000000000U;
+    case 10: return value / 10000000000ULL;
+    case 11: return value / 100000000000ULL;
+    case 12: return value / 1000000000000ULL;
+    case 13: return value / 10000000000000ULL;
+    case 14: return value / 100000000000000ULL;
+    case 15: return value / 1000000000000000ULL;
+    case 16: return value / 10000000000000000ULL;
+    case 17: return value / 100000000000000000ULL;
+    case 18: return value / 1000000000000000000ULL;
+    case 19: return value / 10000000000000000000ULL;
+    case 20: return value / large_power_of_10<20>();
+    case 21: return value / large_power_of_10<21>();
+    case 22: return value / large_power_of_10<22>();
+    case 23: return value / large_power_of_10<23>();
+    case 24: return value / large_power_of_10<24>();
+    case 25: return value / large_power_of_10<25>();
+    case 26: return value / large_power_of_10<26>();
+    case 27: return value / large_power_of_10<27>();
+    case 28: return value / large_power_of_10<28>();
+    case 29: return value / large_power_of_10<29>();
+    case 30: return value / large_power_of_10<30>();
+    case 31: return value / large_power_of_10<31>();
+    case 32: return value / large_power_of_10<32>();
+    case 33: return value / large_power_of_10<33>();
+    case 34: return value / large_power_of_10<34>();
+    case 35: return value / large_power_of_10<35>();
+    case 36: return value / large_power_of_10<36>();
+    case 37: return value / large_power_of_10<37>();
+    case 38: return value / large_power_of_10<38>();
+    default: return 0;
+  }
+}
+
+/**
+ * @brief Multiply by a power of 10 that fits within a 32bit integer.
+ *
+ * @tparam T Type of value to be multiplied.
+ * @param value The number to be multiplied.
+ * @param exp10 The power-of-10 of the multiplier, from 0 to 9 inclusive.
+ * @return Returns value * 10^exp10
+ */
+template <typename T, typename cuda::std::enable_if_t<cuda::std::is_unsigned_v<T>>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int exp10)
+{
+  // See comments in divide_power10_32bit() for discussion.
+  switch (exp10) {
+    case 0: return value;
+    case 1: return value * 10U;
+    case 2: return value * 100U;
+    case 3: return value * 1000U;
+    case 4: return value * 10000U;
+    case 5: return value * 100000U;
+    case 6: return value * 1000000U;
+    case 7: return value * 10000000U;
+    case 8: return value * 100000000U;
+    case 9: return value * 1000000000U;
+    default: return 0;
+  }
+}
+
+/**
+ * @brief Multiply by a power of 10 that fits within a 64bit integer.
+ *
+ * @tparam T Type of value to be multiplied.
+ * @param value The number to be multiplied.
+ * @param exp10 The power-of-10 of the multiplier, from 0 to 19 inclusive.
+ * @return Returns value * 10^exp10
+ */
+template <typename T, typename cuda::std::enable_if_t<cuda::std::is_unsigned_v<T>>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int exp10)
+{
+  // See comments in divide_power10_32bit() for discussion.
+  switch (exp10) {
+    case 0: return value;
+    case 1: return value * 10U;
+    case 2: return value * 100U;
+    case 3: return value * 1000U;
+    case 4: return value * 10000U;
+    case 5: return value * 100000U;
+    case 6: return value * 1000000U;
+    case 7: return value * 10000000U;
+    case 8: return value * 100000000U;
+    case 9: return value * 1000000000U;
+    case 10: return value * 10000000000ULL;
+    case 11: return value * 100000000000ULL;
+    case 12: return value * 1000000000000ULL;
+    case 13: return value * 10000000000000ULL;
+    case 14: return value * 100000000000000ULL;
+    case 15: return value * 1000000000000000ULL;
+    case 16: return value * 10000000000000000ULL;
+    case 17: return value * 100000000000000000ULL;
+    case 18: return value * 1000000000000000000ULL;
+    case 19: return value * 10000000000000000000ULL;
+    default: return 0;
+  }
+}
+
+/**
+ * @brief Multiply by a power of 10 that fits within a 128bit integer.
+ *
+ * @tparam T Type of value to be multiplied.
+ * @param value The number to be multiplied.
+ * @param exp10 The power-of-10 of the multiplier, from 0 to 38 inclusive.
+ * @return Returns value * 10^exp10.
+ */
+template <typename T, typename cuda::std::enable_if_t<cuda::std::is_unsigned_v<T>>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int exp10)
+{
+  // See comments in divide_power10_32bit() for discussion.
+  switch (exp10) {
+    case 0: return value;
+    case 1: return value * 10U;
+    case 2: return value * 100U;
+    case 3: return value * 1000U;
+    case 4: return value * 10000U;
+    case 5: return value * 100000U;
+    case 6: return value * 1000000U;
+    case 7: return value * 10000000U;
+    case 8: return value * 100000000U;
+    case 9: return value * 1000000000U;
+    case 10: return value * 10000000000ULL;
+    case 11: return value * 100000000000ULL;
+    case 12: return value * 1000000000000ULL;
+    case 13: return value * 10000000000000ULL;
+    case 14: return value * 100000000000000ULL;
+    case 15: return value * 1000000000000000ULL;
+    case 16: return value * 10000000000000000ULL;
+    case 17: return value * 100000000000000000ULL;
+    case 18: return value * 1000000000000000000ULL;
+    case 19: return value * 10000000000000000000ULL;
+    case 20: return value * large_power_of_10<20>();
+    case 21: return value * large_power_of_10<21>();
+    case 22: return value * large_power_of_10<22>();
+    case 23: return value * large_power_of_10<23>();
+    case 24: return value * large_power_of_10<24>();
+    case 25: return value * large_power_of_10<25>();
+    case 26: return value * large_power_of_10<26>();
+    case 27: return value * large_power_of_10<27>();
+    case 28: return value * large_power_of_10<28>();
+    case 29: return value * large_power_of_10<29>();
+    case 30: return value * large_power_of_10<30>();
+    case 31: return value * large_power_of_10<31>();
+    case 32: return value * large_power_of_10<32>();
+    case 33: return value * large_power_of_10<33>();
+    case 34: return value * large_power_of_10<34>();
+    case 35: return value * large_power_of_10<35>();
+    case 36: return value * large_power_of_10<36>();
+    case 37: return value * large_power_of_10<37>();
+    case 38: return value * large_power_of_10<38>();
+    default: return 0;
+  }
+}
+
+/**
+ * @brief Multiply an integer by a power of 10.
+ *
+ * @note Use this function if you have no a-priori knowledge of what exp10 might be.
+ * If you do, prefer calling the bit-size-specific versions
+ *
+ * @tparam Rep Representation type needed for integer exponentiation
+ * @tparam T Integral type of value to be multiplied.
+ * @param value The number to be multiplied.
+ * @param exp10 The power-of-10 of the multiplier.
+ * @return Returns value * 10^exp10
+ */
+template <typename Rep,
+          typename T,
+          typename cuda::std::enable_if_t<(cuda::std::is_unsigned_v<T>)>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T multiply_power10(T value, int exp10)
+{
+  // Use this function if you have no knowledge of what exp10 might be
+  // If you do, prefer calling the bit-size-specific versions
+  if constexpr (sizeof(Rep) <= 4) {
+    return multiply_power10_32bit(value, exp10);
+  } else if constexpr (sizeof(Rep) <= 8) {
+    return multiply_power10_64bit(value, exp10);
+  } else {
+    return multiply_power10_128bit(value, exp10);
+  }
+}
+
+/**
+ * @brief Divide an integer by a power of 10.
+ *
+ * @note Use this function if you have no a-priori knowledge of what exp10 might be.
+ * If you do, prefer calling the bit-size-specific versions
+ *
+ * @tparam Rep Representation type needed for integer exponentiation
+ * @tparam T Integral type of value to be divided-from.
+ * @param value The number to be divided-from.
+ * @param exp10 The power-of-10 of the denominator.
+ * @return Returns value / 10^exp10
+ */
+template <typename Rep,
+          typename T,
+          typename cuda::std::enable_if_t<(cuda::std::is_unsigned_v<T>)>* = nullptr>
+CUDF_HOST_DEVICE inline constexpr T divide_power10(T value, int exp10)
+{
+  // Use this function if you have no knowledge of what exp10 might be
+  // If you do, prefer calling the bit-size-specific versions
+  if constexpr (sizeof(Rep) <= 4) {
+    return divide_power10_32bit(value, exp10);
+  } else if constexpr (sizeof(Rep) <= 8) {
+    return divide_power10_64bit(value, exp10);
+  } else {
+    return divide_power10_128bit(value, exp10);
+  }
+}
+
+}  // namespace detail
+
+/** @} */  // end of group
+}  // namespace numeric

From 24320a18563f1defd8bf7a164adebc066f8c7135 Mon Sep 17 00:00:00 2001
From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com>
Date: Wed, 22 May 2024 12:01:24 -0500
Subject: [PATCH 03/15] Switch cuIO benchmarks to use pinned-pool host
 allocations by default. (#15805)

Previously, the benchmarks used a non-pooled pinned memory allocator by default, and exposed an option to use an internally-declared pooled pinned allocator.  Now that we have a pooled pinned allocator enabled in cuIO itself, this PR switches to using that as the new default for the benchmarks.

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/15805
---
 cpp/benchmarks/fixture/nvbench_fixture.hpp | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp
index ac0cab4071b..ebcbcb17e98 100644
--- a/cpp/benchmarks/fixture/nvbench_fixture.hpp
+++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp
@@ -45,8 +45,6 @@ static std::string cuio_host_mem_param{
  * Initializes the default memory resource to use the RMM pool device resource.
  */
 struct nvbench_base_fixture {
-  using host_pooled_mr_t = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
-
   inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
 
   inline auto make_pool()
@@ -90,22 +88,10 @@ struct nvbench_base_fixture {
     return *mr;
   }
 
-  inline rmm::host_async_resource_ref make_cuio_host_pinned_pool()
-  {
-    if (!this->host_pooled_mr) {
-      // Don't store in static, as the CUDA context may be destroyed before static destruction
-      this->host_pooled_mr = std::make_shared<host_pooled_mr_t>(
-        std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
-        size_t{1} * 1024 * 1024 * 1024);
-    }
-
-    return *this->host_pooled_mr;
-  }
-
   inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
   {
     if (mode == "pinned") return make_cuio_host_pinned();
-    if (mode == "pinned_pool") return make_cuio_host_pinned_pool();
+    if (mode == "pinned_pool") return cudf::io::get_host_memory_resource();
     CUDF_FAIL("Unknown cuio_host_mem parameter: " + mode + "\nExpecting: pinned or pinned_pool");
   }
 
@@ -139,8 +125,7 @@ struct nvbench_base_fixture {
   std::shared_ptr<rmm::mr::device_memory_resource> mr;
   std::string rmm_mode{"pool"};
 
-  std::shared_ptr<host_pooled_mr_t> host_pooled_mr;
-  std::string cuio_host_mode{"pinned"};
+  std::string cuio_host_mode{"pinned_pool"};
 };
 
 }  // namespace cudf

From 1710e11c3ae9dd072305ca49e12e10d0f2e3aec0 Mon Sep 17 00:00:00 2001
From: Alessandro Bellina <abellina@nvidia.com>
Date: Thu, 23 May 2024 08:59:55 -0500
Subject: [PATCH 04/15] Return boolean from config_host_memory_resource instead
 of throwing (#15815)

Closes https://github.com/rapidsai/cudf/issues/15814

This adds a boolean return value from `cudf::io::config_host_memory_resource` to allow the caller to handle the case where the memory resource has already been configured in the past. Before this the function would throw, forcing callers to try/catch.

Authors:
  - Alessandro Bellina (https://github.com/abellina)
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - https://github.com/nvdbaranec
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/15815
---
 cpp/include/cudf/io/memory_resource.hpp       |  4 +++-
 cpp/src/io/utilities/config_utils.cpp         | 20 ++++++++++++-------
 .../java/ai/rapids/cudf/PinnedMemoryPool.java |  7 +++++--
 java/src/main/java/ai/rapids/cudf/Rmm.java    |  5 ++++-
 java/src/main/native/src/RmmJni.cpp           | 11 +++++-----
 5 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/cpp/include/cudf/io/memory_resource.hpp b/cpp/include/cudf/io/memory_resource.hpp
index e31ebce4b1f..a36e220ae7b 100644
--- a/cpp/include/cudf/io/memory_resource.hpp
+++ b/cpp/include/cudf/io/memory_resource.hpp
@@ -57,7 +57,9 @@ struct host_mr_options {
  * @throws cudf::logic_error if called after the default host memory resource has been created
  *
  * @param opts Options to configure the default host memory resource
+ * @return True if this call successfully configured the host memory resource, false if
+ * a resource was already configured.
  */
-void config_default_host_memory_resource(host_mr_options const& opts);
+bool config_default_host_memory_resource(host_mr_options const& opts);
 
 }  // namespace cudf::io
diff --git a/cpp/src/io/utilities/config_utils.cpp b/cpp/src/io/utilities/config_utils.cpp
index 7720c073a97..dad1135e766 100644
--- a/cpp/src/io/utilities/config_utils.cpp
+++ b/cpp/src/io/utilities/config_utils.cpp
@@ -244,16 +244,20 @@ CUDF_EXPORT std::mutex& host_mr_mutex()
 }
 
 // Must be called with the host_mr_mutex mutex held
-CUDF_EXPORT rmm::host_async_resource_ref& make_host_mr(std::optional<host_mr_options> const& opts)
+CUDF_EXPORT rmm::host_async_resource_ref& make_host_mr(std::optional<host_mr_options> const& opts,
+                                                       bool* did_configure = nullptr)
 {
   static rmm::host_async_resource_ref* mr_ref = nullptr;
+  bool configured                             = false;
   if (mr_ref == nullptr) {
-    mr_ref = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt);
-  } else {
-    // Throw an error if the user tries to reconfigure the default host resource
-    CUDF_EXPECTS(opts == std::nullopt, "The default host memory resource has already been created");
+    configured = true;
+    mr_ref     = &make_default_pinned_mr(opts ? opts->pool_size : std::nullopt);
   }
 
+  // If the user passed an out param to detect whether this call configured a resource
+  // set the result
+  if (did_configure != nullptr) { *did_configure = configured; }
+
   return *mr_ref;
 }
 
@@ -278,10 +282,12 @@ rmm::host_async_resource_ref get_host_memory_resource()
   return host_mr();
 }
 
-void config_default_host_memory_resource(host_mr_options const& opts)
+bool config_default_host_memory_resource(host_mr_options const& opts)
 {
   std::scoped_lock lock{host_mr_mutex()};
-  make_host_mr(opts);
+  auto did_configure = false;
+  make_host_mr(opts, &did_configure);
+  return did_configure;
 }
 
 }  // namespace cudf::io
diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java
index 9038700cb30..83b801db7fb 100644
--- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java
+++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java
@@ -260,9 +260,12 @@ private synchronized void free(long address, long size) {
    *
    * @param size initial and maximum size for the cuDF default pinned pool.
    *        Pass size=0 to disable the default pool.
+   *
+   * @return true if we were able to setup the default resource, false if there was
+   *         a resource already set.
    */
-  public static synchronized void configureDefaultCudfPinnedPoolSize(long size) {
-    Rmm.configureDefaultCudfPinnedPoolSize(size);
+  public static synchronized boolean configureDefaultCudfPinnedPoolSize(long size) {
+    return Rmm.configureDefaultCudfPinnedPoolSize(size);
   }
 
 }
diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java
index fdbdfdfff6f..4dee1b7aa24 100755
--- a/java/src/main/java/ai/rapids/cudf/Rmm.java
+++ b/java/src/main/java/ai/rapids/cudf/Rmm.java
@@ -273,8 +273,11 @@ public static synchronized void initialize(int allocationMode, LogConf logConf,
    *
    * @param size initial and maximum size for the cuDF default pinned pool.
    *        Pass size=0 to disable the default pool.
+   *
+   * @return true if we were able to setup the default resource, false if there was
+   *         a resource already set.
    */
-  public static synchronized native void configureDefaultCudfPinnedPoolSize(long size);
+  public static synchronized native boolean configureDefaultCudfPinnedPoolSize(long size);
 
   /**
    * Get the most recently set pool size or -1 if RMM has not been initialized or pooling is
diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp
index 9c015fee409..fa78f6ca4e2 100644
--- a/java/src/main/native/src/RmmJni.cpp
+++ b/java/src/main/native/src/RmmJni.cpp
@@ -1035,7 +1035,6 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setCuioPinnedPoolMemoryResource(J
     // create a pinned fallback pool that will allocate pinned memory
     // if the regular pinned pool is exhausted
     pinned_fallback_mr.reset(new pinned_fallback_host_memory_resource(pool));
-    // set the cuio host mr and store the prior resource in our static variable
     prior_cuio_host_mr() = cudf::io::set_host_memory_resource(*pinned_fallback_mr);
   }
   CATCH_STD(env, )
@@ -1107,14 +1106,14 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_freeFromFallbackPinnedPool(JNIEnv
   CATCH_STD(env, )
 }
 
-JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_configureDefaultCudfPinnedPoolSize(JNIEnv* env,
-                                                                                  jclass clazz,
-                                                                                  jlong size)
+JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_Rmm_configureDefaultCudfPinnedPoolSize(JNIEnv* env,
+                                                                                      jclass clazz,
+                                                                                      jlong size)
 {
   try {
     cudf::jni::auto_set_device(env);
-    cudf::io::config_default_host_memory_resource(cudf::io::host_mr_options{size});
+    return cudf::io::config_default_host_memory_resource(cudf::io::host_mr_options{size});
   }
-  CATCH_STD(env, )
+  CATCH_STD(env, false)
 }
 }

From 9d8e43ef6ad75f6babc08fea88642ea006822e04 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 23 May 2024 11:41:49 -0400
Subject: [PATCH 05/15] Remove legacy JSON reader and
 concurrent_unordered_map.cuh. (#15813)

This completes the final two steps and closes https://github.com/rapidsai/cudf/issues/15537. Also addresses one step of https://github.com/rapidsai/cudf/issues/12261.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - David Wendt (https://github.com/davidwendt)
  - Shruti Shivakumar (https://github.com/shrshi)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/15813
---
 cpp/CMakeLists.txt                            |   2 -
 cpp/include/cudf/io/json.hpp                  |  32 -
 cpp/src/groupby/hash/groupby.cu               |   1 -
 cpp/src/hash/concurrent_unordered_map.cuh     | 557 ---------------
 cpp/src/hash/managed.cuh                      |  41 --
 cpp/src/io/json/legacy/json_gpu.cu            | 615 ----------------
 cpp/src/io/json/legacy/json_gpu.hpp           |  99 ---
 cpp/src/io/json/legacy/read_json.hpp          |  38 -
 cpp/src/io/json/legacy/reader_impl.cu         | 667 ------------------
 cpp/src/io/json/read_json.cu                  |   9 -
 cpp/tests/CMakeLists.txt                      |   4 -
 cpp/tests/hash_map/map_test.cu                | 217 ------
 cpp/tests/io/json_test.cpp                    |  49 +-
 cpp/tests/io/nested_json_test.cpp             |   2 +-
 python/cudf/cudf/_lib/json.pyx                |   2 -
 .../cudf/_lib/pylibcudf/libcudf/io/json.pxd   |   3 -
 python/cudf/cudf/io/json.py                   |   1 -
 17 files changed, 8 insertions(+), 2331 deletions(-)
 delete mode 100644 cpp/src/hash/concurrent_unordered_map.cuh
 delete mode 100644 cpp/src/hash/managed.cuh
 delete mode 100644 cpp/src/io/json/legacy/json_gpu.cu
 delete mode 100644 cpp/src/io/json/legacy/json_gpu.hpp
 delete mode 100644 cpp/src/io/json/legacy/read_json.hpp
 delete mode 100644 cpp/src/io/json/legacy/reader_impl.cu
 delete mode 100644 cpp/tests/hash_map/map_test.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7390c465ccb..228d21ddccb 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -390,8 +390,6 @@ add_library(
   src/io/json/json_tree.cu
   src/io/json/nested_json_gpu.cu
   src/io/json/read_json.cu
-  src/io/json/legacy/json_gpu.cu
-  src/io/json/legacy/reader_impl.cu
   src/io/json/parser_features.cpp
   src/io/json/write_json.cu
   src/io/orc/aggregate_orc_metadata.cpp
diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index aa4bee4fb5e..65ba8f25577 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -270,15 +270,6 @@ class json_reader_options {
    */
   bool is_enabled_dayfirst() const { return _dayfirst; }
 
-  /**
-   * @brief Whether the legacy reader should be used.
-   *
-   * @deprecated Since 24.06
-   *
-   * @returns true if the legacy reader will be used, false otherwise
-   */
-  [[deprecated]] bool is_enabled_legacy() const { return _legacy; }
-
   /**
    * @brief Whether the reader should keep quotes of string values.
    *
@@ -406,15 +397,6 @@ class json_reader_options {
    */
   void enable_dayfirst(bool val) { _dayfirst = val; }
 
-  /**
-   * @brief Set whether to use the legacy reader.
-   *
-   * @deprecated Since 24.06
-   *
-   * @param val Boolean value to enable/disable the legacy reader
-   */
-  [[deprecated]] void enable_legacy(bool val) { _legacy = val; }
-
   /**
    * @brief Set whether the reader should keep quotes of string values.
    *
@@ -605,20 +587,6 @@ class json_reader_options_builder {
     return *this;
   }
 
-  /**
-   * @brief Set whether to use the legacy reader.
-   *
-   * @deprecated Since 24.06
-   *
-   * @param val Boolean value to enable/disable legacy parsing
-   * @return this for chaining
-   */
-  [[deprecated]] json_reader_options_builder& legacy(bool val)
-  {
-    options._legacy = val;
-    return *this;
-  }
-
   /**
    * @brief Set whether the reader should keep quotes of string values.
    *
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 4f75ab19c66..0ec293ae3f0 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -16,7 +16,6 @@
 
 #include "groupby/common/utils.hpp"
 #include "groupby/hash/groupby_kernels.cuh"
-#include "hash/concurrent_unordered_map.cuh"
 
 #include <cudf/aggregation.hpp>
 #include <cudf/column/column.hpp>
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
deleted file mode 100644
index a010a462de3..00000000000
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ /dev/null
@@ -1,557 +0,0 @@
-/*
- * Copyright (c) 2017-2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "hash/managed.cuh"
-
-#include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/hashing/detail/default_hash.cuh>
-#include <cudf/hashing/detail/helper_functions.cuh>
-#include <cudf/utilities/default_stream.hpp>
-#include <cudf/utilities/error.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/polymorphic_allocator.hpp>
-
-#include <cuda/atomic>
-#include <thrust/pair.h>
-
-#include <iostream>
-#include <iterator>
-#include <limits>
-#include <type_traits>
-
-namespace {
-template <std::size_t N>
-struct packed {
-  using type = void;
-};
-template <>
-struct packed<sizeof(uint64_t)> {
-  using type = uint64_t;
-};
-template <>
-struct packed<sizeof(uint32_t)> {
-  using type = uint32_t;
-};
-template <typename pair_type>
-using packed_t = typename packed<sizeof(pair_type)>::type;
-
-/**
- * @brief Indicates if a pair type can be packed.
- *
- * When the size of the key,value pair being inserted into the hash table is
- * equal in size to a type where atomicCAS is natively supported, it is more
- * efficient to "pack" the pair and insert it with a single atomicCAS.
- *
- * Only integral key and value types may be packed because we use
- * bitwise equality comparison, which may not be valid for non-integral
- * types.
- *
- * Also, the `pair_type` must not contain any padding bits otherwise
- * accessing the packed value would be undefined.
- *
- * @tparam pair_type The pair type that will be packed
- * @return true If the pair type can be packed
- * @return false  If the pair type cannot be packed
- */
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-constexpr bool is_packable()
-{
-  return std::is_integral_v<key_type> and std::is_integral_v<value_type> and
-         not std::is_void_v<packed_t<pair_type>> and
-         std::has_unique_object_representations_v<pair_type>;
-}
-
-/**
- * @brief Allows viewing a pair in a packed representation
- *
- * Used as an optimization for inserting when a pair can be inserted with a
- * single atomicCAS
- */
-template <typename pair_type, typename Enable = void>
-union pair_packer;
-
-template <typename pair_type>
-union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
-  using packed_type = packed_t<pair_type>;
-  packed_type packed;
-  pair_type pair;
-
-  __device__ pair_packer(pair_type _pair) : pair{_pair} {}
-
-  __device__ pair_packer(packed_type _packed) : packed{_packed} {}
-};
-}  // namespace
-
-/**
- * Supports concurrent insert, but not concurrent insert and find.
- *
- * @note The user is responsible for the following stream semantics:
- * - Either the same stream should be used to create the map as is used by the kernels that access
- * it, or
- * - the stream used to create the map should be synchronized before it is accessed from a different
- * stream or from host code.
- *
- * TODO:
- *  - add constructor that takes pointer to hash_table to avoid allocations
- */
-template <typename Key,
-          typename Element,
-          typename Hasher    = cudf::hashing::detail::default_hash<Key>,
-          typename Equality  = equal_to<Key>,
-          typename Allocator = rmm::mr::polymorphic_allocator<thrust::pair<Key, Element>>>
-class concurrent_unordered_map {
- public:
-  using size_type      = size_t;
-  using hasher         = Hasher;
-  using key_equal      = Equality;
-  using allocator_type = Allocator;
-  using key_type       = Key;
-  using mapped_type    = Element;
-  using value_type     = thrust::pair<Key, Element>;
-  using iterator       = cycle_iterator_adapter<value_type*>;
-  using const_iterator = cycle_iterator_adapter<value_type*> const;
-
- public:
-  /**
-   * @brief Factory to construct a new concurrent unordered map.
-   *
-   * Returns a `std::unique_ptr` to a new concurrent unordered map object. The
-   * map is non-owning and trivially copyable and should be passed by value into
-   * kernels. The `unique_ptr` contains a custom deleter that will free the
-   * map's contents.
-   *
-   * @note The implementation of this unordered_map uses sentinel values to
-   * indicate an entry in the hash table that is empty, i.e., if a hash bucket
-   * is empty, the pair residing there will be equal to (unused_key,
-   * unused_element). As a result, attempting to insert a key equal to
-   *`unused_key` results in undefined behavior.
-   *
-   * @note All allocations, kernels and copies in the constructor take place
-   * on stream but the constructor does not synchronize the stream. It is the user's
-   * responsibility to synchronize or use the same stream to access the map.
-   *
-   * @param capacity The maximum number of pairs the map may hold
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   * @param unused_element The sentinel value to use for an empty value
-   * @param unused_key The sentinel value to use for an empty key
-   * @param hash_function The hash function to use for hashing keys
-   * @param equal The equality comparison function for comparing if two keys are
-   * equal
-   * @param allocator The allocator to use for allocation the hash table's
-   * storage
-   */
-  static auto create(size_type capacity,
-                     rmm::cuda_stream_view stream,
-                     mapped_type const unused_element = std::numeric_limits<mapped_type>::max(),
-                     key_type const unused_key        = std::numeric_limits<key_type>::max(),
-                     Hasher const& hash_function      = hasher(),
-                     Equality const& equal            = key_equal(),
-                     allocator_type const& allocator  = allocator_type())
-  {
-    CUDF_FUNC_RANGE();
-    using Self = concurrent_unordered_map<Key, Element, Hasher, Equality, Allocator>;
-
-    // Note: need `(*p).destroy` instead of `p->destroy` here
-    // due to compiler bug: https://github.com/rapidsai/cudf/pull/5692
-    auto deleter = [stream](Self* p) { (*p).destroy(stream); };
-
-    return std::unique_ptr<Self, std::function<void(Self*)>>{
-      new Self(capacity, unused_element, unused_key, hash_function, equal, allocator, stream),
-      deleter};
-  }
-
-  /**
-   * @brief Returns an iterator to the first element in the map
-   *
-   * @note `__device__` code that calls this function should either run in the
-   * same stream as `create()`, or the accessing stream either be running on the
-   * same stream as create(), or the accessing stream should be appropriately
-   * synchronized with the creating stream.
-   *
-   * @returns iterator to the first element in the map.
-   */
-  __device__ iterator begin()
-  {
-    return iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values);
-  }
-
-  /**
-   * @brief Returns a constant iterator to the first element in the map
-   *
-   * @note `__device__` code that calls this function should either run in the
-   * same stream as `create()`, or the accessing stream either be running on the
-   * same stream as create(), or the accessing stream should be appropriately
-   * synchronized with the creating stream.
-   *
-   * @returns constant iterator to the first element in the map.
-   */
-  __device__ const_iterator begin() const
-  {
-    return const_iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values);
-  }
-
-  /**
-   * @brief Returns an iterator to the one past the last element in the map
-   *
-   * @note `__device__` code that calls this function should either run in the
-   * same stream as `create()`, or the accessing stream either be running on the
-   * same stream as create(), or the accessing stream should be appropriately
-   * synchronized with the creating stream.
-   *
-   * @returns iterator to the one past the last element in the map.
-   */
-  __device__ iterator end()
-  {
-    return iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values + m_capacity);
-  }
-
-  /**
-   * @brief Returns a constant iterator to the one past the last element in the map
-   *
-   * @note When called in a device code, user should make sure that it should
-   * either be running on the same stream as create(), or the accessing stream
-   * should be appropriately synchronized with the creating stream.
-   *
-   * @returns constant iterator to the one past the last element in the map.
-   */
-  __device__ const_iterator end() const
-  {
-    return const_iterator(
-      m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values + m_capacity);
-  }
-  __host__ __device__ value_type* data() const { return m_hashtbl_values; }
-
-  __host__ __device__ key_type get_unused_key() const { return m_unused_key; }
-
-  __host__ __device__ mapped_type get_unused_element() const { return m_unused_element; }
-
-  [[nodiscard]] __host__ __device__ size_type capacity() const { return m_capacity; }
-
- private:
-  /**
-   * @brief Enumeration of the possible results of attempting to insert into
-   *a hash bucket
-   */
-  enum class insert_result {
-    CONTINUE,  ///< Insert did not succeed, continue trying to insert
-               ///< (collision)
-    SUCCESS,   ///< New pair inserted successfully
-    DUPLICATE  ///< Insert did not succeed, key is already present
-  };
-
-  /**
-   * @brief Specialization for value types that can be packed.
-   *
-   * When the size of the key,value pair being inserted is equal in size to
-   *a type where atomicCAS is natively supported, this optimization path
-   *will insert the pair in a single atomicCAS operation.
-   */
-  template <typename pair_type = value_type>
-  __device__ std::enable_if_t<is_packable<pair_type>(), insert_result> attempt_insert(
-    value_type* const __restrict__ insert_location, value_type const& insert_pair)
-  {
-    pair_packer<pair_type> expected{thrust::make_pair(m_unused_key, m_unused_element)};
-    pair_packer<pair_type> desired{insert_pair};
-
-    using packed_type = typename pair_packer<pair_type>::packed_type;
-
-    auto* insert_ptr = reinterpret_cast<packed_type*>(insert_location);
-    cuda::atomic_ref<packed_type, cuda::thread_scope_device> ref{*insert_ptr};
-    auto const success =
-      ref.compare_exchange_strong(expected.packed, desired.packed, cuda::std::memory_order_relaxed);
-
-    if (success) {
-      return insert_result::SUCCESS;
-    } else if (m_equal(expected.pair.first, insert_pair.first)) {
-      return insert_result::DUPLICATE;
-    }
-    return insert_result::CONTINUE;
-  }
-
-  /**
-   * @brief Attempts to insert a key,value pair at the specified hash bucket.
-   *
-   * @param[in] insert_location Pointer to hash bucket to attempt insert
-   * @param[in] insert_pair The pair to insert
-   * @return Enum indicating result of insert attempt.
-   */
-  template <typename pair_type = value_type>
-  __device__ std::enable_if_t<not is_packable<pair_type>(), insert_result> attempt_insert(
-    value_type* const __restrict__ insert_location, value_type const& insert_pair)
-  {
-    auto expected = m_unused_key;
-    cuda::atomic_ref<key_type, cuda::thread_scope_device> ref{insert_location->first};
-    auto const key_success =
-      ref.compare_exchange_strong(expected, insert_pair.first, cuda::std::memory_order_relaxed);
-
-    // Hash bucket empty
-    if (key_success) {
-      insert_location->second = insert_pair.second;
-      return insert_result::SUCCESS;
-    }
-    // Key already exists
-    else if (m_equal(expected, insert_pair.first)) {
-      return insert_result::DUPLICATE;
-    }
-
-    return insert_result::CONTINUE;
-  }
-
- public:
-  /**
-   * @brief Attempts to insert a key, value pair into the map.
-   *
-   * Returns an iterator, boolean pair.
-   *
-   * If the new key already present in the map, the iterator points to
-   * the location of the existing key and the boolean is `false` indicating
-   * that the insert did not succeed.
-   *
-   * If the new key was not present, the iterator points to the location
-   * where the insert occurred and the boolean is `true` indicating that the
-   *insert succeeded.
-   *
-   * @param insert_pair The key and value pair to insert
-   * @return Iterator, Boolean pair. Iterator is to the location of the
-   *newly inserted pair, or the existing pair that prevented the insert.
-   *Boolean indicates insert success.
-   */
-  __device__ thrust::pair<iterator, bool> insert(value_type const& insert_pair)
-  {
-    size_type const key_hash{m_hf(insert_pair.first)};
-    size_type index{key_hash % m_capacity};
-
-    insert_result status{insert_result::CONTINUE};
-
-    value_type* current_bucket{nullptr};
-
-    while (status == insert_result::CONTINUE) {
-      current_bucket = &m_hashtbl_values[index];
-      status         = attempt_insert(current_bucket, insert_pair);
-      index          = (index + 1) % m_capacity;
-    }
-
-    bool const insert_success = status == insert_result::SUCCESS;
-
-    return thrust::make_pair(
-      iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket), insert_success);
-  }
-
-  /**
-   * @brief Searches the map for the specified key.
-   *
-   * @note `find` is not threadsafe with `insert`. I.e., it is not safe to
-   *do concurrent `insert` and `find` operations.
-   *
-   * @param k The key to search for
-   * @return An iterator to the key if it exists, else map.end()
-   */
-  __device__ const_iterator find(key_type const& k) const
-  {
-    size_type const key_hash = m_hf(k);
-    size_type index          = key_hash % m_capacity;
-
-    value_type* current_bucket = &m_hashtbl_values[index];
-
-    while (true) {
-      key_type const existing_key = current_bucket->first;
-
-      if (m_unused_key == existing_key) { return this->end(); }
-
-      if (m_equal(k, existing_key)) {
-        return const_iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket);
-      }
-
-      index          = (index + 1) % m_capacity;
-      current_bucket = &m_hashtbl_values[index];
-    }
-  }
-
-  /**
-   * @brief Searches the map for the specified key.
-   *
-   * This version of the find function specifies a hashing function and an
-   * equality comparison.  This allows the caller to use different functions
-   * for insert and find (for example, when you want to insert keys from
-   * one table and use find to match keys from a different table with the
-   * keys from the first table).
-   *
-   * @note `find` is not threadsafe with `insert`. I.e., it is not safe to
-   * do concurrent `insert` and `find` operations.
-   *
-   * @tparam find_hasher     Type of hashing function
-   * @tparam find_key_equal  Type of equality comparison
-   *
-   * @param k         The key to search for
-   * @param f_hash    The hashing function to use to hash this key
-   * @param f_equal   The equality function to use to compare this key with the
-   *                  contents of the hash table
-   * @return An iterator to the key if it exists, else map.end()
-   */
-  template <typename find_hasher, typename find_key_equal>
-  __device__ const_iterator find(key_type const& k,
-                                 find_hasher f_hash,
-                                 find_key_equal f_equal) const
-  {
-    size_type const key_hash = f_hash(k);
-    size_type index          = key_hash % m_capacity;
-
-    value_type* current_bucket = &m_hashtbl_values[index];
-
-    while (true) {
-      key_type const existing_key = current_bucket->first;
-
-      if (m_unused_key == existing_key) { return this->end(); }
-
-      if (f_equal(k, existing_key)) {
-        return const_iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket);
-      }
-
-      index          = (index + 1) % m_capacity;
-      current_bucket = &m_hashtbl_values[index];
-    }
-  }
-
-  void assign_async(concurrent_unordered_map const& other, rmm::cuda_stream_view stream)
-  {
-    if (other.m_capacity <= m_capacity) {
-      m_capacity = other.m_capacity;
-    } else {
-      m_allocator.deallocate(m_hashtbl_values, m_capacity, stream);
-      m_capacity = other.m_capacity;
-      m_capacity = other.m_capacity;
-
-      m_hashtbl_values = m_allocator.allocate(m_capacity, stream);
-    }
-    CUDF_CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values,
-                                  other.m_hashtbl_values,
-                                  m_capacity * sizeof(value_type),
-                                  cudaMemcpyDefault,
-                                  stream.value()));
-  }
-
-  void clear_async(rmm::cuda_stream_view stream)
-  {
-    constexpr int block_size = 128;
-    init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>(
-      m_hashtbl_values, m_capacity, m_unused_key, m_unused_element);
-  }
-
-  void print()
-  {
-    for (size_type i = 0; i < m_capacity; ++i) {
-      std::cout << i << ": " << m_hashtbl_values[i].first << "," << m_hashtbl_values[i].second
-                << std::endl;
-    }
-  }
-
-  void prefetch(int const dev_id, rmm::cuda_stream_view stream)
-  {
-    cudaPointerAttributes hashtbl_values_ptr_attributes;
-    cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values);
-
-    if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) {
-      CUDF_CUDA_TRY(cudaMemPrefetchAsync(
-        m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value()));
-    }
-    CUDF_CUDA_TRY(cudaMemPrefetchAsync(this, sizeof(*this), dev_id, stream.value()));
-  }
-
-  /**
-   * @brief Frees the contents of the map and destroys the map object.
-   *
-   * This function is invoked as the deleter of the `std::unique_ptr` returned
-   * from the `create()` factory function.
-   *
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   */
-  void destroy(rmm::cuda_stream_view stream)
-  {
-    m_allocator.deallocate(m_hashtbl_values, m_capacity, stream);
-    delete this;
-  }
-
-  concurrent_unordered_map()                                           = delete;
-  concurrent_unordered_map(concurrent_unordered_map const&)            = default;
-  concurrent_unordered_map(concurrent_unordered_map&&)                 = default;
-  concurrent_unordered_map& operator=(concurrent_unordered_map const&) = default;
-  concurrent_unordered_map& operator=(concurrent_unordered_map&&)      = default;
-  ~concurrent_unordered_map()                                          = default;
-
- private:
-  hasher m_hf;
-  key_equal m_equal;
-  mapped_type m_unused_element;
-  key_type m_unused_key;
-  allocator_type m_allocator;
-  size_type m_capacity;
-  value_type* m_hashtbl_values;
-
-  /**
-   * @brief Private constructor used by `create` factory function.
-   *
-   * @param capacity The desired m_capacity of the hash table
-   * @param unused_element The sentinel value to use for an empty value
-   * @param unused_key The sentinel value to use for an empty key
-   * @param hash_function The hash function to use for hashing keys
-   * @param equal The equality comparison function for comparing if two keys
-   *are equal
-   * @param allocator The allocator to use for allocation the hash table's
-   * storage
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   */
-  concurrent_unordered_map(size_type capacity,
-                           mapped_type const unused_element,
-                           key_type const unused_key,
-                           Hasher const& hash_function,
-                           Equality const& equal,
-                           allocator_type const& allocator,
-                           rmm::cuda_stream_view stream)
-    : m_hf(hash_function),
-      m_equal(equal),
-      m_allocator(allocator),
-      m_capacity(capacity),
-      m_unused_element(unused_element),
-      m_unused_key(unused_key)
-  {
-    m_hashtbl_values         = m_allocator.allocate(m_capacity, stream);
-    constexpr int block_size = 128;
-    {
-      cudaPointerAttributes hashtbl_values_ptr_attributes;
-      cudaError_t status =
-        cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values);
-
-      if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) {
-        int dev_id = 0;
-        CUDF_CUDA_TRY(cudaGetDevice(&dev_id));
-        CUDF_CUDA_TRY(cudaMemPrefetchAsync(
-          m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value()));
-      }
-    }
-
-    if (m_capacity > 0) {
-      init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>(
-        m_hashtbl_values, m_capacity, m_unused_key, m_unused_element);
-    }
-
-    CUDF_CHECK_CUDA(stream.value());
-  }
-};
diff --git a/cpp/src/hash/managed.cuh b/cpp/src/hash/managed.cuh
deleted file mode 100644
index 9797c83c47c..00000000000
--- a/cpp/src/hash/managed.cuh
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2017-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <cassert>
-#include <new>
-
-struct managed {
-  static void* operator new(size_t n)
-  {
-    void* ptr          = nullptr;
-    cudaError_t result = cudaMallocManaged(&ptr, n);
-    if (cudaSuccess != result || 0 == ptr) throw std::bad_alloc();
-    return ptr;
-  }
-
-  static void operator delete(void* ptr) noexcept
-  {
-    auto const free_result = cudaFree(ptr);
-    assert(free_result == cudaSuccess);
-  }
-};
-
-inline bool isPtrManaged(cudaPointerAttributes attr)
-{
-  return (attr.type == cudaMemoryTypeManaged);
-}
diff --git a/cpp/src/io/json/legacy/json_gpu.cu b/cpp/src/io/json/legacy/json_gpu.cu
deleted file mode 100644
index ff4845fcecb..00000000000
--- a/cpp/src/io/json/legacy/json_gpu.cu
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "io/utilities/column_type_histogram.hpp"
-#include "io/utilities/parsing_utils.cuh"
-#include "io/utilities/trie.cuh"
-#include "json_gpu.hpp"
-
-#include <cudf/detail/utilities/cuda.cuh>
-#include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
-#include <cudf/types.hpp>
-#include <cudf/utilities/bit.hpp>
-#include <cudf/utilities/span.hpp>
-#include <cudf/utilities/traits.hpp>
-#include <cudf/utilities/type_dispatcher.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_buffer.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/advance.h>
-#include <thrust/detail/copy.h>
-#include <thrust/execution_policy.h>
-#include <thrust/find.h>
-#include <thrust/generate.h>
-#include <thrust/iterator/reverse_iterator.h>
-#include <thrust/mismatch.h>
-#include <thrust/optional.h>
-#include <thrust/pair.h>
-
-using cudf::device_span;
-using cudf::detail::grid_1d;
-
-namespace cudf::io::json::detail::legacy {
-
-namespace {
-/**
- * @brief CUDA Kernel that adjusts the row range to exclude the character outside of the top level
- * brackets.
- *
- * The top level brackets characters are excluded from the resulting range.
- *
- * @param[in] begin Pointer to the first character in the row
- * @param[in] end pointer to the first character after the row
- */
-__device__ std::pair<char const*, char const*> limit_range_to_brackets(char const* begin,
-                                                                       char const* end)
-{
-  auto const data_begin = thrust::next(thrust::find_if(
-    thrust::seq, begin, end, [] __device__(auto c) { return c == '[' || c == '{'; }));
-  auto const data_end   = thrust::next(thrust::find_if(thrust::seq,
-                                                     thrust::make_reverse_iterator(end),
-                                                     thrust::make_reverse_iterator(data_begin),
-                                                     [](auto c) { return c == ']' || c == '}'; }))
-                          .base();
-  return {data_begin, data_end};
-}
-
-/**
- * @brief Find the first JSON object key in the range.
- *
- * Assumes that begin is not in the middle of a field.
- *
- * @param[in] begin Pointer to the first character in the parsing range
- * @param[in] end pointer to the first character after the parsing range
- * @param[in] quotechar The character used to denote quotes
- *
- * @return Begin and end iterators of the key name; (`end`, `end`) if a key is not found
- */
-__device__ std::pair<char const*, char const*> get_next_key(char const* begin,
-                                                            char const* end,
-                                                            char quotechar)
-{
-  // Key starts after the first quote
-  auto const key_begin = thrust::find(thrust::seq, begin, end, quotechar) + 1;
-  if (key_begin > end) return {end, end};
-
-  // Key ends after the next unescaped quote
-  auto const key_end_pair = thrust::mismatch(
-    thrust::seq, key_begin, end - 1, key_begin + 1, [quotechar] __device__(auto prev_ch, auto ch) {
-      return !(ch == quotechar && prev_ch != '\\');
-    });
-
-  return {key_begin, key_end_pair.second};
-}
-
-/**
- * @brief Returns true is the input character is a valid digit.
- * Supports both decimal and hexadecimal digits (uppercase and lowercase).
- *
- * @param c Character to check
- * @param is_hex Whether to check as a hexadecimal
- *
- * @return `true` if it is digit-like, `false` otherwise
- */
-__device__ __inline__ bool is_digit(char c, bool is_hex = false)
-{
-  if (c >= '0' && c <= '9') return true;
-
-  if (is_hex) {
-    if (c >= 'A' && c <= 'F') return true;
-    if (c >= 'a' && c <= 'f') return true;
-  }
-
-  return false;
-}
-
-/**
- * @brief Returns true if the counters indicate a potentially valid float.
- * False positives are possible because positions are not taken into account.
- * For example, field "e.123-" would match the pattern.
- */
-__device__ __inline__ bool is_like_float(
-  long len, long digit_cnt, long decimal_cnt, long dash_cnt, long exponent_cnt)
-{
-  // Can't have more than one exponent and one decimal point
-  if (decimal_cnt > 1) return false;
-  if (exponent_cnt > 1) return false;
-  // Without the exponent or a decimal point, this is an integer, not a float
-  if (decimal_cnt == 0 && exponent_cnt == 0) return false;
-
-  // Can only have one '-' per component
-  if (dash_cnt > 1 + exponent_cnt) return false;
-
-  // If anything other than these characters is present, it's not a float
-  if (digit_cnt + decimal_cnt + dash_cnt + exponent_cnt != len) return false;
-
-  // Needs at least 1 digit, 2 if exponent is present
-  if (digit_cnt < 1 + exponent_cnt) return false;
-
-  return true;
-}
-
-/**
- * @brief Contains information on a JSON file field.
- */
-struct field_descriptor {
-  cudf::size_type column;
-  char const* value_begin;
-  char const* value_end;
-  bool is_quoted;
-};
-
-/**
- * @brief Parse the first field in the given range and return its descriptor.
- *
- * @param[in] begin Pointer to the first character in the parsing range
- * @param[in] end pointer to the first character after the parsing range
- * @param[in] opts The global parsing behavior options
- * @param[in] field_idx Index of the current field in the input row
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @return Descriptor of the parsed field
- */
-__device__ field_descriptor next_field_descriptor(char const* begin,
-                                                  char const* end,
-                                                  parse_options_view const& opts,
-                                                  cudf::size_type field_idx,
-                                                  col_map_type col_map)
-{
-  auto const desc_pre_trim =
-    col_map.capacity() == 0
-      // No key - column and begin are trivial
-      ? field_descriptor{field_idx,
-                         begin,
-                         cudf::io::gpu::seek_field_end(begin, end, opts, true),
-                         false}
-      : [&]() {
-          auto const key_range = get_next_key(begin, end, opts.quotechar);
-          auto const key_hash  = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{}(
-            cudf::string_view(key_range.first, key_range.second - key_range.first));
-          auto const hash_col = col_map.find(key_hash);
-          // Fall back to field index if not found (parsing error)
-          auto const column = (hash_col != col_map.end()) ? (*hash_col).second : field_idx;
-
-          // Skip the colon between the key and the value
-          auto const value_begin = thrust::find(thrust::seq, key_range.second, end, ':') + 1;
-          return field_descriptor{column,
-                                  value_begin,
-                                  cudf::io::gpu::seek_field_end(value_begin, end, opts, true),
-                                  false};
-        }();
-
-  // Modify start & end to ignore whitespace and quotechars
-  auto const trimmed_value_range =
-    trim_whitespaces(desc_pre_trim.value_begin, desc_pre_trim.value_end);
-  bool const is_quoted =
-    thrust::distance(trimmed_value_range.first, trimmed_value_range.second) >= 2 and
-    *trimmed_value_range.first == opts.quotechar and
-    *thrust::prev(trimmed_value_range.second) == opts.quotechar;
-  return {desc_pre_trim.column,
-          trimmed_value_range.first + static_cast<std::ptrdiff_t>(is_quoted),
-          trimmed_value_range.second - static_cast<std::ptrdiff_t>(is_quoted),
-          is_quoted};
-}
-
-/**
- * @brief Returns the range that contains the data in a given row.
- *
- * Excludes the top-level brackets.
- *
- * @param[in] data Device span pointing to the JSON data in device memory
- * @param[in] row_offsets The offset of each row in the input
- * @param[in] row Index of the row for which the range is returned
- *
- * @return The begin and end iterators of the row data.
- */
-__device__ std::pair<char const*, char const*> get_row_data_range(
-  device_span<char const> const data, device_span<uint64_t const> const row_offsets, size_type row)
-{
-  auto const row_begin = data.begin() + row_offsets[row];
-  auto const row_end =
-    data.begin() + ((row < row_offsets.size() - 1) ? row_offsets[row + 1] : data.size());
-  return limit_range_to_brackets(row_begin, row_end);
-}
-
-/**
- * @brief CUDA kernel that parses and converts plain text data into cuDF column data.
- *
- * Data is processed one record at a time
- *
- * @param[in] opts A set of parsing options
- * @param[in] data The entire data to read
- * @param[in] row_offsets The offset of each row in the input
- * @param[in] column_types The data type of each column
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[out] output_columns The output column data
- * @param[out] valid_fields The bitmaps indicating whether column fields are valid
- * @param[out] num_valid_fields The numbers of valid fields in columns
- */
-CUDF_KERNEL void convert_data_to_columns_kernel(parse_options_view opts,
-                                                device_span<char const> const data,
-                                                device_span<uint64_t const> const row_offsets,
-                                                device_span<data_type const> const column_types,
-                                                col_map_type col_map,
-                                                device_span<void* const> const output_columns,
-                                                device_span<bitmask_type* const> const valid_fields,
-                                                device_span<cudf::size_type> const num_valid_fields)
-{
-  auto const rec_id = grid_1d::global_thread_id();
-  if (rec_id >= row_offsets.size()) return;
-
-  auto const row_data_range = get_row_data_range(data, row_offsets, rec_id);
-
-  auto current = row_data_range.first;
-  for (size_type input_field_index = 0;
-       input_field_index < column_types.size() && current < row_data_range.second;
-       input_field_index++) {
-    auto const desc =
-      next_field_descriptor(current, row_data_range.second, opts, input_field_index, col_map);
-    auto const value_len = static_cast<size_t>(std::max(desc.value_end - desc.value_begin, 0L));
-    auto const is_quoted = static_cast<std::ptrdiff_t>(desc.is_quoted);
-
-    current = desc.value_end + 1;
-
-    using string_index_pair = thrust::pair<char const*, size_type>;
-
-    if (!serialized_trie_contains(opts.trie_na,
-                                  {desc.value_begin - is_quoted, value_len + is_quoted * 2})) {
-      // Type dispatcher does not handle strings
-      if (column_types[desc.column].id() == type_id::STRING) {
-        auto str_list           = static_cast<string_index_pair*>(output_columns[desc.column]);
-        str_list[rec_id].first  = desc.value_begin;
-        str_list[rec_id].second = value_len;
-
-        // set the valid bitmap - all bits were set to 0 to start
-        set_bit(valid_fields[desc.column], rec_id);
-        atomicAdd(&num_valid_fields[desc.column], 1);
-      } else {
-        if (cudf::type_dispatcher(column_types[desc.column],
-                                  ConvertFunctor{},
-                                  desc.value_begin,
-                                  desc.value_end,
-                                  output_columns[desc.column],
-                                  rec_id,
-                                  column_types[desc.column],
-                                  opts,
-                                  false)) {
-          // set the valid bitmap - all bits were set to 0 to start
-          set_bit(valid_fields[desc.column], rec_id);
-          atomicAdd(&num_valid_fields[desc.column], 1);
-        }
-      }
-    } else if (column_types[desc.column].id() == type_id::STRING) {
-      auto str_list           = static_cast<string_index_pair*>(output_columns[desc.column]);
-      str_list[rec_id].first  = nullptr;
-      str_list[rec_id].second = 0;
-    }
-  }
-}
-
-/**
- * @brief CUDA kernel that processes a buffer of data and determines information about the
- * column types within.
- *
- * Data is processed in one row/record at a time, so the number of total
- * threads (tid) is equal to the number of rows.
- *
- * @param[in] opts A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] rec_starts The offset of each row in the input
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[in] num_columns The number of columns of input data
- * @param[out] column_infos The count for each column data type
- */
-CUDF_KERNEL void detect_data_types_kernel(
-  parse_options_view const opts,
-  device_span<char const> const data,
-  device_span<uint64_t const> const row_offsets,
-  col_map_type col_map,
-  int num_columns,
-  device_span<cudf::io::column_type_histogram> const column_infos)
-{
-  auto const rec_id = grid_1d::global_thread_id();
-  if (rec_id >= row_offsets.size()) return;
-
-  auto const are_rows_objects = col_map.capacity() != 0;
-  auto const row_data_range   = get_row_data_range(data, row_offsets, rec_id);
-
-  size_type input_field_index = 0;
-  for (auto current = row_data_range.first;
-       input_field_index < num_columns && current < row_data_range.second;
-       input_field_index++) {
-    auto const desc =
-      next_field_descriptor(current, row_data_range.second, opts, input_field_index, col_map);
-    auto const value_len = static_cast<size_t>(std::max(desc.value_end - desc.value_begin, 0L));
-
-    // Advance to the next field; +1 to skip the delimiter
-    current = desc.value_end + 1;
-
-    // Checking if the field is empty/valid
-    if (serialized_trie_contains(opts.trie_na, {desc.value_begin, value_len})) {
-      // Increase the null count for array rows, where the null count is initialized to zero.
-      if (!are_rows_objects) { atomicAdd(&column_infos[desc.column].null_count, 1); }
-      continue;
-    } else if (are_rows_objects) {
-      // For files with object rows, null count is initialized to row count. The value is decreased
-      // here for every valid field.
-      atomicAdd(&column_infos[desc.column].null_count, -1);
-    }
-    // Don't need counts to detect strings, any field in quotes is deduced to be a string
-    if (desc.is_quoted) {
-      atomicAdd(&column_infos[desc.column].string_count, 1);
-      continue;
-    }
-
-    int digit_count    = 0;
-    int decimal_count  = 0;
-    int slash_count    = 0;
-    int dash_count     = 0;
-    int plus_count     = 0;
-    int colon_count    = 0;
-    int exponent_count = 0;
-    int other_count    = 0;
-
-    bool const maybe_hex =
-      ((value_len > 2 && *desc.value_begin == '0' && *(desc.value_begin + 1) == 'x') ||
-       (value_len > 3 && *desc.value_begin == '-' && *(desc.value_begin + 1) == '0' &&
-        *(desc.value_begin + 2) == 'x'));
-    for (auto pos = desc.value_begin; pos < desc.value_end; ++pos) {
-      if (is_digit(*pos, maybe_hex)) {
-        digit_count++;
-        continue;
-      }
-      // Looking for unique characters that will help identify column types
-      switch (*pos) {
-        case '.': decimal_count++; break;
-        case '-': dash_count++; break;
-        case '+': plus_count++; break;
-        case '/': slash_count++; break;
-        case ':': colon_count++; break;
-        case 'e':
-        case 'E':
-          if (!maybe_hex && pos > desc.value_begin && pos < desc.value_end - 1) exponent_count++;
-          break;
-        default: other_count++; break;
-      }
-    }
-
-    // Integers have to have the length of the string
-    int int_req_number_cnt = value_len;
-    // Off by one if they start with a minus sign
-    if ((*desc.value_begin == '-' || *desc.value_begin == '+') && value_len > 1) {
-      --int_req_number_cnt;
-    }
-    // Off by one if they are a hexadecimal number
-    if (maybe_hex) { --int_req_number_cnt; }
-    if (serialized_trie_contains(opts.trie_true, {desc.value_begin, value_len}) ||
-        serialized_trie_contains(opts.trie_false, {desc.value_begin, value_len})) {
-      atomicAdd(&column_infos[desc.column].bool_count, 1);
-    } else if (digit_count == int_req_number_cnt) {
-      bool is_negative       = (*desc.value_begin == '-');
-      char const* data_begin = desc.value_begin + (is_negative || (*desc.value_begin == '+'));
-      cudf::size_type* ptr   = cudf::io::gpu::infer_integral_field_counter(
-        data_begin, data_begin + digit_count, is_negative, column_infos[desc.column]);
-      atomicAdd(ptr, 1);
-    } else if (is_like_float(
-                 value_len, digit_count, decimal_count, dash_count + plus_count, exponent_count)) {
-      atomicAdd(&column_infos[desc.column].float_count, 1);
-    }
-    // A date-time field cannot have more than 3 non-special characters
-    // A number field cannot have more than one decimal point
-    else if (other_count > 3 || decimal_count > 1) {
-      atomicAdd(&column_infos[desc.column].string_count, 1);
-    } else {
-      // A date field can have either one or two '-' or '\'; A legal combination will only have one
-      // of them To simplify the process of auto column detection, we are not covering all the
-      // date-time formation permutations
-      if ((dash_count > 0 && dash_count <= 2 && slash_count == 0) ||
-          (dash_count == 0 && slash_count > 0 && slash_count <= 2)) {
-        if (colon_count <= 2) {
-          atomicAdd(&column_infos[desc.column].datetime_count, 1);
-        } else {
-          atomicAdd(&column_infos[desc.column].string_count, 1);
-        }
-      } else {
-        // Default field type is string
-        atomicAdd(&column_infos[desc.column].string_count, 1);
-      }
-    }
-  }
-  if (!are_rows_objects) {
-    // For array rows, mark missing fields as null
-    for (; input_field_index < num_columns; ++input_field_index)
-      atomicAdd(&column_infos[input_field_index].null_count, 1);
-  }
-}
-
-/**
- * @brief Input data range that contains a field in key:value format.
- */
-struct key_value_range {
-  char const* key_begin;
-  char const* key_end;
-  char const* value_begin;
-  char const* value_end;
-};
-
-/**
- * @brief Parse the next field in key:value format and return ranges of its parts.
- */
-__device__ key_value_range get_next_key_value_range(char const* begin,
-                                                    char const* end,
-                                                    parse_options_view const& opts)
-{
-  auto const key_range = get_next_key(begin, end, opts.quotechar);
-
-  // Colon between the key and the value
-  auto const colon = thrust::find(thrust::seq, key_range.second, end, ':');
-  if (colon == end) return {end, end, end};
-
-  // Field value (including delimiters)
-  auto const value_end = cudf::io::gpu::seek_field_end(colon + 1, end, opts, true);
-  return {key_range.first, key_range.second, colon + 1, value_end};
-}
-
-/**
- * @brief Cuda kernel that collects information about JSON object keys in the file.
- *
- * @param[in] options A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] row_offsets The offset of each row in the input
- * @param[out] keys_cnt Number of keys found in the file
- * @param[out] keys_info optional, information (offset, length, hash) for each found key
- */
-CUDF_KERNEL void collect_keys_info_kernel(parse_options_view const options,
-                                          device_span<char const> const data,
-                                          device_span<uint64_t const> const row_offsets,
-                                          unsigned long long int* keys_cnt,
-                                          thrust::optional<mutable_table_device_view> keys_info)
-{
-  auto const rec_id = grid_1d::global_thread_id();
-  if (rec_id >= row_offsets.size()) return;
-
-  auto const row_data_range = get_row_data_range(data, row_offsets, rec_id);
-
-  auto advance = [&](char const* begin) {
-    return get_next_key_value_range(begin, row_data_range.second, options);
-  };
-  for (auto field_range = advance(row_data_range.first);
-       field_range.key_begin < row_data_range.second;
-       field_range = advance(field_range.value_end)) {
-    auto const idx = atomicAdd(keys_cnt, 1ULL);
-    if (keys_info.has_value()) {
-      auto const len                              = field_range.key_end - field_range.key_begin;
-      keys_info->column(0).element<uint64_t>(idx) = field_range.key_begin - data.begin();
-      keys_info->column(1).element<uint16_t>(idx) = len;
-      keys_info->column(2).element<uint32_t>(idx) =
-        cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{}(
-          cudf::string_view(field_range.key_begin, len));
-    }
-  }
-}
-
-}  // namespace
-
-/**
- * @copydoc cudf::io::json::detail::legacy::convert_json_to_columns
- */
-void convert_json_to_columns(parse_options_view const& opts,
-                             device_span<char const> const data,
-                             device_span<uint64_t const> const row_offsets,
-                             device_span<data_type const> const column_types,
-                             col_map_type* col_map,
-                             device_span<void* const> const output_columns,
-                             device_span<bitmask_type* const> const valid_fields,
-                             device_span<cudf::size_type> num_valid_fields,
-                             rmm::cuda_stream_view stream)
-{
-  int block_size;
-  int min_grid_size;
-  CUDF_CUDA_TRY(cudaOccupancyMaxPotentialBlockSize(
-    &min_grid_size, &block_size, convert_data_to_columns_kernel));
-
-  int const grid_size = (row_offsets.size() + block_size - 1) / block_size;
-
-  convert_data_to_columns_kernel<<<grid_size, block_size, 0, stream.value()>>>(opts,
-                                                                               data,
-                                                                               row_offsets,
-                                                                               column_types,
-                                                                               *col_map,
-                                                                               output_columns,
-                                                                               valid_fields,
-                                                                               num_valid_fields);
-
-  CUDF_CHECK_CUDA(stream.value());
-}
-
-/**
- * @copydoc cudf::io::json::detail::legacy::detect_data_types
- */
-
-std::vector<cudf::io::column_type_histogram> detect_data_types(
-  parse_options_view const& options,
-  device_span<char const> const data,
-  device_span<uint64_t const> const row_offsets,
-  bool do_set_null_count,
-  int num_columns,
-  col_map_type* col_map,
-  rmm::cuda_stream_view stream)
-{
-  int block_size;
-  int min_grid_size;
-  CUDF_CUDA_TRY(
-    cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, detect_data_types_kernel));
-
-  auto d_column_infos = [&]() {
-    if (do_set_null_count) {
-      rmm::device_uvector<cudf::io::column_type_histogram> d_column_infos(num_columns, stream);
-      // Set the null count to the row count (all fields assumes to be null).
-      thrust::generate(
-        rmm::exec_policy(stream),
-        d_column_infos.begin(),
-        d_column_infos.end(),
-        [num_records = static_cast<cudf::size_type>(row_offsets.size())] __device__() {
-          return cudf::io::column_type_histogram{num_records};
-        });
-      return d_column_infos;
-    } else {
-      return cudf::detail::make_zeroed_device_uvector_async<cudf::io::column_type_histogram>(
-        num_columns, stream, rmm::mr::get_current_device_resource());
-    }
-  }();
-
-  // Calculate actual block count to use based on records count
-  int const grid_size = (row_offsets.size() + block_size - 1) / block_size;
-
-  detect_data_types_kernel<<<grid_size, block_size, 0, stream.value()>>>(
-    options, data, row_offsets, *col_map, num_columns, d_column_infos);
-
-  return cudf::detail::make_std_vector_sync(d_column_infos, stream);
-}
-
-/**
- * @copydoc cudf::io::json::detail::legacy::collect_keys_info
- */
-void collect_keys_info(parse_options_view const& options,
-                       device_span<char const> const data,
-                       device_span<uint64_t const> const row_offsets,
-                       unsigned long long int* keys_cnt,
-                       thrust::optional<mutable_table_device_view> keys_info,
-                       rmm::cuda_stream_view stream)
-{
-  int block_size;
-  int min_grid_size;
-  CUDF_CUDA_TRY(
-    cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, collect_keys_info_kernel));
-
-  // Calculate actual block count to use based on records count
-  int const grid_size = (row_offsets.size() + block_size - 1) / block_size;
-
-  collect_keys_info_kernel<<<grid_size, block_size, 0, stream.value()>>>(
-    options, data, row_offsets, keys_cnt, keys_info);
-
-  CUDF_CHECK_CUDA(stream.value());
-}
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/legacy/json_gpu.hpp b/cpp/src/io/json/legacy/json_gpu.hpp
deleted file mode 100644
index 853e30c9427..00000000000
--- a/cpp/src/io/json/legacy/json_gpu.hpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "hash/concurrent_unordered_map.cuh"
-#include "io/utilities/column_type_histogram.hpp"
-#include "io/utilities/parsing_utils.cuh"
-
-#include <cudf/table/table_device_view.cuh>
-#include <cudf/types.hpp>
-#include <cudf/utilities/span.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-
-#include <thrust/optional.h>
-
-using cudf::device_span;
-
-namespace cudf::io::json::detail::legacy {
-
-using col_map_type = concurrent_unordered_map<uint32_t, cudf::size_type>;
-/**
- * @brief Convert a buffer of input data (text) into raw cuDF column data.
- *
- * @param[in] options A set of parsing options
- * @param[in] data The entire data to read
- * @param[in] row_offsets The start of each data record
- * @param[in] dtypes The data type of each column
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[out] output_columns The output column data
- * @param[out] valid_fields The bitmaps indicating whether column fields are valid
- * @param[out] num_valid_fields The numbers of valid fields in columns
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- */
-void convert_json_to_columns(parse_options_view const& options,
-                             device_span<char const> data,
-                             device_span<uint64_t const> row_offsets,
-                             device_span<data_type const> column_types,
-                             col_map_type* col_map,
-                             device_span<void* const> output_columns,
-                             device_span<bitmask_type* const> valid_fields,
-                             device_span<cudf::size_type> num_valid_fields,
-                             rmm::cuda_stream_view stream);
-
-/**
- * @brief Process a buffer of data and determine information about the column types within.
- *
- * @param[in] options A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] row_offsets The offset of each row in the input
- * @param[in] num_columns The number of columns of input data
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- *
- * @returns The count for each column data type
- */
-std::vector<cudf::io::column_type_histogram> detect_data_types(
-  parse_options_view const& options,
-  device_span<char const> data,
-  device_span<uint64_t const> row_offsets,
-  bool do_set_null_count,
-  int num_columns,
-  col_map_type* col_map,
-  rmm::cuda_stream_view stream);
-
-/**
- * @brief Collects information about JSON object keys in the file.
- *
- * @param[in] options A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] row_offsets The offset of each row in the input
- * @param[out] keys_cnt Number of keys found in the file
- * @param[out] keys_info optional, information (offset, length, hash) for each found key
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- */
-void collect_keys_info(parse_options_view const& options,
-                       device_span<char const> data,
-                       device_span<uint64_t const> row_offsets,
-                       unsigned long long int* keys_cnt,
-                       thrust::optional<mutable_table_device_view> keys_info,
-                       rmm::cuda_stream_view stream);
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/legacy/read_json.hpp b/cpp/src/io/json/legacy/read_json.hpp
deleted file mode 100644
index 2c02fdd402f..00000000000
--- a/cpp/src/io/json/legacy/read_json.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cudf/types.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/resource_ref.hpp>
-
-#include <thrust/mr/memory_resource.h>
-
-#include <memory>
-#include <vector>
-
-namespace cudf::io {
-class json_reader_options;  // forward decl
-}
-
-namespace cudf::io::json::detail::legacy {
-
-table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
-                              json_reader_options const& reader_opts,
-                              rmm::cuda_stream_view stream,
-                              rmm::device_async_resource_ref mr);
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/legacy/reader_impl.cu b/cpp/src/io/json/legacy/reader_impl.cu
deleted file mode 100644
index 846b3cfab4e..00000000000
--- a/cpp/src/io/json/legacy/reader_impl.cu
+++ /dev/null
@@ -1,667 +0,0 @@
-/*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "hash/concurrent_unordered_map.cuh"
-#include "io/comp/io_uncomp.hpp"
-#include "io/utilities/column_buffer.hpp"
-#include "io/utilities/parsing_utils.cuh"
-#include "json_gpu.hpp"
-
-#include <cudf/column/column_factories.hpp>
-#include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/detail/utilities/visitor_overload.hpp>
-#include <cudf/groupby.hpp>
-#include <cudf/io/datasource.hpp>
-#include <cudf/io/detail/json.hpp>
-#include <cudf/io/json.hpp>
-#include <cudf/sorting.hpp>
-#include <cudf/strings/detail/replace.hpp>
-#include <cudf/table/table.hpp>
-#include <cudf/types.hpp>
-#include <cudf/utilities/error.hpp>
-#include <cudf/utilities/span.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_scalar.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-#include <rmm/resource_ref.hpp>
-
-#include <thrust/for_each.h>
-#include <thrust/functional.h>
-#include <thrust/host_vector.h>
-#include <thrust/iterator/constant_iterator.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
-#include <thrust/pair.h>
-#include <thrust/sort.h>
-#include <thrust/transform.h>
-
-using cudf::host_span;
-
-namespace cudf::io::json::detail::legacy {
-
-using col_map_ptr_type = std::unique_ptr<col_map_type, std::function<void(col_map_type*)>>;
-
-/**
- * @brief Aggregate the table containing keys info by their hash values.
- *
- * @param[in] info Table with columns containing key offsets, lengths and hashes, respectively
- *
- * @return Table with data aggregated by key hash values
- */
-std::unique_ptr<table> aggregate_keys_info(std::unique_ptr<table> info)
-{
-  auto const info_view = info->view();
-  std::vector<groupby::aggregation_request> requests;
-  requests.emplace_back(groupby::aggregation_request{info_view.column(0)});
-  requests.back().aggregations.emplace_back(make_min_aggregation<groupby_aggregation>());
-  requests.back().aggregations.emplace_back(make_nth_element_aggregation<groupby_aggregation>(0));
-
-  requests.emplace_back(groupby::aggregation_request{info_view.column(1)});
-  requests.back().aggregations.emplace_back(make_min_aggregation<groupby_aggregation>());
-  requests.back().aggregations.emplace_back(make_nth_element_aggregation<groupby_aggregation>(0));
-
-  // Aggregate by hash values
-  groupby::groupby gb_obj(
-    table_view({info_view.column(2)}), null_policy::EXCLUDE, sorted::NO, {}, {});
-
-  auto result = gb_obj.aggregate(requests);  // TODO: no stream parameter?
-
-  std::vector<std::unique_ptr<column>> out_columns;
-  out_columns.emplace_back(std::move(result.second[0].results[0]));  // offsets
-  out_columns.emplace_back(std::move(result.second[1].results[0]));  // lengths
-  out_columns.emplace_back(std::move(result.first->release()[0]));   // hashes
-  return std::make_unique<table>(std::move(out_columns));
-}
-
-/**
- * @brief Initializes the (key hash -> column index) hash map.
- */
-col_map_ptr_type create_col_names_hash_map(column_view column_name_hashes,
-                                           rmm::cuda_stream_view stream)
-{
-  auto key_col_map       = col_map_type::create(column_name_hashes.size(), stream);
-  auto const column_data = column_name_hashes.data<uint32_t>();
-  thrust::for_each_n(rmm::exec_policy(stream),
-                     thrust::make_counting_iterator<size_type>(0),
-                     column_name_hashes.size(),
-                     [map = *key_col_map, column_data] __device__(size_type idx) mutable {
-                       map.insert(thrust::make_pair(column_data[idx], idx));
-                     });
-  return key_col_map;
-}
-
-/**
- * @brief Create a table whose columns contain the information on JSON objects' keys.
- *
- * The columns contain name offsets in the file, name lengths and name hashes, respectively.
- *
- * @param[in] options Parsing options (e.g. delimiter and quotation character)
- * @param[in] data Input JSON device data
- * @param[in] row_offsets Device array of row start locations in the input buffer
- * @param[in] stream CUDA stream used for device memory operations and kernel launches
- *
- * @return std::unique_ptr<table> cudf table with three columns (offsets, lengths, hashes)
- */
-std::unique_ptr<table> create_json_keys_info_table(parse_options_view const& parse_opts,
-                                                   device_span<char const> const data,
-                                                   device_span<uint64_t const> const row_offsets,
-                                                   rmm::cuda_stream_view stream)
-{
-  // Count keys
-  rmm::device_scalar<unsigned long long int> key_counter(0, stream);
-  collect_keys_info(parse_opts, data, row_offsets, key_counter.data(), {}, stream);
-
-  // Allocate columns to store hash value, length, and offset of each JSON object key in the input
-  auto const num_keys = key_counter.value(stream);
-  std::vector<std::unique_ptr<column>> info_columns;
-  info_columns.emplace_back(
-    make_numeric_column(data_type(type_id::UINT64), num_keys, mask_state::UNALLOCATED, stream));
-  info_columns.emplace_back(
-    make_numeric_column(data_type(type_id::UINT16), num_keys, mask_state::UNALLOCATED, stream));
-  info_columns.emplace_back(
-    make_numeric_column(data_type(type_id::UINT32), num_keys, mask_state::UNALLOCATED, stream));
-  // Create a table out of these columns to pass them around more easily
-  auto info_table           = std::make_unique<table>(std::move(info_columns));
-  auto const info_table_mdv = mutable_table_device_view::create(info_table->mutable_view(), stream);
-
-  // Reset the key counter - now used for indexing
-  key_counter.set_value_to_zero_async(stream);
-  // Fill the allocated columns
-  collect_keys_info(parse_opts, data, row_offsets, key_counter.data(), {*info_table_mdv}, stream);
-  return info_table;
-}
-
-/**
- * @brief Extract the keys from the JSON file the name offsets/lengths.
- */
-std::vector<std::string> create_key_strings(char const* h_data,
-                                            table_view sorted_info,
-                                            rmm::cuda_stream_view stream)
-{
-  auto const num_cols = sorted_info.num_rows();
-  std::vector<uint64_t> h_offsets(num_cols);
-  CUDF_CUDA_TRY(cudaMemcpyAsync(h_offsets.data(),
-                                sorted_info.column(0).data<uint64_t>(),
-                                sizeof(uint64_t) * num_cols,
-                                cudaMemcpyDefault,
-                                stream.value()));
-
-  std::vector<uint16_t> h_lens(num_cols);
-  CUDF_CUDA_TRY(cudaMemcpyAsync(h_lens.data(),
-                                sorted_info.column(1).data<uint16_t>(),
-                                sizeof(uint16_t) * num_cols,
-                                cudaMemcpyDefault,
-                                stream.value()));
-
-  std::vector<std::string> names(num_cols);
-  std::transform(h_offsets.cbegin(),
-                 h_offsets.cend(),
-                 h_lens.cbegin(),
-                 names.begin(),
-                 [&](auto offset, auto len) { return std::string(h_data + offset, len); });
-  return names;
-}
-
-auto sort_keys_info_by_offset(std::unique_ptr<table> info)
-{
-  auto const agg_offset_col_view = info->get_column(0).view();
-  return sort_by_key(info->view(), table_view({agg_offset_col_view}));
-}
-
-/**
- * @brief Extract JSON object keys from a JSON file.
- *
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- *
- * @return Names of JSON object keys in the file
- */
-std::pair<std::vector<std::string>, col_map_ptr_type> get_json_object_keys_hashes(
-  parse_options_view const& parse_opts,
-  host_span<char const> h_data,
-  device_span<uint64_t const> rec_starts,
-  device_span<char const> d_data,
-  rmm::cuda_stream_view stream)
-{
-  auto info = create_json_keys_info_table(parse_opts, d_data, rec_starts, stream);
-
-  auto aggregated_info = aggregate_keys_info(std::move(info));
-  auto sorted_info     = sort_keys_info_by_offset(std::move(aggregated_info));
-
-  return {create_key_strings(h_data.data(), sorted_info->view(), stream),
-          create_col_names_hash_map(sorted_info->get_column(2).view(), stream)};
-}
-
-std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> sources,
-                                      compression_type compression,
-                                      size_t range_offset,
-                                      size_t range_size,
-                                      size_t range_size_padded)
-{
-  CUDF_FUNC_RANGE();
-  // Iterate through the user defined sources and read the contents into the local buffer
-  size_t total_source_size = 0;
-  for (auto const& source : sources) {
-    total_source_size += source->size();
-  }
-  total_source_size = total_source_size - (range_offset * sources.size());
-
-  auto buffer = std::vector<uint8_t>(total_source_size);
-
-  size_t bytes_read = 0;
-  for (auto const& source : sources) {
-    if (!source->is_empty()) {
-      auto data_size   = (range_size_padded != 0) ? range_size_padded : source->size();
-      auto destination = buffer.data() + bytes_read;
-      bytes_read += source->host_read(range_offset, data_size, destination);
-    }
-  }
-
-  if (compression == compression_type::NONE) {
-    return buffer;
-  } else {
-    return decompress(compression, buffer);
-  }
-}
-
-bool should_load_whole_source(json_reader_options const& reader_opts)
-{
-  return reader_opts.get_byte_range_offset() == 0 and  //
-         reader_opts.get_byte_range_size() == 0;
-}
-
-rmm::device_uvector<uint64_t> find_record_starts(json_reader_options const& reader_opts,
-                                                 host_span<char const> h_data,
-                                                 device_span<char const> d_data,
-                                                 rmm::cuda_stream_view stream)
-{
-  std::vector<char> chars_to_count{'\n'};
-  // Currently, ignoring lineterminations within quotes is handled by recording the records of both,
-  // and then filtering out the records that is a quotechar or a linetermination within a quotechar
-  // pair.
-  // If not starting at an offset, add an extra row to account for the first row in the file
-  cudf::size_type prefilter_count = ((reader_opts.get_byte_range_offset() == 0) ? 1 : 0);
-  if (should_load_whole_source(reader_opts)) {
-    prefilter_count += count_all_from_set(d_data, chars_to_count, stream);
-  } else {
-    prefilter_count += count_all_from_set(h_data, chars_to_count, stream);
-  }
-
-  rmm::device_uvector<uint64_t> rec_starts(prefilter_count, stream);
-
-  auto* find_result_ptr = rec_starts.data();
-  // Manually adding an extra row to account for the first row in the file
-  if (reader_opts.get_byte_range_offset() == 0) {
-    find_result_ptr++;
-    CUDF_CUDA_TRY(cudaMemsetAsync(rec_starts.data(), 0ull, sizeof(uint64_t), stream.value()));
-  }
-
-  std::vector<char> chars_to_find{'\n'};
-  // Passing offset = 1 to return positions AFTER the found character
-  if (should_load_whole_source(reader_opts)) {
-    find_all_from_set(d_data, chars_to_find, 1, find_result_ptr, stream);
-  } else {
-    find_all_from_set(h_data, chars_to_find, 1, find_result_ptr, stream);
-  }
-
-  // Previous call stores the record positions as encountered by all threads
-  // Sort the record positions as subsequent processing may require filtering
-  // certain rows or other processing on specific records
-  thrust::sort(rmm::exec_policy(stream), rec_starts.begin(), rec_starts.end());
-
-  auto filtered_count = prefilter_count;
-
-  // Exclude the ending newline as it does not precede a record start
-  if (h_data.back() == '\n') { filtered_count--; }
-  rec_starts.resize(filtered_count, stream);
-
-  return rec_starts;
-}
-
-/**
- * @brief Uploads the relevant segment of the input json data onto the GPU.
- *
- * Sets the d_data_ data member.
- * Only rows that need to be parsed are copied, based on the byte range
- * Also updates the array of record starts to match the device data offset.
- */
-rmm::device_uvector<char> upload_data_to_device(json_reader_options const& reader_opts,
-                                                host_span<char const> h_data,
-                                                rmm::device_uvector<uint64_t>& rec_starts,
-                                                rmm::cuda_stream_view stream)
-{
-  CUDF_FUNC_RANGE();
-  size_t end_offset = h_data.size();
-
-  // Trim lines that are outside range
-  auto h_rec_starts = cudf::detail::make_std_vector_sync(rec_starts, stream);
-
-  if (reader_opts.get_byte_range_size() != 0) {
-    auto it = h_rec_starts.end() - 1;
-    while (it >= h_rec_starts.begin() && *it > reader_opts.get_byte_range_size()) {
-      end_offset = *it;
-      --it;
-    }
-    h_rec_starts.erase(it + 1, h_rec_starts.end());
-  }
-
-  // Resize to exclude rows outside of the range
-  // Adjust row start positions to account for the data subcopy
-  size_t start_offset = h_rec_starts.front();
-  rec_starts.resize(h_rec_starts.size(), stream);
-  thrust::transform(rmm::exec_policy(stream),
-                    rec_starts.begin(),
-                    rec_starts.end(),
-                    thrust::make_constant_iterator(start_offset),
-                    rec_starts.begin(),
-                    thrust::minus<uint64_t>());
-
-  size_t const bytes_to_upload = end_offset - start_offset;
-  CUDF_EXPECTS(bytes_to_upload <= h_data.size(),
-               "Error finding the record within the specified byte range.\n");
-
-  // Upload the raw data that is within the rows of interest
-  return cudf::detail::make_device_uvector_async(
-    h_data.subspan(start_offset, bytes_to_upload), stream, rmm::mr::get_current_device_resource());
-}
-
-std::pair<std::vector<std::string>, col_map_ptr_type> get_column_names_and_map(
-  parse_options_view const& parse_opts,
-  host_span<char const> h_data,
-  device_span<uint64_t const> rec_starts,
-  device_span<char const> d_data,
-  rmm::cuda_stream_view stream)
-{
-  // If file only contains one row, use the file size for the row size
-  uint64_t first_row_len = d_data.size();
-  if (rec_starts.size() > 1) {
-    // Set first_row_len to the offset of the second row, if it exists
-    CUDF_CUDA_TRY(cudaMemcpyAsync(
-      &first_row_len, rec_starts.data() + 1, sizeof(uint64_t), cudaMemcpyDefault, stream.value()));
-  }
-  std::vector<char> first_row(first_row_len);
-  CUDF_CUDA_TRY(cudaMemcpyAsync(first_row.data(),
-                                d_data.data(),
-                                first_row_len * sizeof(char),
-                                cudaMemcpyDefault,
-                                stream.value()));
-  stream.synchronize();
-
-  // Determine the row format between:
-  //   JSON array - [val1, val2, ...] and
-  //   JSON object - {"col1":val1, "col2":val2, ...}
-  // based on the top level opening bracket
-  auto const first_square_bracket = std::find(first_row.begin(), first_row.end(), '[');
-  auto const first_curly_bracket  = std::find(first_row.begin(), first_row.end(), '{');
-  CUDF_EXPECTS(first_curly_bracket != first_row.end() || first_square_bracket != first_row.end(),
-               "Input data is not a valid JSON file.");
-  // If the first opening bracket is '{', assume object format
-  if (first_curly_bracket < first_square_bracket) {
-    // use keys as column names if input rows are objects
-    return get_json_object_keys_hashes(parse_opts, h_data, rec_starts, d_data, stream);
-  } else {
-    int cols_found    = 0;
-    bool quotation    = false;
-    auto column_names = std::vector<std::string>();
-    for (size_t pos = 0; pos < first_row.size(); ++pos) {
-      // Flip the quotation flag if current character is a quotechar
-      if (first_row[pos] == parse_opts.quotechar) {
-        quotation = !quotation;
-      }
-      // Check if end of a column/row
-      else if (pos == first_row.size() - 1 ||
-               (!quotation && first_row[pos] == parse_opts.delimiter)) {
-        column_names.emplace_back(std::to_string(cols_found++));
-      }
-    }
-    return {column_names, col_map_type::create(0, stream)};
-  }
-}
-
-std::vector<data_type> get_data_types(json_reader_options const& reader_opts,
-                                      parse_options_view const& parse_opts,
-                                      std::vector<std::string> const& column_names,
-                                      col_map_type* column_map,
-                                      device_span<uint64_t const> rec_starts,
-                                      device_span<char const> data,
-                                      rmm::cuda_stream_view stream)
-{
-  bool has_to_infer_column_types =
-    std::visit([](auto const& dtypes) { return dtypes.empty(); }, reader_opts.get_dtypes());
-
-  if (!has_to_infer_column_types) {
-    return std::visit(
-      cudf::detail::visitor_overload{
-        [&](std::vector<data_type> const& dtypes) {
-          CUDF_EXPECTS(dtypes.size() == column_names.size(), "Must specify types for all columns");
-          return dtypes;
-        },
-        [&](std::map<std::string, data_type> const& dtypes) {
-          std::vector<data_type> sorted_dtypes;
-          std::transform(std::cbegin(column_names),
-                         std::cend(column_names),
-                         std::back_inserter(sorted_dtypes),
-                         [&](auto const& column_name) {
-                           auto const it = dtypes.find(column_name);
-                           CUDF_EXPECTS(it != dtypes.end(), "Must specify types for all columns");
-                           return it->second;
-                         });
-          return sorted_dtypes;
-        },
-        [&](std::map<std::string, schema_element> const& dtypes) {
-          std::vector<data_type> sorted_dtypes;
-          std::transform(std::cbegin(column_names),
-                         std::cend(column_names),
-                         std::back_inserter(sorted_dtypes),
-                         [&](auto const& column_name) {
-                           auto const it = dtypes.find(column_name);
-                           CUDF_EXPECTS(it != dtypes.end(), "Must specify types for all columns");
-                           return it->second.type;
-                         });
-          return sorted_dtypes;
-        }},
-      reader_opts.get_dtypes());
-  } else {
-    CUDF_EXPECTS(not rec_starts.empty(), "No data available for data type inference.\n");
-    auto const num_columns       = column_names.size();
-    auto const do_set_null_count = column_map->capacity() > 0;
-
-    auto const h_column_infos = detect_data_types(
-      parse_opts, data, rec_starts, do_set_null_count, num_columns, column_map, stream);
-
-    auto get_type_id = [&](auto const& cinfo) {
-      auto int_count_total =
-        cinfo.big_int_count + cinfo.negative_small_int_count + cinfo.positive_small_int_count;
-      if (cinfo.null_count == static_cast<int>(rec_starts.size())) {
-        // Entire column is NULL; allocate the smallest amount of memory
-        return type_id::INT8;
-      } else if (cinfo.string_count > 0) {
-        return type_id::STRING;
-      } else if (cinfo.datetime_count > 0) {
-        return type_id::TIMESTAMP_MILLISECONDS;
-      } else if (cinfo.float_count > 0) {
-        return type_id::FLOAT64;
-      } else if (cinfo.big_int_count == 0 && int_count_total != 0) {
-        return type_id::INT64;
-      } else if (cinfo.big_int_count != 0 && cinfo.negative_small_int_count != 0) {
-        return type_id::STRING;
-      } else if (cinfo.big_int_count != 0) {
-        return type_id::UINT64;
-      } else if (cinfo.bool_count > 0) {
-        return type_id::BOOL8;
-      } else {
-        CUDF_FAIL("Data type detection failed.\n");
-      }
-    };
-
-    std::vector<data_type> dtypes;
-
-    std::transform(std::cbegin(h_column_infos),
-                   std::cend(h_column_infos),
-                   std::back_inserter(dtypes),
-                   [&](auto const& cinfo) { return data_type{get_type_id(cinfo)}; });
-
-    return dtypes;
-  }
-}
-
-table_with_metadata convert_data_to_table(parse_options_view const& parse_opts,
-                                          std::vector<data_type> const& dtypes,
-                                          std::vector<std::string>&& column_names,
-                                          col_map_type* column_map,
-                                          device_span<uint64_t const> rec_starts,
-                                          device_span<char const> data,
-                                          rmm::cuda_stream_view stream,
-                                          rmm::device_async_resource_ref mr)
-{
-  auto const num_columns = dtypes.size();
-  auto const num_records = rec_starts.size();
-
-  // alloc output buffers.
-  std::vector<cudf::io::detail::column_buffer> out_buffers;
-  for (size_t col = 0; col < num_columns; ++col) {
-    out_buffers.emplace_back(dtypes[col], num_records, true, stream, mr);
-  }
-
-  thrust::host_vector<data_type> h_dtypes(num_columns);
-  thrust::host_vector<void*> h_data(num_columns);
-  thrust::host_vector<bitmask_type*> h_valid(num_columns);
-
-  for (size_t i = 0; i < num_columns; ++i) {
-    h_dtypes[i] = dtypes[i];
-    h_data[i]   = out_buffers[i].data();
-    h_valid[i]  = out_buffers[i].null_mask();
-  }
-
-  auto d_dtypes = cudf::detail::make_device_uvector_async<data_type>(
-    h_dtypes, stream, rmm::mr::get_current_device_resource());
-  auto d_data = cudf::detail::make_device_uvector_async<void*>(
-    h_data, stream, rmm::mr::get_current_device_resource());
-  auto d_valid = cudf::detail::make_device_uvector_async<cudf::bitmask_type*>(
-    h_valid, stream, rmm::mr::get_current_device_resource());
-  auto d_valid_counts = cudf::detail::make_zeroed_device_uvector_async<cudf::size_type>(
-    num_columns, stream, rmm::mr::get_current_device_resource());
-
-  convert_json_to_columns(
-    parse_opts, data, rec_starts, d_dtypes, column_map, d_data, d_valid, d_valid_counts, stream);
-
-  stream.synchronize();
-
-  // postprocess columns
-  auto target_chars   = std::vector<char>{'\\', '"', '\\', '\\', '\\', 't', '\\', 'r', '\\', 'b'};
-  auto target_offsets = std::vector<size_type>{0, 2, 4, 6, 8, 10};
-
-  auto repl_chars   = std::vector<char>{'"', '\\', '\t', '\r', '\b'};
-  auto repl_offsets = std::vector<size_type>{0, 1, 2, 3, 4, 5};
-
-  auto target =
-    make_strings_column(static_cast<size_type>(target_offsets.size() - 1),
-                        std::make_unique<cudf::column>(
-                          cudf::detail::make_device_uvector_async(
-                            target_offsets, stream, rmm::mr::get_current_device_resource()),
-                          rmm::device_buffer{},
-                          0),
-                        cudf::detail::make_device_uvector_async(
-                          target_chars, stream, rmm::mr::get_current_device_resource())
-                          .release(),
-                        0,
-                        {});
-  auto repl = make_strings_column(
-    static_cast<size_type>(repl_offsets.size() - 1),
-    std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
-                                     repl_offsets, stream, rmm::mr::get_current_device_resource()),
-                                   rmm::device_buffer{},
-                                   0),
-    cudf::detail::make_device_uvector_async(
-      repl_chars, stream, rmm::mr::get_current_device_resource())
-      .release(),
-    0,
-    {});
-
-  auto const h_valid_counts = cudf::detail::make_std_vector_sync(d_valid_counts, stream);
-  std::vector<std::unique_ptr<column>> out_columns;
-  for (size_t i = 0; i < num_columns; ++i) {
-    out_buffers[i].null_count() = num_records - h_valid_counts[i];
-
-    auto out_column = make_column(out_buffers[i], nullptr, std::nullopt, stream);
-    if (out_column->type().id() == type_id::STRING) {
-      // Need to remove escape character in case of '\"' and '\\'
-      out_columns.emplace_back(cudf::strings::detail::replace(
-        out_column->view(), target->view(), repl->view(), stream, mr));
-    } else {
-      out_columns.emplace_back(std::move(out_column));
-    }
-    if (out_columns.back()->null_count() == 0) {
-      out_columns.back()->set_null_mask(rmm::device_buffer{0, stream, mr}, 0);
-    }
-  }
-
-  std::vector<column_name_info> column_infos;
-  column_infos.reserve(column_names.size());
-  std::transform(std::make_move_iterator(column_names.begin()),
-                 std::make_move_iterator(column_names.end()),
-                 std::back_inserter(column_infos),
-                 [](auto const& col_name) { return column_name_info{col_name}; });
-
-  // This is to ensure the stream-ordered make_stream_column calls above complete before
-  // the temporary std::vectors are destroyed on exit from this function.
-  stream.synchronize();
-
-  CUDF_EXPECTS(!out_columns.empty(), "No columns created from json input");
-
-  return table_with_metadata{std::make_unique<table>(std::move(out_columns)), {column_infos}};
-}
-
-/**
- * @brief Read an entire set or a subset of data from the source
- *
- * @param[in] options reader options with Number of bytes offset from the start,
- * Bytes to read; use `0` for all remaining data
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- *
- * @return Table and its metadata
- */
-table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
-                              json_reader_options const& reader_opts,
-                              rmm::cuda_stream_view stream,
-                              rmm::device_async_resource_ref mr)
-{
-  CUDF_EXPECTS(not sources.empty(), "No sources were defined");
-  CUDF_EXPECTS(sources.size() == 1 or reader_opts.get_compression() == compression_type::NONE,
-               "Multiple compressed inputs are not supported");
-  CUDF_EXPECTS(reader_opts.is_enabled_lines(), "Only JSON Lines format is currently supported.\n");
-
-  auto parse_opts = parse_options{',', '\n', '\"', '.'};
-
-  parse_opts.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
-  parse_opts.trie_false = cudf::detail::create_serialized_trie({"false"}, stream);
-  parse_opts.trie_na    = cudf::detail::create_serialized_trie({"", "null"}, stream);
-
-  parse_opts.dayfirst = reader_opts.is_enabled_dayfirst();
-
-  auto range_offset      = reader_opts.get_byte_range_offset();
-  auto range_size        = reader_opts.get_byte_range_size();
-  auto range_size_padded = reader_opts.get_byte_range_size_with_padding();
-
-  auto const h_raw_data = ingest_raw_input(
-    sources, reader_opts.get_compression(), range_offset, range_size, range_size_padded);
-  host_span<char const> h_data{reinterpret_cast<char const*>(h_raw_data.data()), h_raw_data.size()};
-
-  CUDF_EXPECTS(not h_data.empty(), "Ingest failed: uncompressed input data has zero size.\n");
-
-  auto d_data = rmm::device_uvector<char>(0, stream);
-
-  if (should_load_whole_source(reader_opts)) {
-    d_data = cudf::detail::make_device_uvector_async(
-      h_data, stream, rmm::mr::get_current_device_resource());
-  }
-
-  auto rec_starts = find_record_starts(reader_opts, h_data, d_data, stream);
-
-  CUDF_EXPECTS(rec_starts.size() > 0, "Error enumerating records.\n");
-
-  if (not should_load_whole_source(reader_opts)) {
-    d_data = upload_data_to_device(reader_opts, h_data, rec_starts, stream);
-  }
-
-  CUDF_EXPECTS(not d_data.is_empty(), "Error uploading input data to the GPU.\n");
-
-  auto column_names_and_map =
-    get_column_names_and_map(parse_opts.view(), h_data, rec_starts, d_data, stream);
-
-  auto column_names = std::get<0>(column_names_and_map);
-  auto column_map   = std::move(std::get<1>(column_names_and_map));
-
-  CUDF_EXPECTS(not column_names.empty(), "Error determining column names.\n");
-
-  auto dtypes = get_data_types(
-    reader_opts, parse_opts.view(), column_names, column_map.get(), rec_starts, d_data, stream);
-
-  CUDF_EXPECTS(not dtypes.empty(), "Error in data type detection.\n");
-
-  return convert_data_to_table(parse_opts.view(),
-                               dtypes,
-                               std::move(column_names),
-                               column_map.get(),
-                               rec_starts,
-                               d_data,
-                               stream,
-                               mr);
-}
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu
index ea52dce020e..df5c7bc21e1 100644
--- a/cpp/src/io/json/read_json.cu
+++ b/cpp/src/io/json/read_json.cu
@@ -15,7 +15,6 @@
  */
 
 #include "io/comp/io_uncomp.hpp"
-#include "io/json/legacy/read_json.hpp"
 #include "io/json/nested_json.hpp"
 #include "read_json.hpp"
 
@@ -267,14 +266,6 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
 {
   CUDF_FUNC_RANGE();
 
-  // TODO remove this if-statement once legacy is removed
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  if (reader_opts.is_enabled_legacy()) {
-    return legacy::read_json(sources, reader_opts, stream, mr);
-  }
-#pragma GCC diagnostic pop
-
   if (reader_opts.get_byte_range_offset() != 0 or reader_opts.get_byte_range_size() != 0) {
     CUDF_EXPECTS(reader_opts.is_enabled_lines(),
                  "Specifying a byte range is supported only for JSON Lines");
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index db934818ae7..2b8c1b02b40 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -189,10 +189,6 @@ ConfigureTest(
   PERCENT 70
 )
 
-# ##################################################################################################
-# * hash_map tests --------------------------------------------------------------------------------
-ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu)
-
 # ##################################################################################################
 # * quantiles tests -------------------------------------------------------------------------------
 ConfigureTest(
diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
deleted file mode 100644
index 4b10716706b..00000000000
--- a/cpp/tests/hash_map/map_test.cu
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2018-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "hash/concurrent_unordered_map.cuh"
-
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/cudf_gtest.hpp>
-#include <cudf_test/testing_main.hpp>
-
-#include <cudf/types.hpp>
-#include <cudf/utilities/default_stream.hpp>
-
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/logical.h>
-#include <thrust/pair.h>
-#include <thrust/tabulate.h>
-
-#include <cstdlib>
-#include <iostream>
-#include <limits>
-#include <random>
-#include <unordered_map>
-#include <vector>
-
-template <typename K, typename V>
-struct key_value_types {
-  using key_type   = K;
-  using value_type = V;
-  using pair_type  = thrust::pair<K, V>;
-  using map_type   = concurrent_unordered_map<key_type, value_type>;
-};
-
-template <typename T>
-struct InsertTest : public cudf::test::BaseFixture {
-  using key_type   = typename T::key_type;
-  using value_type = typename T::value_type;
-  using pair_type  = typename T::pair_type;
-  using map_type   = typename T::map_type;
-
-  InsertTest()
-  {
-    // prevent overflow of small types
-    const size_t input_size =
-      std::min(static_cast<key_type>(size), std::numeric_limits<key_type>::max());
-    pairs.resize(input_size, cudf::get_default_stream());
-    map = std::move(map_type::create(compute_hash_table_size(size), cudf::get_default_stream()));
-    cudf::get_default_stream().synchronize();
-  }
-
-  const cudf::size_type size{10000};
-  rmm::device_uvector<pair_type> pairs{static_cast<std::size_t>(size), cudf::get_default_stream()};
-  std::unique_ptr<map_type, std::function<void(map_type*)>> map;
-};
-
-using TestTypes = ::testing::Types<key_value_types<int32_t, int32_t>,
-                                   key_value_types<int64_t, int64_t>,
-                                   key_value_types<int16_t, int16_t>,
-                                   key_value_types<int32_t, float>,
-                                   key_value_types<int64_t, double>>;
-
-TYPED_TEST_SUITE(InsertTest, TestTypes);
-
-template <typename map_type, typename pair_type>
-struct insert_pair {
-  insert_pair(map_type _map) : map{_map} {}
-
-  __device__ bool operator()(pair_type const& pair)
-  {
-    auto result = map.insert(pair);
-    if (result.first == map.end()) { return false; }
-    return result.second;
-  }
-
-  map_type map;
-};
-
-template <typename map_type, typename pair_type>
-struct find_pair {
-  find_pair(map_type _map) : map{_map} {}
-
-  __device__ bool operator()(pair_type const& pair)
-  {
-    auto result = map.find(pair.first);
-    if (result == map.end()) { return false; }
-    return *result == pair;
-  }
-  map_type map;
-};
-
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-struct unique_pair_generator {
-  __device__ pair_type operator()(cudf::size_type i)
-  {
-    return thrust::make_pair(key_type(i), value_type(i));
-  }
-};
-
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-struct identical_pair_generator {
-  identical_pair_generator(key_type k = 42, value_type v = 42) : key{k}, value{v} {}
-  __device__ pair_type operator()(cudf::size_type i) { return thrust::make_pair(key, value); }
-  key_type key;
-  value_type value;
-};
-
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-struct identical_key_generator {
-  identical_key_generator(key_type k = 42) : key{k} {}
-  __device__ pair_type operator()(cudf::size_type i)
-  {
-    return thrust::make_pair(key, value_type(i));
-  }
-  key_type key;
-};
-
-TYPED_TEST(InsertTest, UniqueKeysUniqueValues)
-{
-  using map_type  = typename TypeParam::map_type;
-  using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
-                   this->pairs.begin(),
-                   this->pairs.end(),
-                   unique_pair_generator<pair_type>{});
-  // All pairs should be new inserts
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.end(),
-                             insert_pair<map_type, pair_type>{*this->map}));
-
-  // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.end(),
-                             find_pair<map_type, pair_type>{*this->map}));
-}
-
-TYPED_TEST(InsertTest, IdenticalKeysIdenticalValues)
-{
-  using map_type  = typename TypeParam::map_type;
-  using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
-                   this->pairs.begin(),
-                   this->pairs.end(),
-                   identical_pair_generator<pair_type>{});
-  // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.begin() + 1,
-                             insert_pair<map_type, pair_type>{*this->map}));
-  // Identical inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                              this->pairs.begin(),
-                              this->pairs.end(),
-                              insert_pair<map_type, pair_type>{*this->map}));
-
-  // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.end(),
-                             find_pair<map_type, pair_type>{*this->map}));
-}
-
-TYPED_TEST(InsertTest, IdenticalKeysUniqueValues)
-{
-  using map_type  = typename TypeParam::map_type;
-  using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
-                   this->pairs.begin(),
-                   this->pairs.end(),
-                   identical_key_generator<pair_type>{});
-
-  // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.begin() + 1,
-                             insert_pair<map_type, pair_type>{*this->map}));
-
-  // Identical key inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                              this->pairs.begin() + 1,
-                              this->pairs.end(),
-                              insert_pair<map_type, pair_type>{*this->map}));
-
-  // Only first pair is present in map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.begin() + 1,
-                             find_pair<map_type, pair_type>{*this->map}));
-
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                              this->pairs.begin() + 1,
-                              this->pairs.end(),
-                              find_pair<map_type, pair_type>{*this->map}));
-}
-
-CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 35e6adf20e7..9d766e80094 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -264,13 +264,13 @@ struct JsonValidFixedPointReaderTest : public JsonFixedPointReaderTest<DecimalTy
 TYPED_TEST_SUITE(JsonFixedPointReaderTest, cudf::test::FixedPointTypes);
 TYPED_TEST_SUITE(JsonValidFixedPointReaderTest, cudf::test::FixedPointTypes);
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for supported orients
 INSTANTIATE_TEST_CASE_P(JsonReaderParamTest,
                         JsonReaderParamTest,
                         ::testing::Values(json_test_t::json_record_orient,
                                           json_test_t::json_row_orient));
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for supported orients
 INSTANTIATE_TEST_CASE_P(JsonReaderRecordTest,
                         JsonReaderRecordTest,
                         ::testing::Values(json_test_t::json_record_orient));
@@ -917,7 +917,6 @@ TEST_F(JsonReaderTest, EmptyFile)
     outfile << "";
   }
 
-  // New reader only - legacy reader is strict about having non-empty input
   cudf::io::json_reader_options in_options =
     cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
   auto result = cudf::io::read_json(in_options);
@@ -934,7 +933,6 @@ TEST_F(JsonReaderTest, NoDataFile)
     outfile << "{}\n";
   }
 
-  // New reader only - legacy reader is strict about having non-empty input
   cudf::io::json_reader_options in_options =
     cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
   cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
@@ -1303,31 +1301,6 @@ TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputsNoNL)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{1.1, 2.2, 3.3, 4.4}});
 }
 
-// This can be removed once the legacy option has been removed.
-// The read_json only throws with legacy(true)
-TEST_F(JsonReaderTest, DISABLED_BadDtypeParams)
-{
-  std::string buffer = "[1,2,3,4]";
-
-  cudf::io::json_reader_options options_vec =
-    cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
-      .lines(true)
-      .dtypes({dtype<int8_t>()});
-
-  // should throw because there are four columns and only one dtype
-  EXPECT_THROW(cudf::io::read_json(options_vec), cudf::logic_error);
-
-  cudf::io::json_reader_options options_map =
-    cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
-      .lines(true)
-      .dtypes(std::map<std::string, cudf::data_type>{{"0", dtype<int8_t>()},
-                                                     {"1", dtype<int8_t>()},
-                                                     {"2", dtype<int8_t>()},
-                                                     {"wrong_name", dtype<int8_t>()}});
-  // should throw because one of the columns is not in the dtype map
-  EXPECT_THROW(cudf::io::read_json(options_map), cudf::logic_error);
-}
-
 TEST_F(JsonReaderTest, JsonBasic)
 {
   std::string const fname = temp_env->get_temp_dir() + "JsonBasic.json";
@@ -1372,12 +1345,8 @@ TEST_F(JsonReaderTest, JsonLines)
   // Read test data via nested JSON reader
   auto const table = cudf::io::read_json(json_lines_options);
 
-  // Read test data via legacy, non-nested JSON lines reader
-  auto const legacy_reader_table = cudf::io::read_json(json_lines_options);
-
-  // Verify that the data read via non-nested JSON lines reader matches the data read via nested
-  // JSON reader
-  CUDF_TEST_EXPECT_TABLES_EQUAL(legacy_reader_table.tbl->view(), table.tbl->view());
+  // TODO: Rewrite this test to check against a fixed value
+  CUDF_TEST_EXPECT_TABLES_EQUAL(table.tbl->view(), table.tbl->view());
 }
 
 TEST_F(JsonReaderTest, JsonLongString)
@@ -1548,12 +1517,8 @@ TEST_F(JsonReaderTest, LinesNoOmissions)
     // Read test data via nested JSON reader
     auto const table = cudf::io::read_json(json_lines_options);
 
-    // Read test data via legacy, non-nested JSON lines reader
-    auto const legacy_reader_table = cudf::io::read_json(json_lines_options);
-
-    // Verify that the data read via non-nested JSON lines reader matches the data read via
-    // nested JSON reader
-    CUDF_TEST_EXPECT_TABLES_EQUAL(legacy_reader_table.tbl->view(), table.tbl->view());
+    // TODO: Rewrite this test to check against a fixed value
+    CUDF_TEST_EXPECT_TABLES_EQUAL(table.tbl->view(), table.tbl->view());
   }
 }
 
@@ -2440,7 +2405,7 @@ TEST_F(JsonReaderTest, MapTypes)
 struct JsonDelimiterParamTest : public cudf::test::BaseFixture,
                                 public testing::WithParamInterface<char> {};
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for multiple delimiters
 INSTANTIATE_TEST_SUITE_P(JsonDelimiterParamTest,
                          JsonDelimiterParamTest,
                          ::testing::Values('\n', '\b', '\v', '\f', 'h'));
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index d6f800cce8b..5dc25133719 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -248,7 +248,7 @@ TEST_F(JsonTest, StackContextUtf8)
 struct JsonDelimiterParamTest : public cudf::test::BaseFixture,
                                 public testing::WithParamInterface<char> {};
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for multiple delimiters
 INSTANTIATE_TEST_SUITE_P(JsonDelimiterParamTest,
                          JsonDelimiterParamTest,
                          ::testing::Values('\n', '\b', '\v', '\f', 'h'));
diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 283a451dd4a..242727163ee 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -47,7 +47,6 @@ cpdef read_json(object filepaths_or_buffers,
                 bool lines,
                 object compression,
                 object byte_range,
-                bool legacy,
                 bool keep_quotes,
                 bool mixed_types_as_string,
                 bool prune_columns):
@@ -119,7 +118,6 @@ cpdef read_json(object filepaths_or_buffers,
         .lines(c_lines)
         .byte_range_offset(c_range_offset)
         .byte_range_size(c_range_size)
-        .legacy(legacy)
         .build()
     )
     if is_list_like_dtypes:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
index 7e64a4cae29..10e43467d57 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
@@ -87,9 +87,6 @@ cdef extern from "cudf/io/json.hpp" \
         json_reader_options_builder& dayfirst(
             bool val
         ) except +
-        json_reader_options_builder& legacy(
-            bool val
-        ) except +
         json_reader_options_builder& keep_quotes(
             bool val
         ) except +
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 03d07fc3a50..7de9705e4cb 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -99,7 +99,6 @@ def read_json(
             lines,
             compression,
             byte_range,
-            False,
             keep_quotes,
             mixed_types_as_string,
             prune_columns,

From f873e238aa0e611f6352f7c91501a562eeaa6437 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Thu, 23 May 2024 18:32:58 -0400
Subject: [PATCH 06/15] Use rapids_cpm_nvtx3 to get same nvtx3 target state as
 rmm (#15840)

We need to use `rapids_cpm_nvtx3` so that the nvtx3 targets and their setup are consistent across rmm and cudf. If we don't, we get errors about incorrect exports when building statically, or link errors when building shared.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Nghia Truong (https://github.com/ttnghia)
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/15840
---
 cpp/CMakeLists.txt                  |  4 ++--
 cpp/benchmarks/CMakeLists.txt       |  2 +-
 cpp/cmake/thirdparty/get_nvtx.cmake | 16 +++++++---------
 cpp/tests/CMakeLists.txt            |  4 ++--
 java/src/main/native/CMakeLists.txt |  2 +-
 5 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7390c465ccb..1eab51c8827 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -794,8 +794,8 @@ add_dependencies(cudf jitify_preprocess_run)
 target_link_libraries(
   cudf
   PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm
-  PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio
-          $<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
+  PRIVATE $<BUILD_LOCAL_INTERFACE:nvtx3::nvtx3-cpp> cuco::cuco ZLIB::ZLIB nvcomp::nvcomp
+          kvikio::kvikio $<TARGET_NAME_IF_EXISTS:cuFile_interface> nanoarrow
 )
 
 # Add Conda library, and include paths if specified
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 170cf27b72b..10f645dfec0 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -25,7 +25,7 @@ target_compile_options(
 target_link_libraries(
   cudf_datagen
   PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf
-         cudftestutil nvtx3-cpp
+         cudftestutil nvtx3::nvtx3-cpp
   PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
 )
 
diff --git a/cpp/cmake/thirdparty/get_nvtx.cmake b/cpp/cmake/thirdparty/get_nvtx.cmake
index c722c4f70f1..e236d586522 100644
--- a/cpp/cmake/thirdparty/get_nvtx.cmake
+++ b/cpp/cmake/thirdparty/get_nvtx.cmake
@@ -12,16 +12,14 @@
 # the License.
 # =============================================================================
 
-# This function finds NVTX and sets any additional necessary environment variables.
+# Need to call rapids_cpm_nvtx3 to get support for an installed version of nvtx3 and to support
+# installing it ourselves
 function(find_and_configure_nvtx)
-  rapids_cpm_find(
-    NVTX3 3.1.0
-    GLOBAL_TARGETS nvtx3-c nvtx3-cpp
-    CPM_ARGS
-    GIT_REPOSITORY https://github.com/NVIDIA/NVTX.git
-    GIT_TAG v3.1.0
-    GIT_SHALLOW TRUE SOURCE_SUBDIR c
-  )
+  include(${rapids-cmake-dir}/cpm/nvtx3.cmake)
+
+  # Find or install nvtx3
+  rapids_cpm_nvtx3(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
+
 endfunction()
 
 find_and_configure_nvtx()
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index db934818ae7..7db9a06e809 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -56,8 +56,8 @@ function(ConfigureTest CMAKE_TEST_NAME)
 
   target_link_libraries(
     ${CMAKE_TEST_NAME}
-    PRIVATE cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main nvtx3-cpp
-            $<TARGET_NAME_IF_EXISTS:conda_env> "${_CUDF_TEST_EXTRA_LIB}"
+    PRIVATE cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main
+            nvtx3::nvtx3-cpp $<TARGET_NAME_IF_EXISTS:conda_env> "${_CUDF_TEST_EXTRA_LIB}"
   )
   rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME})
   rapids_test_add(
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 0d5339a1402..56f8f9d0472 100644
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -239,7 +239,7 @@ endif()
 # When nvcomp is installed we need to use nvcomp::nvcomp but from the cudf build directory it will
 # just be nvcomp.
 target_link_libraries(
-  cudfjni ${CUDF_LINK} PRIVATE nvtx3-cpp $<TARGET_NAME_IF_EXISTS:nvcomp>
+  cudfjni ${CUDF_LINK} PRIVATE nvtx3::nvtx3-cpp $<TARGET_NAME_IF_EXISTS:nvcomp>
                                $<TARGET_NAME_IF_EXISTS:nvcomp::nvcomp>
 )
 

From 8b5ff188e79bb79ca0c2d581e94d3a91654a2d31 Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Thu, 23 May 2024 20:32:30 -0400
Subject: [PATCH 07/15] Remove problematic call of index setter to unblock
 dask-cuda CI (#15844)

A lighter-weight alternative to https://github.com/rapidsai/cudf/pull/15843 that unblocks dask-cuda CI.

Authors:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/15844
---
 python/cudf/cudf/core/indexed_frame.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index a166c256689..394904c5855 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -350,7 +350,8 @@ def _from_columns_like_self(
         frame = self.__class__._from_data(data)
 
         if index is not None:
-            frame.index = index
+            # TODO: triage why using the setter here breaks dask_cuda.ProxifyHostFile
+            frame._index = index
         return frame._copy_type_metadata(
             self,
             include_index=bool(index_names),

From 72aa271a6ad8cfdcd4373ceadd777b4800fd26c4 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 24 May 2024 06:24:37 -1000
Subject: [PATCH 08/15] Ensure cudf.Series(cudf.Series(...)) creates a
 reference to the same index (#15845)

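Aligns cudf with pandas in the following case, where pandas reuses the source Series' index object but cudf previously returned a different index object: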

```python
In [1]: import pandas as pd

In [3]: ser1 = pd.Series(range(3), index=list("Abc"))

In [4]: ser2 = pd.Series(ser1)

In [5]: ser1.index is ser2.index
Out[5]: True

In [6]: import cudf

In [7]: ser1 = cudf.Series(range(3), index=list("Abc"))

In [8]: ser2 = cudf.Series(ser1)

In [9]: ser1.index is ser2.index
Out[9]: False
```

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/15845
---
 python/cudf/cudf/core/series.py       | 4 +++-
 python/cudf/cudf/tests/test_series.py | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 41fbf269699..908347e389b 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -595,8 +595,10 @@ def __init__(
                 data = data.copy(deep=True)
             name_from_data = data.name
             column = as_column(data, nan_as_null=nan_as_null, dtype=dtype)
-            if isinstance(data, (pd.Series, Series)):
+            if isinstance(data, pd.Series):
                 index_from_data = as_index(data.index)
+            elif isinstance(data, Series):
+                index_from_data = data.index
         elif isinstance(data, ColumnAccessor):
             raise TypeError(
                 "Use cudf.Series._from_data for constructing a Series from "
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 9aeae566730..323716d5fc3 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -2835,3 +2835,9 @@ def test_timedelta_series_init(data):
     actual = cudf.Series(scalar)
 
     assert_eq(expected, actual)
+
+
+def test_series_from_series_index_no_shallow_copy():
+    ser1 = cudf.Series(range(3), index=list("abc"))
+    ser2 = cudf.Series(ser1)
+    assert ser1.index is ser2.index

From 8a405674a5ba1554a0ced5d1f39f89fb424a768d Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 24 May 2024 11:24:39 -0500
Subject: [PATCH 09/15] Fix docs for IO readers and strings_convert (#15842)

Fixes documentation for IO readers and strings_convert.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/15842
---
 docs/cudf/source/libcudf_docs/api_docs/io_readers.rst      | 2 +-
 docs/cudf/source/libcudf_docs/api_docs/strings_convert.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/cudf/source/libcudf_docs/api_docs/io_readers.rst b/docs/cudf/source/libcudf_docs/api_docs/io_readers.rst
index a835673dee4..f94a5ddb403 100644
--- a/docs/cudf/source/libcudf_docs/api_docs/io_readers.rst
+++ b/docs/cudf/source/libcudf_docs/api_docs/io_readers.rst
@@ -2,4 +2,4 @@ Io Readers
 ==========
 
 .. doxygengroup:: io_readers
-   :desc-only:
+   :members:
diff --git a/docs/cudf/source/libcudf_docs/api_docs/strings_convert.rst b/docs/cudf/source/libcudf_docs/api_docs/strings_convert.rst
index ae5d78fb1a1..f2f320bd0e4 100644
--- a/docs/cudf/source/libcudf_docs/api_docs/strings_convert.rst
+++ b/docs/cudf/source/libcudf_docs/api_docs/strings_convert.rst
@@ -2,4 +2,4 @@ Strings Convert
 ===============
 
 .. doxygengroup:: strings_convert
-   :desc-only:
+   :members:

From 78a0314d809a24e26b86abecf8f935a4d4340550 Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Fri, 24 May 2024 12:40:28 -0400
Subject: [PATCH 10/15] Avoid unnecessary `Index` cast in `IndexedFrame.index`
 setter (#15843)

Triaging the recent dask-cuda [breakage](https://github.com/rapidsai/dask-cuda/actions/runs/9202583065/attempts/1) led me to https://github.com/rapidsai/cudf/pull/15781: passing an index object directly to the `IndexedFrame.index` setter (and therefore wrapping that index in an `Index()` constructor) appears to have caused proxifying issues on dask-cuda's end.

cc @rjzamora @mroeschke

Authors:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/15843
---
 python/cudf/cudf/core/indexed_frame.py |  6 +++++-
 python/cudf/cudf/tests/test_index.py   | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 394904c5855..b4a689804c7 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -644,7 +644,11 @@ def index(self, value):
                 f"Length mismatch: Expected axis has {old_length} elements, "
                 f"new values have {len(value)} elements"
             )
-        self._index = Index(value)
+        # avoid unnecessary cast to Index
+        if not isinstance(value, BaseIndex):
+            value = Index(value)
+
+        self._index = value
 
     @_cudf_nvtx_annotate
     def replace(
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 8e7532d044d..b92ae1b3364 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -3266,3 +3266,17 @@ def test_index_datetime_repeat():
     actual = gidx.to_frame().repeat(5)
 
     assert_eq(actual.index, expected)
+
+
+@pytest.mark.parametrize(
+    "index",
+    [
+        cudf.Index([1]),
+        cudf.RangeIndex(1),
+        cudf.MultiIndex(levels=[[0]], codes=[[0]]),
+    ],
+)
+def test_index_assignment_no_shallow_copy(index):
+    df = cudf.DataFrame(range(1))
+    df.index = index
+    assert df.index is index

From 4a3315b55a89b2c92908eac8a6fd255a33843ba9 Mon Sep 17 00:00:00 2001
From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com>
Date: Fri, 24 May 2024 13:46:27 -0500
Subject: [PATCH 11/15] Remove benchmark-specific use of pinned-pooled memory
 in Parquet multithreaded benchmark. (#15838)

The benchmark was manually creating and using a pinned-pool rmm allocator, which is now redundant since cuIO itself does this by default. This PR removes it.

Authors:
  - https://github.com/nvdbaranec
  - Nghia Truong (https://github.com/ttnghia)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - Paul Mattione (https://github.com/pmattione-nvidia)
  - Nghia Truong (https://github.com/ttnghia)
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/15838
---
 .../io/parquet/parquet_reader_multithread.cpp   | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
index fbdcfb0ade9..bd80c4e0e88 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp
@@ -25,25 +25,12 @@
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/thread_pool.hpp>
 
-#include <rmm/mr/device/pool_memory_resource.hpp>
-#include <rmm/mr/pinned_host_memory_resource.hpp>
-#include <rmm/resource_ref.hpp>
-
 #include <nvtx3/nvtx3.hpp>
 
 #include <nvbench/nvbench.cuh>
 
 #include <vector>
 
-// TODO: remove this once pinned/pooled is enabled by default in cuIO
-void set_cuio_host_pinned_pool()
-{
-  using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
-  static std::shared_ptr<host_pooled_mr> mr = std::make_shared<host_pooled_mr>(
-    std::make_shared<rmm::mr::pinned_host_memory_resource>().get(), 256ul * 1024 * 1024);
-  cudf::io::set_host_memory_resource(*mr);
-}
-
 size_t get_num_reads(nvbench::state const& state) { return state.get_int64("num_threads"); }
 
 size_t get_read_size(nvbench::state const& state)
@@ -105,8 +92,6 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state,
   size_t const data_size = state.get_int64("total_data_size");
   auto const num_threads = state.get_int64("num_threads");
 
-  set_cuio_host_pinned_pool();
-
   auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
   cudf::detail::thread_pool threads(num_threads);
 
@@ -186,8 +171,6 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state,
   size_t const input_limit  = state.get_int64("input_limit");
   size_t const output_limit = state.get_int64("output_limit");
 
-  set_cuio_host_pinned_pool();
-
   auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
   cudf::detail::thread_pool threads(num_threads);
   auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);

From 81cadb60b9cb8840e1700ecc223f651c97618e34 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 24 May 2024 10:20:21 -1000
Subject: [PATCH 12/15] Use ColumnAccessor row and column length attributes
 more consistently (#15857)

Also ensures that any calls to `_num_rows` use the cached version.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/15857
---
 python/cudf/cudf/core/dataframe.py     | 29 +++++++++++++-------------
 python/cudf/cudf/core/frame.py         |  2 +-
 python/cudf/cudf/core/indexed_frame.py |  8 ++++---
 python/cudf/cudf/core/multiindex.py    |  2 +-
 4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1f530aa3108..acfc2d781a7 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1429,7 +1429,7 @@ def __setitem__(self, arg, value):
                 else:
                     # disc. with pandas here
                     # pandas raises key error here
-                    self.insert(len(self._data), arg, value)
+                    self.insert(self._num_columns, arg, value)
 
         elif can_convert_to_column(arg):
             mask = arg
@@ -1846,7 +1846,7 @@ def _clean_renderable_dataframe(self, output):
         if lines[-1].startswith("["):
             lines = lines[:-1]
             lines.append(
-                "[%d rows x %d columns]" % (len(self), len(self._data.names))
+                "[%d rows x %d columns]" % (len(self), self._num_columns)
             )
         return "\n".join(lines)
 
@@ -1901,7 +1901,7 @@ def _get_renderable_dataframe(self):
             else pd.options.display.width / 2
         )
 
-        if len(self) <= nrows and len(self._data.names) <= ncols:
+        if len(self) <= nrows and self._num_columns <= ncols:
             output = self.copy(deep=False)
         elif self.empty and len(self.index) > 0:
             max_seq_items = pd.options.display.max_seq_items
@@ -1922,15 +1922,15 @@ def _get_renderable_dataframe(self):
             else:
                 output = self.copy(deep=False)
         else:
-            left_cols = len(self._data.names)
+            left_cols = self._num_columns
             right_cols = 0
             upper_rows = len(self)
             lower_rows = 0
             if len(self) > nrows and nrows > 0:
                 upper_rows = int(nrows / 2.0) + 1
                 lower_rows = upper_rows + (nrows % 2)
-            if len(self._data.names) > ncols:
-                right_cols = len(self._data.names) - int(ncols / 2.0)
+            if left_cols > ncols:
+                right_cols = left_cols - int(ncols / 2.0)
                 # adjust right columns for output if multiindex.
                 right_cols = (
                     right_cols - 1
@@ -1945,11 +1945,11 @@ def _get_renderable_dataframe(self):
             else:
                 # If right_cols is 0 or negative, it means
                 # self has lesser number of columns than ncols.
-                # Hence assign len(self._data.names) which
+                # Hence assign self._num_columns which
                 # will result in empty `*_right` quadrants.
                 # This is because `*_left` quadrants will
                 # contain all columns.
-                right_cols = len(self._data.names)
+                right_cols = self._num_columns
 
             upper_left = self.head(upper_rows).iloc[:, :left_cols]
             upper_right = self.head(upper_rows).iloc[:, right_cols:]
@@ -1983,8 +1983,7 @@ def _repr_html_(self):
         if lines[-2].startswith("<p>"):
             lines = lines[:-2]
             lines.append(
-                "<p>%d rows × %d columns</p>"
-                % (len(self), len(self._data.names))
+                "<p>%d rows × %d columns</p>" % (len(self), self._num_columns)
             )
             lines.append("</div>")
         return "\n".join(lines)
@@ -2660,9 +2659,9 @@ def columns(self, columns):
             level_names = (pd_columns.name,)
             label_dtype = pd_columns.dtype
 
-        if len(pd_columns) != len(self._data.names):
+        if len(pd_columns) != self._num_columns:
             raise ValueError(
-                f"Length mismatch: expected {len(self._data.names)} elements, "
+                f"Length mismatch: expected {self._num_columns} elements, "
                 f"got {len(pd_columns)} elements"
             )
 
@@ -2683,7 +2682,7 @@ def _set_columns_like(self, other: ColumnAccessor) -> None:
         * The possible .columns.dtype
         * The .columns.names/name (depending on if it's a MultiIndex)
         """
-        if len(self._data.names) != len(other.names):
+        if self._num_columns != len(other.names):
             raise ValueError(
                 f"Length mismatch: expected {len(other)} elements, "
                 f"got {len(self)} elements"
@@ -3207,7 +3206,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
         if name in self._data:
             raise NameError(f"duplicated column name {name}")
 
-        num_cols = len(self._data)
+        num_cols = self._num_columns
         if loc < 0:
             loc += num_cols + 1
 
@@ -5032,7 +5031,7 @@ def info(
         )
         lines.append(index_summary)
 
-        if len(self._data) == 0:
+        if self._num_columns == 0:
             lines.append(f"Empty {type(self).__name__}")
             cudf.utils.ioutils.buffer_write_lines(buf, lines)
             return
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 92ca76d6ceb..7b561906afb 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -76,7 +76,7 @@ def _num_columns(self) -> int:
 
     @property
     def _num_rows(self) -> int:
-        return 0 if self._num_columns == 0 else len(self._data.columns[0])
+        return self._data.nrows
 
     @property
     def _column_names(self) -> Tuple[Any, ...]:  # TODO: Tuple[str]?
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index b4a689804c7..a31430e1571 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -289,6 +289,7 @@ def __init__(self, data=None, index=None):
     @property
     def _num_rows(self) -> int:
         # Important to use the index because the data may be empty.
+        # TODO: Remove once DataFrame.__init__ is cleaned up
         return len(self.index)
 
     @property
@@ -448,6 +449,7 @@ def _scan(self, op, axis=None, skipna=True):
     def _check_data_index_length_match(self) -> None:
         # Validate that the number of rows in the data matches the index if the
         # data is not empty. This is a helper for the constructor.
+        # TODO: Use self._num_rows once DataFrame.__init__ is cleaned up
         if self._data.nrows > 0 and self._data.nrows != len(self.index):
             raise ValueError(
                 f"Length of values ({self._data.nrows}) does not "
@@ -639,7 +641,7 @@ def index(self, value):
         new_length = len(value)
 
         # A DataFrame with 0 columns can have an index of arbitrary length.
-        if len(self._data) > 0 and new_length != old_length:
+        if self._num_columns > 0 and new_length != old_length:
             raise ValueError(
                 f"Length mismatch: Expected axis has {old_length} elements, "
                 f"new values have {len(value)} elements"
@@ -1129,7 +1131,7 @@ def dot(self, other, reflect=False):
             common = self._data.to_pandas_index().union(
                 other.index.to_pandas()
             )
-            if len(common) > len(self._data.names) or len(common) > len(
+            if len(common) > self._num_columns or len(common) > len(
                 other.index
             ):
                 raise ValueError("matrices are not aligned")
@@ -2757,7 +2759,7 @@ def sort_index(
             out = self[labels]
             if ignore_index:
                 out._data.rangeindex = True
-                out._data.names = list(range(len(self._data.names)))
+                out._data.names = list(range(self._num_columns))
 
         return self._mimic_inplace(out, inplace=inplace)
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index c149a1028a0..049fac45ba8 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -527,7 +527,7 @@ def get_slice_bound(self, label, side, kind=None):
     @_cudf_nvtx_annotate
     def nlevels(self):
         """Integer number of levels in this MultiIndex."""
-        return len(self._data)
+        return self._num_columns
 
     @property  # type: ignore
     @_cudf_nvtx_annotate

From d756c37ef3a9625862df849e03b503d990dc411b Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Fri, 24 May 2024 15:35:31 -0500
Subject: [PATCH 13/15] Implement `on_bad_lines` in json reader (#15834)

Fixes: #15559

This PR implements `on_bad_lines` in json reader. When `on_bad_lines="recover"`, bad lines are replaced by `<NA>` values.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/15834
---
 python/cudf/cudf/_lib/json.pyx                | 15 ++++++++-
 .../cudf/_lib/pylibcudf/libcudf/io/json.pxd   |  7 +++++
 python/cudf/cudf/io/json.py                   | 18 ++++++-----
 python/cudf/cudf/tests/test_json.py           | 31 +++++++++++++++++++
 python/cudf/cudf/utils/ioutils.py             |  5 +++
 5 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 242727163ee..a8fef907bad 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -24,6 +24,7 @@ from cudf._lib.io.utils cimport (
 from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
 from cudf._lib.pylibcudf.libcudf.io.json cimport (
     json_reader_options,
+    json_recovery_mode_t,
     json_writer_options,
     read_json as libcudf_read_json,
     schema_element,
@@ -42,6 +43,15 @@ from cudf._lib.types cimport dtype_to_data_type
 from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
 
 
+cdef json_recovery_mode_t _get_json_recovery_mode(object on_bad_lines):
+    if on_bad_lines.lower() == "error":
+        return json_recovery_mode_t.FAIL
+    elif on_bad_lines.lower() == "recover":
+        return json_recovery_mode_t.RECOVER_WITH_NULL
+    else:
+        raise TypeError(f"Invalid parameter for {on_bad_lines=}")
+
+
 cpdef read_json(object filepaths_or_buffers,
                 object dtype,
                 bool lines,
@@ -49,7 +59,8 @@ cpdef read_json(object filepaths_or_buffers,
                 object byte_range,
                 bool keep_quotes,
                 bool mixed_types_as_string,
-                bool prune_columns):
+                bool prune_columns,
+                object on_bad_lines):
     """
     Cython function to call into libcudf API, see `read_json`.
 
@@ -118,6 +129,7 @@ cpdef read_json(object filepaths_or_buffers,
         .lines(c_lines)
         .byte_range_offset(c_range_offset)
         .byte_range_size(c_range_size)
+        .recovery_mode(_get_json_recovery_mode(on_bad_lines))
         .build()
     )
     if is_list_like_dtypes:
@@ -128,6 +140,7 @@ cpdef read_json(object filepaths_or_buffers,
     opts.enable_keep_quotes(keep_quotes)
     opts.enable_mixed_types_as_string(mixed_types_as_string)
     opts.enable_prune_columns(prune_columns)
+
     # Read JSON
     cdef cudf_io_types.table_with_metadata c_result
 
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
index 10e43467d57..2e50cccd132 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
@@ -19,6 +19,10 @@ cdef extern from "cudf/io/json.hpp" \
         data_type type
         map[string, schema_element] child_types
 
+    cdef enum json_recovery_mode_t:
+        FAIL "cudf::io::json_recovery_mode_t::FAIL"
+        RECOVER_WITH_NULL "cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL"
+
     cdef cppclass json_reader_options:
         json_reader_options() except +
         cudf_io_types.source_info get_source() except +
@@ -90,6 +94,9 @@ cdef extern from "cudf/io/json.hpp" \
         json_reader_options_builder& keep_quotes(
             bool val
         ) except +
+        json_reader_options_builder& recovery_mode(
+            json_recovery_mode_t val
+        ) except +
 
         json_reader_options build() except +
 
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 7de9705e4cb..dd4a0d9eb07 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -27,6 +27,7 @@ def read_json(
     storage_options=None,
     mixed_types_as_string=False,
     prune_columns=False,
+    on_bad_lines="error",
     *args,
     **kwargs,
 ):
@@ -94,14 +95,15 @@ def read_json(
                 filepaths_or_buffers.append(tmp_source)
 
         df = libjson.read_json(
-            filepaths_or_buffers,
-            dtype,
-            lines,
-            compression,
-            byte_range,
-            keep_quotes,
-            mixed_types_as_string,
-            prune_columns,
+            filepaths_or_buffers=filepaths_or_buffers,
+            dtype=dtype,
+            lines=lines,
+            compression=compression,
+            byte_range=byte_range,
+            keep_quotes=keep_quotes,
+            mixed_types_as_string=mixed_types_as_string,
+            prune_columns=prune_columns,
+            on_bad_lines=on_bad_lines,
         )
     else:
         warnings.warn(
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 51287fe26a0..ba6a8f94719 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -1392,3 +1392,34 @@ def test_json_nested_mixed_types_error(jsonl_string):
             orient="records",
             lines=True,
         )
+
+
+@pytest.mark.parametrize("on_bad_lines", ["error", "recover", "abc"])
+def test_json_reader_on_bad_lines(on_bad_lines):
+    json_input = StringIO(
+        '{"a":1,"b":10}\n{"a":2,"b":11}\nabc\n{"a":3,"b":12}\n'
+    )
+    if on_bad_lines == "error":
+        with pytest.raises(RuntimeError):
+            cudf.read_json(
+                json_input,
+                lines=True,
+                orient="records",
+                on_bad_lines=on_bad_lines,
+            )
+    elif on_bad_lines == "recover":
+        actual = cudf.read_json(
+            json_input, lines=True, orient="records", on_bad_lines=on_bad_lines
+        )
+        expected = cudf.DataFrame(
+            {"a": [1, 2, None, 3], "b": [10, 11, None, 12]}
+        )
+        assert_eq(actual, expected)
+    else:
+        with pytest.raises(TypeError):
+            cudf.read_json(
+                json_input,
+                lines=True,
+                orient="records",
+                on_bad_lines=on_bad_lines,
+            )
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 1366a0b8e84..0209c692935 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -739,6 +739,11 @@
 
     If True, only return those columns mentioned in the dtype argument.
     If `False` dtype argument is used a type inference suggestion.
+on_bad_lines : {'error', 'recover'}, default 'error'
+    Specifies what to do upon encountering a bad line. Allowed values are :
+
+    - ``'error'``, raise an Exception when a bad line is encountered.
+    - ``'recover'``, fills the row with <NA> when a bad line is encountered.
 Returns
 -------
 result : Series or DataFrame, depending on the value of `typ`.

From 8458306ecbc17d3977a98e2e33752b678394f588 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Fri, 24 May 2024 15:04:08 -0700
Subject: [PATCH 14/15] Migrate reshape.pxd to pylibcudf (#15827)

xref #15162

Authors:
  - Thomas Li (https://github.com/lithomas1)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/15827
---
 .../user_guide/api_docs/pylibcudf/index.rst   |  1 +
 .../user_guide/api_docs/pylibcudf/reshape.rst |  6 ++
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   |  1 +
 python/cudf/cudf/_lib/pylibcudf/__init__.pxd  |  1 +
 python/cudf/cudf/_lib/pylibcudf/__init__.py   |  1 +
 python/cudf/cudf/_lib/pylibcudf/reshape.pxd   | 11 ++++
 python/cudf/cudf/_lib/pylibcudf/reshape.pyx   | 65 +++++++++++++++++++
 python/cudf/cudf/_lib/reshape.pyx             | 42 +++++-------
 .../cudf/cudf/pylibcudf_tests/test_reshape.py | 43 ++++++++++++
 9 files changed, 147 insertions(+), 24 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/reshape.pxd
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/reshape.pyx
 create mode 100644 python/cudf/cudf/pylibcudf_tests/test_reshape.py

diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
index 8cad95f61ae..1c1b37e2c37 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -20,6 +20,7 @@ This page provides API documentation for pylibcudf.
     lists
     merge
     reduce
+    reshape
     rolling
     scalar
     search
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst
new file mode 100644
index 00000000000..964cef04923
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst
@@ -0,0 +1,6 @@
+=======
+reshape
+=======
+
+.. automodule:: cudf._lib.pylibcudf.reshape
+   :members:
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index efc978fc6d0..7d01671e84f 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -27,6 +27,7 @@ set(cython_sources
     merge.pyx
     reduce.pyx
     replace.pyx
+    reshape.pyx
     rolling.pyx
     scalar.pyx
     search.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
index 5adefa5fd93..91c3fdf5602 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -13,6 +13,7 @@ from . cimport (
     merge,
     reduce,
     replace,
+    reshape,
     rolling,
     search,
     sorting,
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
index 89f874f5fa5..fcdc4992f00 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -13,6 +13,7 @@
     merge,
     reduce,
     replace,
+    reshape,
     rolling,
     search,
     sorting,
diff --git a/python/cudf/cudf/_lib/pylibcudf/reshape.pxd b/python/cudf/cudf/_lib/pylibcudf/reshape.pxd
new file mode 100644
index 00000000000..a7cc45d7a08
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/reshape.pxd
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from cudf._lib.pylibcudf.libcudf.types cimport size_type
+
+from .column cimport Column
+from .scalar cimport Scalar
+from .table cimport Table
+
+
+cpdef Column interleave_columns(Table source_table)
+cpdef Table tile(Table source_table, size_type count)
diff --git a/python/cudf/cudf/_lib/pylibcudf/reshape.pyx b/python/cudf/cudf/_lib/pylibcudf/reshape.pyx
new file mode 100644
index 00000000000..b68eba48cd6
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/reshape.pyx
@@ -0,0 +1,65 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+
+from cudf._lib.pylibcudf.libcudf.column.column cimport column
+from cudf._lib.pylibcudf.libcudf.reshape cimport (
+    interleave_columns as cpp_interleave_columns,
+    tile as cpp_tile,
+)
+from cudf._lib.pylibcudf.libcudf.table.table cimport table
+from cudf._lib.pylibcudf.libcudf.types cimport size_type
+
+from .column cimport Column
+from .table cimport Table
+
+
+cpdef Column interleave_columns(Table source_table):
+    """Interleave columns of a table into a single column.
+
+    Converts the column major table `source_table` into a row major column.
+
+    Example:
+    in     = [[A1, A2, A3], [B1, B2, B3]]
+    return = [A1, B1, A2, B2, A3, B3]
+
+    Parameters
+    ----------
+    source_table: Table
+        The input table to interleave
+
+    Returns
+    -------
+    Column
+        A new column which is the result of interleaving the input columns
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_interleave_columns(source_table.view()))
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Table tile(Table source_table, size_type count):
+    """Repeats the rows from input table count times to form a new table.
+
+    Parameters
+    ----------
+    source_table: Table
+        The input table containing rows to be repeated
+    count: size_type
+        The number of times to tile "rows". Must be non-negative
+
+    Returns
+    -------
+    Table
+        The table containing the tiled "rows"
+    """
+    cdef unique_ptr[table] c_result
+
+    with nogil:
+        c_result = move(cpp_tile(source_table.view(), count))
+
+    return Table.from_libcudf(move(c_result))
diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx
index 48e386bcf02..6bba8f0df35 100644
--- a/python/cudf/cudf/_lib/reshape.pyx
+++ b/python/cudf/cudf/_lib/reshape.pyx
@@ -2,39 +2,33 @@
 
 from cudf.core.buffer import acquire_spill_lock
 
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
-
 from cudf._lib.column cimport Column
-from cudf._lib.pylibcudf.libcudf.column.column cimport column
-from cudf._lib.pylibcudf.libcudf.reshape cimport (
-    interleave_columns as cpp_interleave_columns,
-    tile as cpp_tile,
-)
-from cudf._lib.pylibcudf.libcudf.table.table cimport table
-from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
 from cudf._lib.pylibcudf.libcudf.types cimport size_type
-from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
+from cudf._lib.utils cimport columns_from_pylibcudf_table
+
+import cudf._lib.pylibcudf as plc
 
 
 @acquire_spill_lock()
 def interleave_columns(list source_columns):
-    cdef table_view c_view = table_view_from_columns(source_columns)
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(cpp_interleave_columns(c_view))
-
-    return Column.from_unique_ptr(move(c_result))
+    return Column.from_pylibcudf(
+        plc.reshape.interleave_columns(
+            plc.Table([
+                c.to_pylibcudf(mode="read") for c in source_columns
+            ])
+        )
+    )
 
 
 @acquire_spill_lock()
 def tile(list source_columns, size_type count):
     cdef size_type c_count = count
-    cdef table_view c_view = table_view_from_columns(source_columns)
-    cdef unique_ptr[table] c_result
-
-    with nogil:
-        c_result = move(cpp_tile(c_view, c_count))
 
-    return columns_from_unique_ptr(move(c_result))
+    return columns_from_pylibcudf_table(
+        plc.reshape.tile(
+            plc.Table([
+                c.to_pylibcudf(mode="read") for c in source_columns
+            ]),
+            c_count
+        )
+    )
diff --git a/python/cudf/cudf/pylibcudf_tests/test_reshape.py b/python/cudf/cudf/pylibcudf_tests/test_reshape.py
new file mode 100644
index 00000000000..b8b914f3f09
--- /dev/null
+++ b/python/cudf/cudf/pylibcudf_tests/test_reshape.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pytest
+from utils import assert_column_eq, assert_table_eq
+
+from cudf._lib import pylibcudf as plc
+
+
+@pytest.fixture(scope="module")
+def reshape_data():
+    data = [[1, 2, 3], [4, 5, 6]]
+    return data
+
+
+@pytest.fixture(scope="module")
+def reshape_plc_tbl(reshape_data):
+    arrow_tbl = pa.Table.from_arrays(reshape_data, names=["a", "b"])
+    plc_tbl = plc.interop.from_arrow(arrow_tbl)
+    return plc_tbl
+
+
+def test_interleave_columns(reshape_data, reshape_plc_tbl):
+    res = plc.reshape.interleave_columns(reshape_plc_tbl)
+
+    interleaved_data = [pa.array(pair) for pair in zip(*reshape_data)]
+
+    expect = pa.concat_arrays(interleaved_data)
+
+    assert_column_eq(res, expect)
+
+
+@pytest.mark.parametrize("cnt", [0, 1, 3])
+def test_tile(reshape_data, reshape_plc_tbl, cnt):
+    res = plc.reshape.tile(reshape_plc_tbl, cnt)
+
+    tiled_data = [pa.array(col * cnt) for col in reshape_data]
+
+    expect = pa.Table.from_arrays(
+        tiled_data, schema=plc.interop.to_arrow(reshape_plc_tbl).schema
+    )
+
+    assert_table_eq(res, expect)

From 29429f7e4c871758c0de930026347e6e3b0a5a9a Mon Sep 17 00:00:00 2001
From: Michael Schellenberger Costa <miscco@nvidia.com>
Date: Tue, 28 May 2024 05:47:58 -0700
Subject: [PATCH 15/15] Work around issues with cccl main (#15552)

This gets cuDF building against cccl main on 12.3.

There is one issue with the cuco tuple helpers, but that will be fixed on
the cuco side.

---------

Co-authored-by: Bernhard Manfred Gruber <bgruber@nvidia.com>
Co-authored-by: Bradley Dice <bdice@bradleydice.com>
Co-authored-by: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Co-authored-by: ptaylor <paul.e.taylor@me.com>
Co-authored-by: Paul Taylor <178183+trxcllnt@users.noreply.github.com>
Co-authored-by: Yunsong Wang <yunsongw@nvidia.com>
---
 .pre-commit-config.yaml                       |  2 +
 .../thirdparty/patches/cccl_override.json     | 20 ++++++++
 .../patches/revert_pr_211_cccl_2.5.0.diff     | 47 +++++++++++++++++++
 ..._disable_64bit_dispatching_cccl_2.5.0.diff | 25 ++++++++++
 ..._faster_scan_compile_times_cccl_2.5.0.diff | 39 +++++++++++++++
 ..._faster_sort_compile_times_cccl_2.5.0.diff | 39 +++++++++++++++
 cpp/src/io/comp/statistics.cu                 |  9 ++--
 cpp/src/io/orc/reader_impl_decode.cu          |  3 +-
 cpp/src/io/orc/stripe_init.cu                 | 22 +++++----
 cpp/src/io/parquet/page_string_decode.cu      | 13 +++--
 cpp/src/io/parquet/reader_impl_preprocess.cu  |  4 +-
 cpp/src/io/utilities/data_casting.cu          |  6 ++-
 cpp/src/join/distinct_hash_join.cu            |  2 +-
 cpp/src/strings/split/split_re.cu             |  4 +-
 cpp/tests/hash_map/map_test.cu                |  1 -
 15 files changed, 209 insertions(+), 27 deletions(-)
 create mode 100644 cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5a8d9f54673..2d3ffc287e9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,11 +7,13 @@ repos:
       - id: trailing-whitespace
         exclude: |
           (?x)^(
+            ^cpp/cmake/thirdparty/patches/.*|
             ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
           )
       - id: end-of-file-fixer
         exclude: |
           (?x)^(
+            ^cpp/cmake/thirdparty/patches/.*|
             ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
           )
   - repo: https://github.com/PyCQA/isort
diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json
index b33f17f3e4a..059f713e7a5 100644
--- a/cpp/cmake/thirdparty/patches/cccl_override.json
+++ b/cpp/cmake/thirdparty/patches/cccl_override.json
@@ -18,6 +18,11 @@
           "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
           "fixed_in" : ""
         },
+        {
+          "file" : "${current_json_dir}/revert_pr_211_cccl_2.5.0.diff",
+          "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
+          "fixed_in" : ""
+        },
         {
           "file": "cccl/kernel_pointer_hiding.diff",
           "issue": "Hide APIs that accept kernel pointers [https://github.com/NVIDIA/cccl/pull/1395]",
@@ -28,15 +33,30 @@
           "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
           "fixed_in" : ""
         },
+        {
+          "file" : "${current_json_dir}/thrust_disable_64bit_dispatching_cccl_2.5.0.diff",
+          "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
+          "fixed_in" : ""
+        },
         {
           "file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
           "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
           "fixed_in" : ""
         },
+        {
+          "file" : "${current_json_dir}/thrust_faster_sort_compile_times_cccl_2.5.0.diff",
+          "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
+          "fixed_in" : ""
+        },
         {
           "file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
           "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
           "fixed_in" : ""
+        },
+        {
+          "file" : "${current_json_dir}/thrust_faster_scan_compile_times_cccl_2.5.0.diff",
+          "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
+          "fixed_in" : ""
         }
       ]
     }
diff --git a/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff
new file mode 100644
index 00000000000..27ff16744f5
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff
@@ -0,0 +1,47 @@
+diff --git a/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h b/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h
+index 046eb83c0..8047c9701 100644
+--- a/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h
++++ b/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h
+@@ -53,41 +53,15 @@ namespace cuda_cub
+ 
+ namespace __copy
+ {
+-template <class Derived, class InputIt, class OutputIt>
+-OutputIt THRUST_RUNTIME_FUNCTION device_to_device(
+-  execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result, thrust::detail::true_type)
+-{
+-  typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
+-  const auto n = thrust::distance(first, last);
+-  if (n > 0)
+-  {
+-    cudaError status;
+-    status = trivial_copy_device_to_device(
+-      policy,
+-      reinterpret_cast<InputTy*>(thrust::raw_pointer_cast(&*result)),
+-      reinterpret_cast<InputTy const*>(thrust::raw_pointer_cast(&*first)),
+-      n);
+-    cuda_cub::throw_on_error(status, "__copy:: D->D: failed");
+-  }
+-
+-  return result + n;
+-}
+ 
+ template <class Derived, class InputIt, class OutputIt>
+ OutputIt THRUST_RUNTIME_FUNCTION device_to_device(
+-  execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result, thrust::detail::false_type)
++  execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result)
+ {
+   typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
+   return cuda_cub::transform(policy, first, last, result, thrust::identity<InputTy>());
+ }
+ 
+-template <class Derived, class InputIt, class OutputIt>
+-OutputIt THRUST_RUNTIME_FUNCTION
+-device_to_device(execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result)
+-{
+-  return device_to_device(
+-    policy, first, last, result, typename is_indirectly_trivially_relocatable_to<InputIt, OutputIt>::type());
+-}
+ } // namespace __copy
+ 
+ } // namespace cuda_cub
diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff
new file mode 100644
index 00000000000..6ae1e1c917b
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff
@@ -0,0 +1,25 @@
+diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
+index 2a3cc4e33..8fb337b26 100644
+--- a/thrust/thrust/system/cuda/detail/dispatch.h
++++ b/thrust/thrust/system/cuda/detail/dispatch.h
+@@ -44,8 +44,7 @@
+   }                                                                                   \
+   else                                                                                \
+   {                                                                                   \
+-    auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
+-    status                             = call arguments;                              \
++    throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+   }
+ 
+ /**
+@@ -66,9 +65,7 @@
+   }                                                                                          \
+   else                                                                                       \
+   {                                                                                          \
+-    auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1);      \
+-    auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2);      \
+-    status                              = call arguments;                                    \
++    throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+   }
+ /**
+  * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff
new file mode 100644
index 00000000000..fee46046194
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff
@@ -0,0 +1,39 @@
+diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+index 0606485bb..dbb99ff13 100644
+--- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh
++++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+@@ -1085,7 +1085,7 @@ struct DeviceRadixSortPolicy
+   };
+ 
+   /// SM60 (GP100)
+-  struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
++  struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+   {
+     enum
+     {
+diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh
+index f39613adb..75bd16ff9 100644
+--- a/cub/cub/device/dispatch/dispatch_reduce.cuh
++++ b/cub/cub/device/dispatch/dispatch_reduce.cuh
+@@ -488,7 +488,7 @@ struct DeviceReducePolicy
+   };
+ 
+   /// SM60
+-  struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
++  struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+   {
+     static constexpr int threads_per_block  = 256;
+     static constexpr int items_per_thread   = 16;
+diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
+index 419908c4e..6ab0840e1 100644
+--- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh
++++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
+@@ -339,7 +339,7 @@ struct DeviceScanPolicy
+   /// SM600
+   struct Policy600
+       : DefaultTuning
+-      , ChainedPolicy<600, Policy600, Policy520>
++      , ChainedPolicy<600, Policy600, Policy600>
+   {};
+ 
+   /// SM800
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff
new file mode 100644
index 00000000000..cb0cc55f4d2
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff
@@ -0,0 +1,39 @@
+diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
+index eb76ebb0b..c6c529a50 100644
+--- a/cub/cub/block/block_merge_sort.cuh
++++ b/cub/cub/block/block_merge_sort.cuh
+@@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge(
+   KeyT key1 = keys_shared[keys1_beg];
+   KeyT key2 = keys_shared[keys2_beg];
+ 
+-#pragma unroll
++#pragma unroll 1
+   for (int item = 0; item < ITEMS_PER_THREAD; ++item)
+   {
+     bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
+@@ -376,7 +376,7 @@ public:
+       //
+       KeyT max_key = oob_default;
+ 
+-#pragma unroll
++#pragma unroll 1
+       for (int item = 1; item < ITEMS_PER_THREAD; ++item)
+       {
+         if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
+diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh
+index 7d9e8622f..da5627306 100644
+--- a/cub/cub/thread/thread_sort.cuh
++++ b/cub/cub/thread/thread_sort.cuh
+@@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE
+ {
+   constexpr bool KEYS_ONLY = ::cuda::std::is_same<ValueT, NullType>::value;
+ 
+-#pragma unroll
++#pragma unroll 1
+   for (int i = 0; i < ITEMS_PER_THREAD; ++i)
+   {
+-#pragma unroll
++#pragma unroll 1
+     for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
+     {
+       if (compare_op(keys[j + 1], keys[j]))
diff --git a/cpp/src/io/comp/statistics.cu b/cpp/src/io/comp/statistics.cu
index 2a9eb782800..faf967041bc 100644
--- a/cpp/src/io/comp/statistics.cu
+++ b/cpp/src/io/comp/statistics.cu
@@ -18,6 +18,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <cuda/functional>
 #include <thrust/transform_reduce.h>
 
 namespace cudf::io {
@@ -32,9 +33,9 @@ writer_compression_statistics collect_compression_statistics(
     rmm::exec_policy(stream),
     results.begin(),
     results.end(),
-    [] __device__(auto& res) {
+    cuda::proclaim_return_type<size_t>([] __device__(compression_result const& res) {
       return res.status == compression_status::SUCCESS ? res.bytes_written : 0;
-    },
+    }),
     0ul,
     thrust::plus<size_t>());
 
@@ -47,9 +48,9 @@ writer_compression_statistics collect_compression_statistics(
       rmm::exec_policy(stream),
       zipped_begin,
       zipped_end,
-      [status] __device__(auto tup) {
+      cuda::proclaim_return_type<size_t>([status] __device__(auto tup) {
         return thrust::get<1>(tup).status == status ? thrust::get<0>(tup).size() : 0;
-      },
+      }),
       0ul,
       thrust::plus<size_t>());
   };
diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu
index ec936b85761..da9fb802a0a 100644
--- a/cpp/src/io/orc/reader_impl_decode.cu
+++ b/cpp/src/io/orc/reader_impl_decode.cu
@@ -692,8 +692,7 @@ std::vector<range> find_table_splits(table_view const& input,
      d_sizes  = d_segmented_sizes->view().begin<size_type>()] __device__(auto const segment_idx) {
       // Since the number of rows may not divisible by segment_length,
       // the last segment may be shorter than the others.
-      auto const current_length =
-        cuda::std::min(segment_length, num_rows - segment_length * segment_idx);
+      auto const current_length = min(segment_length, num_rows - segment_length * segment_idx);
       auto const size = d_sizes[segment_idx] / CHAR_BIT;  // divide by CHAR_BIT to get size in bytes
       return cumulative_size{static_cast<std::size_t>(current_length),
                              static_cast<std::size_t>(size)};
diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu
index dd44b779402..89dbbcb796c 100644
--- a/cpp/src/io/orc/stripe_init.cu
+++ b/cpp/src/io/orc/stripe_init.cu
@@ -561,20 +561,26 @@ void __host__ ParseCompressedStripeData(CompressedStreamInfo* strm_info,
                                         uint32_t log2maxcr,
                                         rmm::cuda_stream_view stream)
 {
-  dim3 dim_block(128, 1);
-  dim3 dim_grid((num_streams + 3) >> 2, 1);  // 1 stream per warp, 4 warps per block
-  gpuParseCompressedStripeData<<<dim_grid, dim_block, 0, stream.value()>>>(
-    strm_info, num_streams, compression_block_size, log2maxcr);
+  auto const num_blocks = (num_streams + 3) >> 2;  // 1 stream per warp, 4 warps per block
+  if (num_blocks > 0) {
+    dim3 dim_block(128, 1);
+    dim3 dim_grid(num_blocks, 1);
+    gpuParseCompressedStripeData<<<dim_grid, dim_block, 0, stream.value()>>>(
+      strm_info, num_streams, compression_block_size, log2maxcr);
+  }
 }
 
 void __host__ PostDecompressionReassemble(CompressedStreamInfo* strm_info,
                                           int32_t num_streams,
                                           rmm::cuda_stream_view stream)
 {
-  dim3 dim_block(128, 1);
-  dim3 dim_grid((num_streams + 3) >> 2, 1);  // 1 stream per warp, 4 warps per block
-  gpuPostDecompressionReassemble<<<dim_grid, dim_block, 0, stream.value()>>>(strm_info,
-                                                                             num_streams);
+  auto const num_blocks = (num_streams + 3) >> 2;  // 1 stream per warp, 4 warps per block
+  if (num_blocks > 0) {
+    dim3 dim_block(128, 1);
+    dim3 dim_grid(num_blocks, 1);
+    gpuPostDecompressionReassemble<<<dim_grid, dim_block, 0, stream.value()>>>(strm_info,
+                                                                               num_streams);
+  }
 }
 
 void __host__ ParseRowGroupIndex(RowGroup* row_groups,
diff --git a/cpp/src/io/parquet/page_string_decode.cu b/cpp/src/io/parquet/page_string_decode.cu
index cf1dc58b06a..ba3d35b9586 100644
--- a/cpp/src/io/parquet/page_string_decode.cu
+++ b/cpp/src/io/parquet/page_string_decode.cu
@@ -1197,14 +1197,17 @@ void ComputePageStringSizes(cudf::detail::hostdevice_span<PageInfo> pages,
   cudf::detail::join_streams(streams, stream);
 
   // check for needed temp space for DELTA_BYTE_ARRAY
-  auto const need_sizes = thrust::any_of(
-    rmm::exec_policy(stream), pages.device_begin(), pages.device_end(), [] __device__(auto& page) {
-      return page.temp_string_size != 0;
-    });
+  auto const need_sizes =
+    thrust::any_of(rmm::exec_policy(stream),
+                   pages.device_begin(),
+                   pages.device_end(),
+                   cuda::proclaim_return_type<bool>(
+                     [] __device__(auto& page) { return page.temp_string_size != 0; }));
 
   if (need_sizes) {
     // sum up all of the temp_string_sizes
-    auto const page_sizes = [] __device__(PageInfo const& page) { return page.temp_string_size; };
+    auto const page_sizes = cuda::proclaim_return_type<int64_t>(
+      [] __device__(PageInfo const& page) { return page.temp_string_size; });
     auto const total_size = thrust::transform_reduce(rmm::exec_policy(stream),
                                                      pages.device_begin(),
                                                      pages.device_end(),
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu
index f533f04e427..7cb982f103d 100644
--- a/cpp/src/io/parquet/reader_impl_preprocess.cu
+++ b/cpp/src/io/parquet/reader_impl_preprocess.cu
@@ -452,9 +452,9 @@ std::string encoding_to_string(Encoding encoding)
 [[nodiscard]] std::string list_unsupported_encodings(device_span<PageInfo const> pages,
                                                      rmm::cuda_stream_view stream)
 {
-  auto const to_mask = [] __device__(auto const& page) {
+  auto const to_mask = cuda::proclaim_return_type<uint32_t>([] __device__(auto const& page) {
     return is_supported_encoding(page.encoding) ? 0U : encoding_to_mask(page.encoding);
-  };
+  });
   uint32_t const unsupported = thrust::transform_reduce(
     rmm::exec_policy(stream), pages.begin(), pages.end(), to_mask, 0U, thrust::bit_or<uint32_t>());
   return encoding_bitmask_to_str(unsupported);
diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu
index c9e507925ec..60cbfbc0dae 100644
--- a/cpp/src/io/utilities/data_casting.cu
+++ b/cpp/src/io/utilities/data_casting.cu
@@ -34,6 +34,7 @@
 #include <rmm/resource_ref.hpp>
 
 #include <cub/cub.cuh>
+#include <cuda/functional>
 #include <thrust/copy.h>
 #include <thrust/functional.h>
 #include <thrust/transform_reduce.h>
@@ -783,7 +784,8 @@ template <typename SymbolT>
 struct to_string_view_pair {
   SymbolT const* data;
   to_string_view_pair(SymbolT const* _data) : data(_data) {}
-  __device__ auto operator()(thrust::tuple<size_type, size_type> ip)
+  __device__ thrust::pair<char const*, std::size_t> operator()(
+    thrust::tuple<size_type, size_type> ip)
   {
     return thrust::pair<char const*, std::size_t>{data + thrust::get<0>(ip),
                                                   static_cast<std::size_t>(thrust::get<1>(ip))};
@@ -805,7 +807,7 @@ static std::unique_ptr<column> parse_string(string_view_pair_it str_tuples,
     rmm::exec_policy(stream),
     str_tuples,
     str_tuples + col_size,
-    [] __device__(auto t) { return t.second; },
+    cuda::proclaim_return_type<std::size_t>([] __device__(auto t) { return t.second; }),
     size_type{0},
     thrust::maximum<size_type>{});
 
diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu
index ad401bdccba..5048da25e86 100644
--- a/cpp/src/join/distinct_hash_join.cu
+++ b/cpp/src/join/distinct_hash_join.cu
@@ -182,7 +182,7 @@ distinct_hash_join<HasNested>::inner_join(rmm::cuda_stream_view stream,
     thrust::make_transform_output_iterator(probe_indices->begin(), output_fn{});
 
   auto const [probe_indices_end, _] = this->_hash_table.retrieve(
-    iter, iter + probe_table_num_rows, probe_indices_begin, build_indices_begin, stream.value());
+    iter, iter + probe_table_num_rows, probe_indices_begin, build_indices_begin, {stream.value()});
 
   auto const actual_size = std::distance(probe_indices_begin, probe_indices_end);
   build_indices->resize(actual_size, stream);
diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu
index 6785ab9c893..d72ec1085b5 100644
--- a/cpp/src/strings/split/split_re.cu
+++ b/cpp/src/strings/split/split_re.cu
@@ -219,9 +219,9 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
     rmm::exec_policy(stream),
     thrust::make_counting_iterator<size_type>(0),
     thrust::make_counting_iterator<size_type>(strings_count),
-    [d_offsets] __device__(auto const idx) -> size_type {
+    cuda::proclaim_return_type<size_type>([d_offsets] __device__(auto const idx) -> size_type {
       return static_cast<size_type>(d_offsets[idx + 1] - d_offsets[idx]);
-    },
+    }),
     0,
     thrust::maximum<size_type>{});
 
diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
index 4b10716706b..be2e33538b9 100644
--- a/cpp/tests/hash_map/map_test.cu
+++ b/cpp/tests/hash_map/map_test.cu
@@ -69,7 +69,6 @@ struct InsertTest : public cudf::test::BaseFixture {
 
 using TestTypes = ::testing::Types<key_value_types<int32_t, int32_t>,
                                    key_value_types<int64_t, int64_t>,
-                                   key_value_types<int16_t, int16_t>,
                                    key_value_types<int32_t, float>,
                                    key_value_types<int64_t, double>>;